#!/usr/bin/env python3
# Copyright 2026 Exabeam, Inc.
# SPDX-License-Identifier: Apache-2.0
"""Aggregate OWASP finding counts across a Praxen baseline set and render a
self-contained HTML coverage report.

Walks every `/-findings-*.json` under the chosen baseline directory, sums the
per-finding `owasp_llm` / `owasp_agentic` primary scalars, and writes an HTML
report with bar charts and target links.

The report's look comes from the shared design system at
`assets/praxen-theme.css` (inlined at render time so the output stays a single
self-contained file); only the chart/card components are defined locally in
OWASP_CSS below.

Usage:
    python3 tests/baselines/owasp_coverage.py [--baseline-dir DIR] [--out FILE]

Defaults: reads the baseline set named in `tests/baselines/CURRENT` (falling
back to the newest `v*` directory next to this script), writes
`./owasp-coverage-report.html` in the current working directory.
"""

import argparse
import html
import json
import os
import sys
from collections import Counter
from datetime import datetime, timezone
from pathlib import Path

from theme_utils import load_theme_css, DOCS_BASE

# Directory containing this script; baseline sets live next to it.
THIS_DIR = Path(__file__).resolve().parent


def _baseline_sort_key(p: Path):
    """Version-aware sort key for `v*` baseline dirs: compares the numeric
    version components, so v0.7.10 sorts above v0.7.9 (plain name sorting puts
    it below). Non-numeric parts and unparseable names fall back to 0.

    Returns a `(numeric_components, name)` tuple; the directory name is the
    tie-breaker so the ordering is total.
    """
    version = p.name[1:].split("-", 1)[0]  # "0.7.10" from "v0.7.10-claude48"
    # isdecimal() rather than isdigit(): isdigit() is also true for characters
    # such as superscript digits, which int() rejects with ValueError. That
    # would turn the documented "fall back to 0" into a crash at import time.
    nums = [int(part) if part.isdecimal() else 0 for part in version.split(".")]
    return (nums, p.name)


def _default_baseline() -> Path:
    """Return the canonical baseline named in CURRENT, falling back to the
    newest v* dir.

    If neither exists, the historical `v0.7.7-claude48` path is returned even
    though it may not exist on disk.
    """
    current_file = THIS_DIR / "CURRENT"
    if current_file.is_file():
        name = current_file.read_text(encoding="utf-8").strip()
        candidate = THIS_DIR / name
        # An empty (or ".") CURRENT makes `candidate` equal THIS_DIR itself,
        # which is a directory but not a baseline set -- ignore it and fall
        # through to the newest-version lookup.
        if candidate != THIS_DIR and candidate.is_dir():
            return candidate
    # Sort keys are unique (they include the directory name), so the maximum
    # is exactly the first element of a descending sort.
    return max(
        (p for p in THIS_DIR.glob("v*") if p.is_dir()),
        key=_baseline_sort_key,
        default=THIS_DIR / "v0.7.7-claude48",
    )


DEFAULT_BASELINE = _default_baseline()
# The module docstring and the --out help text both document the default
# output as ./owasp-coverage-report.html in the current working directory, so
# anchor it at the cwd rather than at the script's own directory.
DEFAULT_OUT = Path.cwd() / "owasp-coverage-report.html"

# (target id, display name, source repository URL, short description)
TARGETS = [
    ("finbot", "FinBot", "https://github.com/OWASP-ASI/finbot-ctf-demo",
     "OWASP Agentic AI CTF — invoice processor"),
    ("helperbot", "HelperBot", "https://github.com/opena2a-org/damn-vulnerable-ai-agent",
     "Damn Vulnerable AI Agent — training agent"),
    ("langchain-sql", "LangChain SQL Agent", "https://github.com/langchain-ai/langchain-community",
     "create_sql_agent toolkit"),
    ("openai-customer-service", "OpenAI Customer Service", "https://github.com/openai/openai-agents-python",
     "OpenAI Agents SDK example"),
    ("autogen-code-executor", "AutoGen Code Executor", "https://github.com/microsoft/autogen",
     "Microsoft AutoGen code-executor family"),
    ("sweep", "Sweep", "https://github.com/sweepai/sweep",
     "GitHub issue-to-code agent"),
    ("devika", "Devika", "https://github.com/stitionai/devika",
     "Autonomous software engineer"),
    ("aider", "Aider", "https://github.com/Aider-AI/aider",
     "Interactive pair-programming agent"),
    ("openhands", "OpenHands", "https://github.com/All-Hands-AI/OpenHands",
     "Autonomous software-engineering platform"),
    ("deepagents-cli", "Deep Agents CLI", "https://github.com/langchain-ai/deepagents",
     "LangChain agent harness (MCP coverage)"),
    ("yaah", "yaah", "https://github.com/dirien/yet-another-agent-harness",
     "Yet Another Agent Harness (MCP coverage)"),
    ("hermes-agent-desktop", "Hermes (Agent + Desktop)", "https://github.com/NousResearch/hermes-agent",
     "Multi-component LLM agent + desktop control layer"),
]

# (category code, title) pairs for the OWASP LLM Top 10, in chart order.
LLM_TITLES = [
    ("LLM01", "Prompt Injection"),
    ("LLM02", "Sensitive Information Disclosure"),
    ("LLM03", "Supply Chain"),
    ("LLM04", "Data and Model Poisoning"),
    ("LLM05", "Improper Output Handling"),
    ("LLM06", "Excessive Agency"),
    ("LLM07", "System Prompt Leakage"),
    ("LLM08", "Vector and Embedding Weaknesses"),
    ("LLM09", "Misinformation"),
    ("LLM10", "Unbounded Consumption"),
]

# (category code, title) pairs for the OWASP Agentic Top 10, in chart order.
ASI_TITLES = [
    ("ASI01", "Agent Goal Hijack"),
    ("ASI02", "Tool Misuse and Exploitation"),
    ("ASI03", "Identity and Privilege Abuse"),
    ("ASI04", "Agentic Supply Chain Vulnerabilities"),
    ("ASI05", "Unexpected Code Execution (RCE)"),
    ("ASI06", "Memory and Context Poisoning"),
    ("ASI07", "Insecure Inter-Agent Communication"),
    ("ASI08", "Cascading Failures"),
    ("ASI09", "Human-Agent Trust Exploitation"),
    ("ASI10", "Rogue Agents"),
]

# Report-specific components (cards + bar charts). Tokens, base elements,
# buttons, the hero, sections and footer all come from the shared theme
# (assets/praxen-theme.css), inlined ahead of this in the

OWASP Coverage Across Praxen Baseline Targets

Aggregate finding counts by category, taken from the frozen tests/baselines/{html.escape(baseline_name)}/ set.

{n_targets}targets analyzed

{total}total findings

{llm_total}LLM-classified

{asi_total}Agentic-classified

Targets analyzed {n_targets} frozen Praxen baseline scans

Each card links to both the agent's source repository and the per-target Praxen baseline analysis report. Counts shown are the primary OWASP classifications drawn from each finding's owasp_llm / owasp_agentic scalar.

{target_cards(per_target, out_dir)}

OWASP LLM Top 10 — finding count by category

Coverage of OWASP Top 10 for LLM Applications 2025 across all baseline targets. Empty cells show categories the suite does not currently exercise.

{bar_chart(llm_rows, max_llm, "var(--orange)")}

OWASP Agentic Top 10 — finding count by category

Coverage of OWASP Top 10 for Agentic AI Applications 2026 across all baseline targets.

{bar_chart(asi_rows, max_asi, "var(--accent-2)")}

Methodology how these numbers were computed

Every finding's canonical record carries a primary OWASP classification in two scalar fields, owasp_llm (one of LLM01–LLM10 or null) and owasp_agentic (one of ASI01–ASI10 or null). This report sums those scalars across the frozen baseline JSONs in tests/baselines/{html.escape(baseline_name)}/ — one per target — yielding the primary-classification counts shown. A finding can carry both an LLM and an Agentic primary tag, so the two totals overlap; a finding without any OWASP primary classification (a RAISE-only or supply-chain-only finding) appears in neither chart but still in the per-target total. The frozen baselines are version-pinned outputs of the cold pre-release scans; see tests/baselines/README.md. For how Praxen classifies findings against the OWASP Top 10, see the OWASP Gen AI Security guide.

Generated {generated} · Built on the Praxen {html.escape(baseline_name)} baseline set · github.com/open-agent-ai-security/praxen

""" def main(): parser = argparse.ArgumentParser( description="Render a Praxen OWASP coverage HTML report from a baseline set.", formatter_class=argparse.RawDescriptionHelpFormatter, epilog="Example:\n python3 tests/baselines/owasp_coverage.py --out /tmp/owasp.html\n", ) parser.add_argument( "--baseline-dir", type=Path, default=DEFAULT_BASELINE, help=f"Baseline set to aggregate (default: tests/baselines/{DEFAULT_BASELINE.name}/).", ) parser.add_argument( "--out", type=Path, default=DEFAULT_OUT, help="Output HTML path (default: ./owasp-coverage-report.html in the current working directory).", ) args = parser.parse_args() if not args.baseline_dir.is_dir(): print(f"owasp_coverage.py: baseline directory not found: {args.baseline_dir}", file=sys.stderr) sys.exit(1) report = build_report(args.baseline_dir, args.out) args.out.parent.mkdir(parents=True, exist_ok=True) args.out.write_text(report, encoding="utf-8") print(f"owasp_coverage.py: wrote {args.out}") if __name__ == "__main__": main()