{
  "format_version": 3,
  "claim_formal": {
    "subject": "AI-generated code (from major LLMs such as GPT-4, Claude, Copilot, DeepSeek)",
    "property": "security vulnerability rate compared to human-written code",
    "operator": ">=",
    "operator_note": "To DISPROVE the claim, we need >= 3 independent, verified sources showing AI-generated code has EQUAL OR MORE vulnerabilities than human-written code. 'Fewer' is interpreted as a strict inequality: if AI code has the same or more vulnerabilities, the claim is false. We use proof_direction='disprove' with threshold=3, meaning 3+ verified sources rejecting the claim suffice for DISPROVED.",
    "threshold": 3,
    "proof_direction": "disprove"
  },
  "claim_natural": "AI-generated code has fewer security vulnerabilities than typical human-written code",
  "evidence": {
    "B1": {
      "type": "empirical",
      "label": "Stanford CCS 2023: AI assistant users wrote significantly less secure code",
      "sub_claim": null,
      "source": {
        "name": "Perry et al., ACM CCS 2023 (Stanford University)",
        "url": "https://arxiv.org/html/2211.03622v3",
        "quote": "Overall, we find that participants who had access to an AI assistant wrote significantly less secure code than those without access to an assistant."
      },
      "verification": {
        "status": "verified",
        "method": "full_quote",
        "coverage_pct": null,
        "fetch_mode": "live",
        "credibility": {
          "domain": "arxiv.org",
          "source_type": "academic",
          "tier": 4,
          "flags": [],
          "note": "Known academic/scholarly publisher"
        }
      },
      "extraction": {
        "value": "verified",
        "value_in_quote": true,
        "quote_snippet": "Overall, we find that participants who had access to an AI assistant wrote signi"
      }
    },
    "B2": {
      "type": "empirical",
      "label": "Veracode 2025: LLM-generated code contained OWASP Top 10 vulnerabilities in 45% of test cases",
      "sub_claim": null,
      "source": {
        "name": "Help Net Security / Veracode 2025 GenAI Code Security Report",
        "url": "https://www.helpnetsecurity.com/2025/08/07/create-ai-code-security-risks/",
        "quote": "in 45 percent of all test cases, LLMs produced code containing vulnerabilities aligned with the OWASP Top 10"
      },
      "verification": {
        "status": "verified",
        "method": "full_quote",
        "coverage_pct": null,
        "fetch_mode": "live",
        "credibility": {
          "domain": "helpnetsecurity.com",
          "source_type": "unknown",
          "tier": 2,
          "flags": [],
          "note": "Unclassified domain \u2014 verify source authority manually"
        }
      },
      "extraction": {
        "value": "verified",
        "value_in_quote": true,
        "quote_snippet": "in 45 percent of all test cases, LLMs produced code containing vulnerabilities a"
      }
    },
    "B3": {
      "type": "empirical",
      "label": "CodeRabbit Dec 2025: AI PRs have 1.7x more issues, security up to 2.74x higher",
      "sub_claim": null,
      "source": {
        "name": "CodeRabbit State of AI vs Human Code Generation Report (Dec 2025)",
        "url": "https://www.coderabbit.ai/blog/state-of-ai-vs-human-code-generation-report",
        "quote": "Security issues were up to 2.74x higher"
      },
      "verification": {
        "status": "verified",
        "method": "full_quote",
        "coverage_pct": null,
        "fetch_mode": "live",
        "credibility": {
          "domain": "coderabbit.ai",
          "source_type": "unknown",
          "tier": 2,
          "flags": [],
          "note": "Unclassified domain \u2014 verify source authority manually"
        }
      },
      "extraction": {
        "value": "verified",
        "value_in_quote": true,
        "quote_snippet": "Security issues were up to 2.74x higher"
      }
    },
    "B4": {
      "type": "empirical",
      "label": "The Register/Georgia Tech 2026: 74 CVEs from AI-authored code tracked",
      "sub_claim": null,
      "source": {
        "name": "The Register / Georgia Tech SSLab (Mar 2026)",
        "url": "https://www.theregister.com/2026/03/26/ai_coding_assistant_not_more_secure/",
        "quote": "Claude Code alone now appears in more than 4 percent of public commits on GitHub. If AI were truly responsible for only 74 out of 50,000 public vulnerabilities, that would imply AI-generated code is orders of magnitude safer than human-written code. We do not think that is credible."
      },
      "verification": {
        "status": "partial",
        "method": "fragment",
        "coverage_pct": 50.0,
        "fetch_mode": "live",
        "credibility": {
          "domain": "theregister.com",
          "source_type": "unknown",
          "tier": 2,
          "flags": [],
          "note": "Unclassified domain \u2014 verify source authority manually"
        }
      },
      "extraction": {
        "value": "partial",
        "value_in_quote": true,
        "quote_snippet": "Claude Code alone now appears in more than 4 percent of public commits on GitHub"
      }
    },
    "A1": {
      "type": "computed",
      "label": "Verified source count rejecting the claim",
      "sub_claim": null,
      "method": "count(verified or partially verified citations) = 4",
      "result": "4",
      "depends_on": []
    }
  },
  "cross_checks": [
    {
      "description": "Multiple independent sources consulted across different research methodologies",
      "n_sources_consulted": 4,
      "n_sources_verified": 4,
      "sources": {
        "source_stanford": "verified",
        "source_veracode": "verified",
        "source_coderabbit": "verified",
        "source_register": "partial"
      },
      "independence_note": "Sources are from independent institutions using different methodologies: (1) Stanford \u2014 controlled user study with 47 participants, (2) Veracode \u2014 automated testing of 100+ LLMs across 80 tasks, (3) CodeRabbit \u2014 analysis of 470 real-world GitHub PRs, (4) Georgia Tech \u2014 CVE tracking across the open-source ecosystem. No two sources share methodology or data.",
      "fact_ids": []
    }
  ],
  "adversarial_checks": [
    {
      "question": "Are there any peer-reviewed studies showing AI-generated code has FEWER vulnerabilities than human code?",
      "verification_performed": "Searched: 'AI generated code more secure than human code evidence study 2025 2026'. Reviewed top 10 results from Google. No study found that concludes AI-generated code is more secure overall. All results either show AI code has more vulnerabilities or discuss the security risks of AI-generated code.",
      "finding": "No peer-reviewed study found showing AI-generated code has fewer vulnerabilities. The Veracode Spring 2026 update title explicitly states: 'Despite Claims, AI Models Are Still Failing Security.' The Register's March 2026 article is titled: 'Using AI to code does not mean your code is more secure.'",
      "breaks_proof": false
    },
    {
      "question": "Could AI code be safer in specific narrow contexts even if worse overall?",
      "verification_performed": "Searched for domain-specific studies where AI might outperform humans on security. Some sources note that AI models are improving at syntax correctness (50% to 95% since 2023), but Veracode found security pass rates have remained flat at 45-55% regardless of model generation. No narrow domain was identified where AI code is demonstrably safer.",
      "finding": "While AI coding accuracy has improved, security-specific performance has not. The claim is stated broadly ('AI-generated code'), not for a specific narrow domain, so the broad evidence applies.",
      "breaks_proof": false
    },
    {
      "question": "Do the studies use outdated AI models that no longer reflect current capabilities?",
      "verification_performed": "Checked recency of sources: Stanford study used Codex (2023), Veracode tested 100+ LLMs including current models (2025), CodeRabbit analyzed real-world GitHub PRs (Dec 2025), Georgia Tech tracked CVEs through March 2026. The most recent sources (2025-2026) test current-generation models and still find elevated vulnerability rates.",
      "finding": "Sources span 2023-2026, with the most recent using current models. The pattern of AI code having more vulnerabilities is consistent across model generations. This does not break the proof.",
      "breaks_proof": false
    }
  ],
  "verdict": {
    "value": "DISPROVED",
    "qualified": true,
    "qualifier": "unverified_citations",
    "reason": null
  },
  "key_results": {
    "n_confirmed": 4,
    "threshold": 3,
    "operator": ">=",
    "claim_holds": true
  },
  "generator": {
    "name": "proof-engine",
    "version": "1.2.0",
    "repo": "https://github.com/yaniv-golan/proof-engine",
    "generated_at": "2026-03-29"
  },
  "proof_py_url": "/proofs/ai-generated-code-has-fewer-security-vulnerabiliti/proof.py",
  "citation": {
    "doi": null,
    "concept_doi": null,
    "url": "https://proofengine.info/proofs/ai-generated-code-has-fewer-security-vulnerabiliti/",
    "author": "Proof Engine",
    "cite_bib_url": "/proofs/ai-generated-code-has-fewer-security-vulnerabiliti/cite.bib",
    "cite_ris_url": "/proofs/ai-generated-code-has-fewer-security-vulnerabiliti/cite.ris"
  },
  "depends_on": []
}