{
  "prefix": {
    "pe": "https://proofengine.info/proofs/current-ai-systems-in-2026-have-near-zero-hallucinations-and-human-level/",
    "prov": "http://www.w3.org/ns/prov#",
    "xsd": "http://www.w3.org/2001/XMLSchema#",
    "schema": "http://schema.org/"
  },
  "entity": {
    "pe:claim": {
      "prov:type": "pe:Claim",
      "prov:label": "Current AI systems in 2026 have near-zero hallucinations and human-level reasoning across most domains.",
      "prov:value": "Current AI systems in 2026 have near-zero hallucinations and human-level reasoning across most domains."
    },
    "pe:evidence-B1": {
      "prov:type": "pe:EmpiricalEvidence",
      "prov:label": "SC1 \u2014 Duke Univ. Libraries (Jan 2026): LLMs still hallucinate",
      "pe:factId": "B1",
      "pe:evidenceType": "empirical",
      "pe:sourceUrl": "https://blogs.library.duke.edu/blog/2026/01/05/its-2026-why-are-llms-still-hallucinating/",
      "pe:sourceName": "Duke University Libraries Blog (January 2026)"
    },
    "pe:evidence-B2": {
      "prov:type": "pe:EmpiricalEvidence",
      "prov:label": "SC1 \u2014 Vectara Hallucination Leaderboard (2025): top models >10% rate",
      "pe:factId": "B2",
      "pe:evidenceType": "empirical",
      "pe:sourceUrl": "https://www.vectara.com/blog/introducing-the-next-generation-of-vectaras-hallucination-leaderboard",
      "pe:sourceName": "Vectara Hallucination Leaderboard Blog (2025)"
    },
    "pe:evidence-B3": {
      "prov:type": "pe:EmpiricalEvidence",
      "prov:label": "SC1 \u2014 OpenAI SimpleQA paper (arXiv 2411.04368): benchmark for factual failures",
      "pe:factId": "B3",
      "pe:evidenceType": "empirical",
      "pe:sourceUrl": "https://arxiv.org/abs/2411.04368",
      "pe:sourceName": "OpenAI SimpleQA benchmark paper (arXiv 2024)"
    },
    "pe:evidence-B4": {
      "prov:type": "pe:EmpiricalEvidence",
      "prov:label": "SC2 \u2014 The Decoder (Mar 2026): ARC-AGI-3, all frontier models <1%",
      "pe:factId": "B4",
      "pe:evidenceType": "empirical",
      "pe:sourceUrl": "https://the-decoder.com/arc-agi-3-offers-2m-to-any-ai-that-matches-untrained-humans-yet-every-frontier-model-scores-below-1/",
      "pe:sourceName": "The Decoder \u2014 ARC-AGI-3 results (March 2026)"
    },
    "pe:evidence-B5": {
      "prov:type": "pe:EmpiricalEvidence",
      "prov:label": "SC2 \u2014 ARC Prize 2025 results: best AI 37.6% vs 100% human baseline",
      "pe:factId": "B5",
      "pe:evidenceType": "empirical",
      "pe:sourceUrl": "https://arcprize.org/blog/arc-prize-2025-results-analysis",
      "pe:sourceName": "ARC Prize 2025 Official Results (ARC-AGI-2, human baseline: 100%)"
    },
    "pe:evidence-B6": {
      "prov:type": "pe:EmpiricalEvidence",
      "prov:label": "SC2 \u2014 The Conversation (2025): Humanity's Last Exam, GPT-4o at 2.7%",
      "pe:factId": "B6",
      "pe:evidenceType": "empirical",
      "pe:sourceUrl": "https://theconversation.com/ai-is-failing-humanitys-last-exam-so-what-does-that-mean-for-machine-intelligence-274620",
      "pe:sourceName": "The Conversation \u2014 Humanity's Last Exam (2025)"
    },
    "pe:evidence-A1": {
      "prov:type": "pe:ComputedEvidence",
      "prov:label": "SC1 verified source count (disproof of near-zero hallucinations)",
      "pe:factId": "A1",
      "pe:evidenceType": "computed",
      "pe:method": "count(verified SC1 disproof citations) = 3",
      "pe:result": "3 of 3 sources verified"
    },
    "pe:evidence-A2": {
      "prov:type": "pe:ComputedEvidence",
      "prov:label": "SC2 verified source count (disproof of human-level reasoning claim)",
      "pe:factId": "A2",
      "pe:evidenceType": "computed",
      "pe:method": "count(verified SC2 disproof citations) = 3",
      "pe:result": "3 of 3 sources verified"
    },
    "pe:verdict": {
      "prov:type": "pe:Verdict",
      "prov:label": "Verdict: DISPROVED",
      "prov:value": "DISPROVED",
      "pe:doi": "10.5281/zenodo.19489832"
    }
  },
  "activity": {
    "pe:verify-B1": {
      "prov:type": "pe:CitationVerification",
      "prov:label": "Verify B1: Duke University Libraries Blog (January 2026)",
      "pe:status": "verified",
      "pe:method": "full_quote",
      "pe:fetchMode": "live"
    },
    "pe:verify-B2": {
      "prov:type": "pe:CitationVerification",
      "prov:label": "Verify B2: Vectara Hallucination Leaderboard Blog (2025)",
      "pe:status": "verified",
      "pe:method": "full_quote",
      "pe:fetchMode": "live"
    },
    "pe:verify-B3": {
      "prov:type": "pe:CitationVerification",
      "prov:label": "Verify B3: OpenAI SimpleQA benchmark paper (arXiv 2024)",
      "pe:status": "verified",
      "pe:method": "full_quote",
      "pe:fetchMode": "live"
    },
    "pe:verify-B4": {
      "prov:type": "pe:CitationVerification",
      "prov:label": "Verify B4: The Decoder \u2014 ARC-AGI-3 results (March 2026)",
      "pe:status": "verified",
      "pe:method": "full_quote",
      "pe:fetchMode": "live"
    },
    "pe:verify-B5": {
      "prov:type": "pe:CitationVerification",
      "prov:label": "Verify B5: ARC Prize 2025 Official Results (ARC-AGI-2, human baseline: 100%)",
      "pe:status": "verified",
      "pe:method": "full_quote",
      "pe:fetchMode": "live"
    },
    "pe:verify-B6": {
      "prov:type": "pe:CitationVerification",
      "prov:label": "Verify B6: The Conversation \u2014 Humanity's Last Exam (2025)",
      "pe:status": "verified",
      "pe:method": "full_quote",
      "pe:fetchMode": "live"
    },
    "pe:determine-verdict": {
      "prov:type": "pe:VerdictDetermination",
      "prov:label": "Determine verdict from evidence",
      "prov:endTime": "2026-03-31"
    }
  },
  "agent": {
    "pe:proof-engine": {
      "prov:type": "prov:SoftwareAgent",
      "prov:label": "Proof Engine",
      "schema:version": "1.3.1",
      "schema:url": "https://github.com/yaniv-golan/proof-engine"
    }
  },
  "wasGeneratedBy": {
    "pe:gen-B1": {
      "prov:entity": "pe:evidence-B1",
      "prov:activity": "pe:verify-B1"
    },
    "pe:gen-B2": {
      "prov:entity": "pe:evidence-B2",
      "prov:activity": "pe:verify-B2"
    },
    "pe:gen-B3": {
      "prov:entity": "pe:evidence-B3",
      "prov:activity": "pe:verify-B3"
    },
    "pe:gen-B4": {
      "prov:entity": "pe:evidence-B4",
      "prov:activity": "pe:verify-B4"
    },
    "pe:gen-B5": {
      "prov:entity": "pe:evidence-B5",
      "prov:activity": "pe:verify-B5"
    },
    "pe:gen-B6": {
      "prov:entity": "pe:evidence-B6",
      "prov:activity": "pe:verify-B6"
    },
    "pe:gen-verdict": {
      "prov:entity": "pe:verdict",
      "prov:activity": "pe:determine-verdict"
    }
  },
  "wasDerivedFrom": {},
  "wasAttributedTo": {
    "pe:attr-verdict": {
      "prov:entity": "pe:verdict",
      "prov:agent": "pe:proof-engine"
    }
  },
  "used": {
    "pe:used-B1": {
      "prov:activity": "pe:determine-verdict",
      "prov:entity": "pe:evidence-B1"
    },
    "pe:used-B2": {
      "prov:activity": "pe:determine-verdict",
      "prov:entity": "pe:evidence-B2"
    },
    "pe:used-B3": {
      "prov:activity": "pe:determine-verdict",
      "prov:entity": "pe:evidence-B3"
    },
    "pe:used-B4": {
      "prov:activity": "pe:determine-verdict",
      "prov:entity": "pe:evidence-B4"
    },
    "pe:used-B5": {
      "prov:activity": "pe:determine-verdict",
      "prov:entity": "pe:evidence-B5"
    },
    "pe:used-B6": {
      "prov:activity": "pe:determine-verdict",
      "prov:entity": "pe:evidence-B6"
    },
    "pe:used-A1": {
      "prov:activity": "pe:determine-verdict",
      "prov:entity": "pe:evidence-A1"
    },
    "pe:used-A2": {
      "prov:activity": "pe:determine-verdict",
      "prov:entity": "pe:evidence-A2"
    }
  }
}