{
  "format_version": 3,
  "claim_natural": "Topaz et al. (2026) analyzed 2.5 million biomedical papers in PubMed Central from January 2023 through February 2026 and identified 4,046 references pointing to studies that do not exist, distributed across 2,810 papers.",
  "claim_formal": {
    "subject": "Topaz, Roguin, Gupta, Zhang, Peltonen \u2014 'Fabricated citations: an audit across 2.5 million biomedical papers', Lancet 2026; 407: 1779-81 (correspondence)",
    "purpose": "fact_verification",
    "sub_claims": [
      {
        "id": "SC1",
        "property": "Corpus size \u2248 2.5 million biomedical papers in PubMed Central",
        "operator": "==",
        "threshold": true,
        "operator_note": "Paper reports 2,471,758 papers scanned from PMC Open Access subset; this rounds to '2.5 million' as used in the paper's own title and the claim."
      },
      {
        "id": "SC2",
        "property": "Time window: January 2023 through February 2026",
        "operator": "==",
        "threshold": true,
        "operator_note": "Paper specifies exactly Jan 1, 2023 to Feb 18, 2026. The claim's month-resolution phrasing 'January 2023 through February 2026' is a faithful summary of this interval."
      },
      {
        "id": "SC3",
        "property": "Number of fabricated references identified = 4,046",
        "operator": "==",
        "threshold": true,
        "operator_note": "Paper's text says '4046 fabricated references'. The paper's supplementary appendix uses the more cautious term 'suspected fabricated references' and reports pipeline precision of 91% \u2014 see adversarial checks. The claim describes them as 'references pointing to studies that do not exist', which matches the paper's own definition: 'references whose claimed titles correspond to no existing publication'."
      },
      {
        "id": "SC4",
        "property": "Number of distinct papers containing fabricated refs = 2,810",
        "operator": "==",
        "threshold": true,
        "operator_note": "Paper states '4046 fabricated references across 2810 papers' and later 'Of the 2810 affected papers, 98\u00b74% had received no publisher action'."
      }
    ],
    "compound_operator": "AND",
    "proof_direction": "affirm",
    "operator_note": "All four sub-claims must hold for the compound claim to be PROVED. Each sub-claim is verified against verbatim text from the Topaz et al. 2026 correspondence article in The Lancet (the primary and only authoritative source for what that paper reports).",
    "subclaim_to_sources": {
      "SC1": [
        "sc1_corpus_size"
      ],
      "SC2": [
        "sc2_date_range"
      ],
      "SC3": [
        "sc3_fab_count"
      ],
      "SC4": [
        "sc4_affected_papers"
      ]
    }
  },
  "evidence": {
    "B1": {
      "type": "empirical",
      "label": "SC1: corpus size from Topaz et al. 2026 methods",
      "sub_claim": "SC1",
      "source": {
        "name": "Topaz M, Roguin N, Gupta P, Zhang Z, Peltonen L-M. Fabricated citations: an audit across 2\u00b75 million biomedical papers. Lancet 2026; 407: 1779-81.",
        "url": "https://www.thelancet.com/journals/lancet/article/PIIS0140-6736(26)00603-3/fulltext",
        "quote": "We developed an automated reference verification system scanning PubMed Central's Open Access subset from Jan 1, 2023, to Feb 18, 2026: 2 471 758 papers and 125 615 773 structured references."
      },
      "verification": {
        "status": "verified",
        "method": "full_quote",
        "coverage_pct": null,
        "fetch_mode": "snapshot",
        "credibility": {
          "domain": "thelancet.com",
          "source_type": "academic",
          "tier": 4,
          "flags": [],
          "note": "Known academic/scholarly publisher"
        }
      },
      "extraction": {
        "value": "verified",
        "value_in_quote": true,
        "quote_snippet": "We developed an automated reference verification system scanning PubMed Central'"
      }
    },
    "B2": {
      "type": "empirical",
      "label": "SC2: date range from Topaz et al. 2026 methods",
      "sub_claim": "SC2",
      "source": {
        "name": "Topaz M, Roguin N, Gupta P, Zhang Z, Peltonen L-M. Fabricated citations: an audit across 2\u00b75 million biomedical papers. Lancet 2026; 407: 1779-81.",
        "url": "https://www.thelancet.com/journals/lancet/article/PIIS0140-6736(26)00603-3/fulltext",
        "quote": "scanning PubMed Central's Open Access subset from Jan 1, 2023, to Feb 18, 2026"
      },
      "verification": {
        "status": "verified",
        "method": "full_quote",
        "coverage_pct": null,
        "fetch_mode": "snapshot",
        "credibility": {
          "domain": "thelancet.com",
          "source_type": "academic",
          "tier": 4,
          "flags": [],
          "note": "Known academic/scholarly publisher"
        }
      },
      "extraction": {
        "value": "verified",
        "value_in_quote": true,
        "quote_snippet": "scanning PubMed Central's Open Access subset from Jan 1, 2023, to Feb 18, 2026"
      }
    },
    "B3": {
      "type": "empirical",
      "label": "SC3: 4046 fabricated references (Topaz et al. 2026 results)",
      "sub_claim": "SC3",
      "source": {
        "name": "Topaz M, Roguin N, Gupta P, Zhang Z, Peltonen L-M. Fabricated citations: an audit across 2\u00b75 million biomedical papers. Lancet 2026; 407: 1779-81.",
        "url": "https://www.thelancet.com/journals/lancet/article/PIIS0140-6736(26)00603-3/fulltext",
        "quote": "Among 97\u00b71 million verified references, we identified 4046 fabricated references across 2810 papers"
      },
      "verification": {
        "status": "verified",
        "method": "full_quote",
        "coverage_pct": null,
        "fetch_mode": "snapshot",
        "credibility": {
          "domain": "thelancet.com",
          "source_type": "academic",
          "tier": 4,
          "flags": [],
          "note": "Known academic/scholarly publisher"
        }
      },
      "extraction": {
        "value": "verified",
        "value_in_quote": true,
        "quote_snippet": "Among 97\u00b71 million verified references, we identified 4046 fabricated references"
      }
    },
    "B4": {
      "type": "empirical",
      "label": "SC4: 2810 affected papers (Topaz et al. 2026 limitations)",
      "sub_claim": "SC4",
      "source": {
        "name": "Topaz M, Roguin N, Gupta P, Zhang Z, Peltonen L-M. Fabricated citations: an audit across 2\u00b75 million biomedical papers. Lancet 2026; 407: 1779-81.",
        "url": "https://www.thelancet.com/journals/lancet/article/PIIS0140-6736(26)00603-3/fulltext",
        "quote": "Of the 2810 affected papers, 98\u00b74% had received no publisher action at the time of our audit"
      },
      "verification": {
        "status": "verified",
        "method": "full_quote",
        "coverage_pct": null,
        "fetch_mode": "live",
        "credibility": {
          "domain": "thelancet.com",
          "source_type": "academic",
          "tier": 4,
          "flags": [],
          "note": "Known academic/scholarly publisher"
        }
      },
      "extraction": {
        "value": "verified",
        "value_in_quote": true,
        "quote_snippet": "Of the 2810 affected papers, 98\u00b74% had received no publisher action at the time "
      }
    },
    "A1": {
      "type": "computed",
      "label": "SC1 verified-source count",
      "sub_claim": "SC1",
      "method": "count(verified sc1 citations) = 1",
      "result": "1",
      "depends_on": [
        "B1"
      ]
    },
    "A2": {
      "type": "computed",
      "label": "SC2 verified-source count",
      "sub_claim": "SC2",
      "method": "count(verified sc2 citations) = 1",
      "result": "1",
      "depends_on": [
        "B2"
      ]
    },
    "A3": {
      "type": "computed",
      "label": "SC3 verified-source count",
      "sub_claim": "SC3",
      "method": "count(verified sc3 citations) = 1",
      "result": "1",
      "depends_on": [
        "B3"
      ]
    },
    "A4": {
      "type": "computed",
      "label": "SC4 verified-source count",
      "sub_claim": "SC4",
      "method": "count(verified sc4 citations) = 1",
      "result": "1",
      "depends_on": [
        "B4"
      ]
    }
  },
  "cross_checks": [
    {
      "description": "SC1: in-source quote verification",
      "fact_ids": [
        "B1"
      ],
      "agreement": true,
      "n_sources_consulted": 1,
      "n_sources_verified": 1,
      "sources": {
        "sc1_corpus_size": "verified"
      },
      "independence_note": "Single authoritative source (Topaz et al. 2026) \u2014 claim is about what that paper reports.",
      "coi_flags": []
    },
    {
      "description": "SC2: in-source quote verification",
      "fact_ids": [
        "B2"
      ],
      "agreement": true,
      "n_sources_consulted": 1,
      "n_sources_verified": 1,
      "sources": {
        "sc2_date_range": "verified"
      },
      "independence_note": "Single authoritative source (Topaz et al. 2026) \u2014 claim is about what that paper reports.",
      "coi_flags": []
    },
    {
      "description": "SC3: in-source quote verification",
      "fact_ids": [
        "B3"
      ],
      "agreement": true,
      "n_sources_consulted": 1,
      "n_sources_verified": 1,
      "sources": {
        "sc3_fab_count": "verified"
      },
      "independence_note": "Single authoritative source (Topaz et al. 2026) \u2014 claim is about what that paper reports.",
      "coi_flags": []
    },
    {
      "description": "SC4: in-source quote verification",
      "fact_ids": [
        "B4"
      ],
      "agreement": true,
      "n_sources_consulted": 1,
      "n_sources_verified": 1,
      "sources": {
        "sc4_affected_papers": "verified"
      },
      "independence_note": "Single authoritative source (Topaz et al. 2026) \u2014 claim is about what that paper reports.",
      "coi_flags": []
    }
  ],
  "adversarial_checks": [
    {
      "question": "Does the paper itself use weaker language than 'studies that do not exist' for these 4,046 references?",
      "verification_performed": "Searched the supplementary appendix (uploaded mmc1.pdf) for the exact language used to describe the 4,046 entries. The main Lancet correspondence uses 'fabricated references' (defined as 'references whose claimed titles correspond to no existing publication'). The supplementary appendix consistently uses 'suspected fabricated references' and reports pipeline precision of 91% (Fleiss' kappa = 0.71) on a 500-entry masked validation. This means roughly 9% of the 4,046 entries may be false positives.",
      "finding": "The user's phrasing 'references pointing to studies that do not exist' tracks the paper's own definition and headline term. However, the rigorous statement is that the pipeline FLAGGED 4,046 entries as suspected fabrications with 91% precision, so the true number of references-to-nothing is approximately 4046 * 0.91 \u2248 3682, not exactly 4,046. The claim under verification is about what the paper REPORTS, and the paper reports the figure 4,046 explicitly. No precision adjustment is applied to the headline number in the paper's own text.",
      "breaks_proof": false
    },
    {
      "question": "Are there any independent retractions, corrections, or rebuttals of the Topaz et al. 2026 finding that would change the headline numbers?",
      "verification_performed": "The paper was published as a Lancet correspondence on May 9, 2026 (vol 407, pp 1779-1781); today's date is May 20, 2026. Eleven days post-publication leaves essentially no time for a formal retraction or correction to appear. No such notice is attached to the uploaded PDF. The user-supplied PDF and supplement are the canonical source for what the paper reports.",
      "finding": "No retractions, corrections, or errata are known for the source paper at the time of this proof. The headline figures (2.5M / Jan 2023 - Feb 2026 / 4,046 / 2,810) appear unchanged.",
      "breaks_proof": false
    },
    {
      "question": "Could the date phrasing 'January 2023 through February 2026' overstate the actual interval?",
      "verification_performed": "Compared the claim's interval ('January 2023 through February 2026') against the paper's exact interval ('Jan 1, 2023, to Feb 18, 2026'). The claim describes both bounds at month resolution; the paper specifies day-of-month bounds within those months. The paper also notes the early-2026 quarter is incomplete (Jan 1 - Feb 18 represents the first 7 weeks of 2026).",
      "finding": "The claim's month-level phrasing is consistent with the paper. It does not imply analysis through Feb 28, 2026; saying 'through February 2026' to describe a period ending Feb 18, 2026 is a common and accurate summary. No overstatement.",
      "breaks_proof": false
    },
    {
      "question": "Is the corpus-size figure '2.5 million' a fair rounding of the paper's actual 2,471,758?",
      "verification_performed": "2,471,758 rounded to two significant figures (one decimal place in millions) gives 2.5 million, since 2.47 rounds up to 2.5. The paper itself uses '2\u00b75 million' in its title and in the supplementary appendix subtitle. The user's claim adopts the paper's own rounding.",
      "finding": "'2.5 million' is the paper's own headline rounding. The claim's figure matches the paper's rounded figure exactly.",
      "breaks_proof": false
    }
  ],
  "verdict": {
    "value": "PROVED",
    "qualified": false,
    "qualifier": null,
    "reason": null
  },
  "key_results": {
    "n_holding": 4,
    "n_total": 4,
    "claim_holds": true
  },
  "generator": {
    "name": "proof-engine",
    "version": "1.34.0",
    "repo": "https://github.com/yaniv-golan/proof-engine",
    "generated_at": "2026-05-20"
  },
  "sub_claim_results": [
    {
      "id": "SC1",
      "n_confirming": 1,
      "threshold": 1,
      "holds": true
    },
    {
      "id": "SC2",
      "n_confirming": 1,
      "threshold": 1,
      "holds": true
    },
    {
      "id": "SC3",
      "n_confirming": 1,
      "threshold": 1,
      "holds": true
    },
    {
      "id": "SC4",
      "n_confirming": 1,
      "threshold": 1,
      "holds": true
    }
  ],
  "proof_py_url": "/proofs/topaz-et-al-2026-analyzed-2-5-million-biomedical-papers-in-pubmed-central-from/proof.py",
  "citation": {
    "doi": "10.5281/zenodo.20306620",
    "concept_doi": "10.5281/zenodo.20306619",
    "url": "https://proofengine.info/proofs/topaz-et-al-2026-analyzed-2-5-million-biomedical-papers-in-pubmed-central-from/",
    "author": "Proof Engine",
    "cite_bib_url": "/proofs/topaz-et-al-2026-analyzed-2-5-million-biomedical-papers-in-pubmed-central-from/cite.bib",
    "cite_ris_url": "/proofs/topaz-et-al-2026-analyzed-2-5-million-biomedical-papers-in-pubmed-central-from/cite.ris"
  },
  "depends_on": []
}