{
  "prefix": {
    "pe": "https://proofengine.info/proofs/the-pattern-matching-limitations-identified-in-gsm-noop-are-practically/",
    "prov": "http://www.w3.org/ns/prov#",
    "xsd": "http://www.w3.org/2001/XMLSchema#",
    "schema": "http://schema.org/"
  },
  "entity": {
    "pe:claim": {
      "prov:type": "pe:Claim",
      "prov:label": "The pattern-matching limitations identified in GSM-NoOp are practically surmountable when LLMs are allowed to offload formal reasoning steps to code execution.",
      "prov:value": "The pattern-matching limitations identified in GSM-NoOp are practically surmountable when LLMs are allowed to offload formal reasoning steps to code execution."
    },
    "pe:evidence-B1": {
      "prov:type": "pe:EmpiricalEvidence",
      "prov:label": "SC1: GSM-Symbolic/NoOp paper (Mirzadeh et al., ICLR 2025)",
      "pe:factId": "B1",
      "pe:evidenceType": "empirical",
      "pe:sourceUrl": "https://arxiv.org/html/2410.05229v1",
      "pe:sourceName": "Mirzadeh et al., GSM-Symbolic (ICLR 2025)"
    },
    "pe:evidence-B2": {
      "prov:type": "pe:EmpiricalEvidence",
      "prov:label": "SC1: Independent analysis of GSM-NoOp findings",
      "pe:factId": "B2",
      "pe:evidenceType": "empirical",
      "pe:sourceUrl": "https://www.emergentmind.com/topics/gsm-symbolic-benchmark",
      "pe:sourceName": "EmergentMind GSM-Symbolic Analysis"
    },
    "pe:evidence-B3": {
      "prov:type": "pe:EmpiricalEvidence",
      "prov:label": "SC1: Tech press coverage of GSM-NoOp results",
      "pe:factId": "B3",
      "pe:evidenceType": "empirical",
      "pe:sourceUrl": "https://appleinsider.com/articles/24/10/12/apples-study-proves-that-llm-based-ai-models-are-flawed-because-they-cannot-reason",
      "pe:sourceName": "AppleInsider coverage of GSM-Symbolic research"
    },
    "pe:evidence-B8": {
      "prov:type": "pe:EmpiricalEvidence",
      "prov:label": "SC1: Gary Marcus analysis of GSM-Symbolic findings",
      "pe:factId": "B8",
      "pe:evidenceType": "empirical",
      "pe:sourceUrl": "https://garymarcus.substack.com/p/llms-dont-do-formal-reasoning-and",
      "pe:sourceName": "Gary Marcus, 'LLMs don't do formal reasoning' (2024)"
    },
    "pe:evidence-B4": {
      "prov:type": "pe:EmpiricalEvidence",
      "prov:label": "SC2: PAL \u2014 Program-aided Language Models (Gao et al., ICML 2023)",
      "pe:factId": "B4",
      "pe:evidenceType": "empirical",
      "pe:sourceUrl": "https://ar5iv.labs.arxiv.org/html/2211.10435",
      "pe:sourceName": "Gao et al., PAL: Program-aided Language Models (ICML 2023)"
    },
    "pe:evidence-B5": {
      "prov:type": "pe:EmpiricalEvidence",
      "prov:label": "SC2: Survey on code-enhanced reasoning (2025)",
      "pe:factId": "B5",
      "pe:evidenceType": "empirical",
      "pe:sourceUrl": "https://arxiv.org/html/2502.19411",
      "pe:sourceName": "Code to Think, Think to Code: Survey on Code-Enhanced Reasoning (2025)"
    },
    "pe:evidence-B6": {
      "prov:type": "pe:EmpiricalEvidence",
      "prov:label": "SC2: IIPC execution-driven reasoning augmentation (2025)",
      "pe:factId": "B6",
      "pe:evidenceType": "empirical",
      "pe:sourceUrl": "https://arxiv.org/html/2602.03950",
      "pe:sourceName": "IIPC: Execution-Driven Reasoning Augmentation (2025)"
    },
    "pe:evidence-B7": {
      "prov:type": "pe:EmpiricalEvidence",
      "prov:label": "SC2: Proof Engine as meta-evidence \u2014 this system itself",
      "pe:factId": "B7",
      "pe:evidenceType": "empirical",
      "pe:sourceUrl": "https://github.com/yaniv-golan/proof-engine",
      "pe:sourceName": "Proof Engine \u2014 meta-evidence (this system)"
    },
    "pe:evidence-A1": {
      "prov:type": "pe:ComputedEvidence",
      "prov:label": "SC1 verified source count",
      "pe:factId": "A1",
      "pe:evidenceType": "computed",
      "pe:method": "count(verified sc1 citations) = 4",
      "pe:result": "4 independent sources confirmed SC1"
    },
    "pe:evidence-A2": {
      "prov:type": "pe:ComputedEvidence",
      "prov:label": "SC2 verified source count",
      "pe:factId": "A2",
      "pe:evidenceType": "computed",
      "pe:method": "count(verified sc2 citations) = 4",
      "pe:result": "4 independent sources confirmed SC2"
    },
    "pe:verdict": {
      "prov:type": "pe:Verdict",
      "prov:label": "Verdict: PROVED",
      "prov:value": "PROVED",
      "pe:doi": "10.5281/zenodo.19467525"
    }
  },
  "activity": {
    "pe:verify-B1": {
      "prov:type": "pe:CitationVerification",
      "prov:label": "Verify B1: Mirzadeh et al., GSM-Symbolic (ICLR 2025)",
      "pe:status": "verified",
      "pe:method": "fragment",
      "pe:fetchMode": "live"
    },
    "pe:verify-B2": {
      "prov:type": "pe:CitationVerification",
      "prov:label": "Verify B2: EmergentMind GSM-Symbolic Analysis",
      "pe:status": "verified",
      "pe:method": "full_quote",
      "pe:fetchMode": "live"
    },
    "pe:verify-B3": {
      "prov:type": "pe:CitationVerification",
      "prov:label": "Verify B3: AppleInsider coverage of GSM-Symbolic research",
      "pe:status": "verified",
      "pe:method": "full_quote",
      "pe:fetchMode": "live"
    },
    "pe:verify-B8": {
      "prov:type": "pe:CitationVerification",
      "prov:label": "Verify B8: Gary Marcus, 'LLMs don't do formal reasoning' (2024)",
      "pe:status": "verified",
      "pe:method": "full_quote",
      "pe:fetchMode": "live"
    },
    "pe:verify-B4": {
      "prov:type": "pe:CitationVerification",
      "prov:label": "Verify B4: Gao et al., PAL: Program-aided Language Models (ICML 2023)",
      "pe:status": "verified",
      "pe:method": "full_quote",
      "pe:fetchMode": "live"
    },
    "pe:verify-B5": {
      "prov:type": "pe:CitationVerification",
      "prov:label": "Verify B5: Code to Think, Think to Code: Survey on Code-Enhanced Reasoning (2025)",
      "pe:status": "verified",
      "pe:method": "full_quote",
      "pe:fetchMode": "live"
    },
    "pe:verify-B6": {
      "prov:type": "pe:CitationVerification",
      "prov:label": "Verify B6: IIPC: Execution-Driven Reasoning Augmentation (2025)",
      "pe:status": "verified",
      "pe:method": "full_quote",
      "pe:fetchMode": "live"
    },
    "pe:verify-B7": {
      "prov:type": "pe:CitationVerification",
      "prov:label": "Verify B7: Proof Engine \u2014 meta-evidence (this system)",
      "pe:status": "verified",
      "pe:method": "full_quote",
      "pe:fetchMode": "live"
    },
    "pe:determine-verdict": {
      "prov:type": "pe:VerdictDetermination",
      "prov:label": "Determine verdict from evidence",
      "prov:endTime": "2026-04-08"
    }
  },
  "agent": {
    "pe:proof-engine": {
      "prov:type": "prov:SoftwareAgent",
      "prov:label": "Proof Engine",
      "schema:version": "1.10.0",
      "schema:url": "https://github.com/yaniv-golan/proof-engine"
    }
  },
  "wasGeneratedBy": {
    "pe:gen-B1": {
      "prov:entity": "pe:evidence-B1",
      "prov:activity": "pe:verify-B1"
    },
    "pe:gen-B2": {
      "prov:entity": "pe:evidence-B2",
      "prov:activity": "pe:verify-B2"
    },
    "pe:gen-B3": {
      "prov:entity": "pe:evidence-B3",
      "prov:activity": "pe:verify-B3"
    },
    "pe:gen-B8": {
      "prov:entity": "pe:evidence-B8",
      "prov:activity": "pe:verify-B8"
    },
    "pe:gen-B4": {
      "prov:entity": "pe:evidence-B4",
      "prov:activity": "pe:verify-B4"
    },
    "pe:gen-B5": {
      "prov:entity": "pe:evidence-B5",
      "prov:activity": "pe:verify-B5"
    },
    "pe:gen-B6": {
      "prov:entity": "pe:evidence-B6",
      "prov:activity": "pe:verify-B6"
    },
    "pe:gen-B7": {
      "prov:entity": "pe:evidence-B7",
      "prov:activity": "pe:verify-B7"
    },
    "pe:gen-verdict": {
      "prov:entity": "pe:verdict",
      "prov:activity": "pe:determine-verdict"
    }
  },
  "wasDerivedFrom": {},
  "wasAttributedTo": {
    "pe:attr-verdict": {
      "prov:entity": "pe:verdict",
      "prov:agent": "pe:proof-engine"
    }
  },
  "used": {
    "pe:used-B1": {
      "prov:activity": "pe:determine-verdict",
      "prov:entity": "pe:evidence-B1"
    },
    "pe:used-B2": {
      "prov:activity": "pe:determine-verdict",
      "prov:entity": "pe:evidence-B2"
    },
    "pe:used-B3": {
      "prov:activity": "pe:determine-verdict",
      "prov:entity": "pe:evidence-B3"
    },
    "pe:used-B8": {
      "prov:activity": "pe:determine-verdict",
      "prov:entity": "pe:evidence-B8"
    },
    "pe:used-B4": {
      "prov:activity": "pe:determine-verdict",
      "prov:entity": "pe:evidence-B4"
    },
    "pe:used-B5": {
      "prov:activity": "pe:determine-verdict",
      "prov:entity": "pe:evidence-B5"
    },
    "pe:used-B6": {
      "prov:activity": "pe:determine-verdict",
      "prov:entity": "pe:evidence-B6"
    },
    "pe:used-B7": {
      "prov:activity": "pe:determine-verdict",
      "prov:entity": "pe:evidence-B7"
    },
    "pe:used-A1": {
      "prov:activity": "pe:determine-verdict",
      "prov:entity": "pe:evidence-A1"
    },
    "pe:used-A2": {
      "prov:activity": "pe:determine-verdict",
      "prov:entity": "pe:evidence-A2"
    }
  }
}