{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2025:OELEPP4IGTFEDTH3YY67TUBHDG","short_pith_number":"pith:OELEPP4I","canonical_record":{"source":{"id":"2512.20732","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2025-12-23T19:40:51Z","cross_cats_sorted":["cs.AI","cs.SE"],"title_canon_sha256":"5f767b424c1af0fdea74e67824bb6478c1ed80b804f4a7841fcad123464b4aec","abstract_canon_sha256":"a92a818082904ecc9cfd52121ac018927b37a5e0da072f19dbac9795bb191d17"},"schema_version":"1.0"},"canonical_sha256":"711647bf8834ca41ccfbc63df9d027199211136275390bca9324d6bfd895f4f0","source":{"kind":"arxiv","id":"2512.20732","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2512.20732","created_at":"2026-06-01T01:03:47Z"},{"alias_kind":"arxiv_version","alias_value":"2512.20732v2","created_at":"2026-06-01T01:03:47Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2512.20732","created_at":"2026-06-01T01:03:47Z"},{"alias_kind":"pith_short_12","alias_value":"OELEPP4IGTFE","created_at":"2026-06-01T01:03:47Z"},{"alias_kind":"pith_short_16","alias_value":"OELEPP4IGTFEDTH3","created_at":"2026-06-01T01:03:47Z"},{"alias_kind":"pith_short_8","alias_value":"OELEPP4I","created_at":"2026-06-01T01:03:47Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2025:OELEPP4IGTFEDTH3YY67TUBHDG","target":"record","payload":{"canonical_record":{"source":{"id":"2512.20732","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2025-12-23T19:40:51Z","cross_cats_sorted":["cs.AI","cs.SE"],"title_canon_sha256":"5f767b424c1af0fdea74e67824bb6478c1ed80b804f4a7841fcad123464b4aec","abstract_canon_sha256":"a92a818082904ecc9cfd52121ac018927b37a5e0da072f19dbac9795bb191d17"},"schema_version":"1.0"},"canonical_sha256":"711647bf8834ca41ccfbc63df9d027199211136275390bca9324d6bfd895f4f0","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-01T01:03:47.950438Z","signature_b64":"mlCMmDtiC3NivWFNu2Uhr7qfdayMCiz7oMETTp6+97+RX/A28X3Kk13Z+ZY+d2HbN6+WOkkmoEHjf2B7uaf1Cg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"711647bf8834ca41ccfbc63df9d027199211136275390bca9324d6bfd895f4f0","last_reissued_at":"2026-06-01T01:03:47.949108Z","signature_status":"signed_v1","first_computed_at":"2026-06-01T01:03:47.949108Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2512.20732","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-01T01:03:47Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"TBis5sXLH9fpuhUmdxGJmwtDd/OJxTscYgmnOrzdtL4MuVhQRvF6BvYZ1H7aP0GOq3zrakQJMcfMEcIGoALhDQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-05T17:45:45.722094Z"},"content_sha256":"a3289dadf6475acaccd6939178edc55ba022db1e767f0016a97076866db6f262","schema_version":"1.0","event_id":"sha256:a3289dadf6475acaccd6939178edc55ba022db1e767f0016a97076866db6f262"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2025:OELEPP4IGTFEDTH3YY67TUBHDG","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"FEM-Bench: A Structured Scientific Reasoning Benchmark for Evaluating Code-Generating LLMs","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.AI","cs.SE"],"primary_cat":"cs.LG","authors_text":"Emma Lejeune, Erfan Hamdi, Joel Shor, Saeed Mohammadzadeh","submitted_at":"2025-12-23T19:40:51Z","abstract_excerpt":"As LLMs advance their reasoning capabilities about the physical world, the absence of rigorous benchmarks for evaluating their ability to generate scientifically valid physical models has become a critical gap. Computational mechanics, which develops and applies mathematical models and numerical methods to predict the behavior of physical systems under forces, deformation, and constraints, provides an ideal foundation for structured scientific reasoning evaluation. Problems follow clear mathematical structure, enforce strict physical and numerical constraints, and support objective verificatio"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2512.20732","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2512.20732/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-01T01:03:47Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"j2zTs5Kh6fpHySbmIMB6nvYTGStwQqMrRoQr1ECcIIKI7lv89RYYBGUwL00vE4FrstzIEaNJAYw4OCOhcuyiDQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-05T17:45:45.722535Z"},"content_sha256":"ad737f1c2cbe0c1c57be71635dfadc217d77b5d2fa896ab636da1b772219d932","schema_version":"1.0","event_id":"sha256:ad737f1c2cbe0c1c57be71635dfadc217d77b5d2fa896ab636da1b772219d932"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/OELEPP4IGTFEDTH3YY67TUBHDG/bundle.json","state_url":"https://pith.science/pith/OELEPP4IGTFEDTH3YY67TUBHDG/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/OELEPP4IGTFEDTH3YY67TUBHDG/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-05T17:45:45Z","links":{"resolver":"https://pith.science/pith/OELEPP4IGTFEDTH3YY67TUBHDG","bundle":"https://pith.science/pith/OELEPP4IGTFEDTH3YY67TUBHDG/bundle.json","state":"https://pith.science/pith/OELEPP4IGTFEDTH3YY67TUBHDG/state.json","well_known_bundle":"https://pith.science/.well-known/pith/OELEPP4IGTFEDTH3YY67TUBHDG/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2025:OELEPP4IGTFEDTH3YY67TUBHDG","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"a92a818082904ecc9cfd52121ac018927b37a5e0da072f19dbac9795bb191d17","cross_cats_sorted":["cs.AI","cs.SE"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2025-12-23T19:40:51Z","title_canon_sha256":"5f767b424c1af0fdea74e67824bb6478c1ed80b804f4a7841fcad123464b4aec"},"schema_version":"1.0","source":{"id":"2512.20732","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2512.20732","created_at":"2026-06-01T01:03:47Z"},{"alias_kind":"arxiv_version","alias_value":"2512.20732v2","created_at":"2026-06-01T01:03:47Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2512.20732","created_at":"2026-06-01T01:03:47Z"},{"alias_kind":"pith_short_12","alias_value":"OELEPP4IGTFE","created_at":"2026-06-01T01:03:47Z"},{"alias_kind":"pith_short_16","alias_value":"OELEPP4IGTFEDTH3","created_at":"2026-06-01T01:03:47Z"},{"alias_kind":"pith_short_8","alias_value":"OELEPP4I","created_at":"2026-06-01T01:03:47Z"}],"graph_snapshots":[{"event_id":"sha256:ad737f1c2cbe0c1c57be71635dfadc217d77b5d2fa896ab636da1b772219d932","target":"graph","created_at":"2026-06-01T01:03:47Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2512.20732/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"As LLMs advance their reasoning capabilities about the physical world, the absence of rigorous benchmarks for evaluating their ability to generate scientifically valid physical models has become a critical gap. Computational mechanics, which develops and applies mathematical models and numerical methods to predict the behavior of physical systems under forces, deformation, and constraints, provides an ideal foundation for structured scientific reasoning evaluation. Problems follow clear mathematical structure, enforce strict physical and numerical constraints, and support objective verificatio","authors_text":"Emma Lejeune, Erfan Hamdi, Joel Shor, Saeed Mohammadzadeh","cross_cats":["cs.AI","cs.SE"],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2025-12-23T19:40:51Z","title":"FEM-Bench: A Structured Scientific Reasoning Benchmark for Evaluating Code-Generating LLMs"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2512.20732","kind":"arxiv","version":2},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:a3289dadf6475acaccd6939178edc55ba022db1e767f0016a97076866db6f262","target":"record","created_at":"2026-06-01T01:03:47Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"a92a818082904ecc9cfd52121ac018927b37a5e0da072f19dbac9795bb191d17","cross_cats_sorted":["cs.AI","cs.SE"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2025-12-23T19:40:51Z","title_canon_sha256":"5f767b424c1af0fdea74e67824bb6478c1ed80b804f4a7841fcad123464b4aec"},"schema_version":"1.0","source":{"id":"2512.20732","kind":"arxiv","version":2}},"canonical_sha256":"711647bf8834ca41ccfbc63df9d027199211136275390bca9324d6bfd895f4f0","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"711647bf8834ca41ccfbc63df9d027199211136275390bca9324d6bfd895f4f0","first_computed_at":"2026-06-01T01:03:47.949108Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-06-01T01:03:47.949108Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"mlCMmDtiC3NivWFNu2Uhr7qfdayMCiz7oMETTp6+97+RX/A28X3Kk13Z+ZY+d2HbN6+WOkkmoEHjf2B7uaf1Cg==","signature_status":"signed_v1","signed_at":"2026-06-01T01:03:47.950438Z","signed_message":"canonical_sha256_bytes"},"source_id":"2512.20732","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:a3289dadf6475acaccd6939178edc55ba022db1e767f0016a97076866db6f262","sha256:ad737f1c2cbe0c1c57be71635dfadc217d77b5d2fa896ab636da1b772219d932"],"state_sha256":"586ff107a17df9fe7ea11eab04493430b5a737201c8e18a2e1e813b59a0ba895"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"8yQyhIL3v0G1xVnZnl4tzGnhJNTQjk0i5fVhTIclZTTbsXPjPHiK0KgSer3jiWDODmBR25qoZWrJ3SHfeT6AAQ==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-05T17:45:45.724764Z","bundle_sha256":"d6ddfa932807cba764fe19c39facbe66a17921c15e0f5eb644527a5c9a16c738"}}