{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2025:GW3OUI7HWBXH6JZIFOK4KIGXPU","short_pith_number":"pith:GW3OUI7H","canonical_record":{"source":{"id":"2505.16178","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2025-05-22T03:34:29Z","cross_cats_sorted":[],"title_canon_sha256":"9e4b8cd92b823edd9f201b2f9912206b4ce49fa3f02551ed564df97488ab6e9b","abstract_canon_sha256":"78d82937124f656f5a309754837d1bd4bf7fee6d3213e1c40304ffe4fe509ee2"},"schema_version":"1.0"},"canonical_sha256":"35b6ea23e7b06e7f27282b95c520d77d1d3fb6e15c6d3c38409c2bf9f793d9d7","source":{"kind":"arxiv","id":"2505.16178","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2505.16178","created_at":"2026-05-29T01:04:53Z"},{"alias_kind":"arxiv_version","alias_value":"2505.16178v2","created_at":"2026-05-29T01:04:53Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2505.16178","created_at":"2026-05-29T01:04:53Z"},{"alias_kind":"pith_short_12","alias_value":"GW3OUI7HWBXH","created_at":"2026-05-29T01:04:53Z"},{"alias_kind":"pith_short_16","alias_value":"GW3OUI7HWBXH6JZI","created_at":"2026-05-29T01:04:53Z"},{"alias_kind":"pith_short_8","alias_value":"GW3OUI7H","created_at":"2026-05-29T01:04:53Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2025:GW3OUI7HWBXH6JZIFOK4KIGXPU","target":"record","payload":{"canonical_record":{"source":{"id":"2505.16178","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2025-05-22T03:34:29Z","cross_cats_sorted":[],"title_canon_sha256":"9e4b8cd92b823edd9f201b2f9912206b4ce49fa3f02551ed564df97488ab6e9b","abstract_canon_sha256":"78d82937124f656f5a309754837d1bd4bf7fee6d3213e1c40304ffe4fe509ee2"},"schema_version":"1.0"},"canonical_sha256":"35b6ea23e7b06e7f27282b95c520d77d1d3fb6e15c6d3c38409c2bf9f793d9d7","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-29T01:04:53.290256Z","signature_b64":"1gucMjjmwrWOAQgToZhOyFX9zjrshRLlkufvp0/QofAkhlTPBmRcQfYKFhipUgZe7srKMfVuar9jj8rsokUdDg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"35b6ea23e7b06e7f27282b95c520d77d1d3fb6e15c6d3c38409c2bf9f793d9d7","last_reissued_at":"2026-05-29T01:04:53.289805Z","signature_status":"signed_v1","first_computed_at":"2026-05-29T01:04:53.289805Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2505.16178","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-29T01:04:53Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"Z59unmtfWwryKUJ0tQhsRfQPKPK6EBHFelvbytwJMQv26EM4kNAkaKA0/fa96npYpJIlT6FSyvui4ZHUVgz4Dw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-02T01:27:42.512334Z"},"content_sha256":"925a22853da5645afe12ed46b14bde5c4c8ea310cb3a729267717a373c9c2130","schema_version":"1.0","event_id":"sha256:925a22853da5645afe12ed46b14bde5c4c8ea310cb3a729267717a373c9c2130"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2025:GW3OUI7HWBXH6JZIFOK4KIGXPU","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Understanding Fact Recall in Language Models: Why Two-Stage Training Encourages Memorization but Mixed Training Teaches Knowledge","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Benjamin Heinzerling, Dongyuan Li, Kentaro Inui, Ying Zhang","submitted_at":"2025-05-22T03:34:29Z","abstract_excerpt":"While fine-tuning is the standard for injecting factual knowledge into large language models (LLMs), the mechanisms enabling reliable fact recall via unseen queries remain poorly understood. Common two-stage training strategies, which sequentially train on fact storage and query formats, often cause rote memorization. In contrast, mixed training jointly optimizes both formats and exhibits superior generalized recall. We investigate this success by comparing the two paradigms across 2.8$\\sim$4B LLMs and identify the core mechanism: the joint optimization objective in mixed training induces grad"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2505.16178","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2505.16178/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-29T01:04:53Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"QWLDG/+SBbmQsgq72ltJl09daYqKWCIPNCOF0mVRa1sYmkaj6dWu8xCwaaTV8RASd1BMpnTLITMozyDx2VYXAA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-02T01:27:42.512704Z"},"content_sha256":"4d88d774a561d80f28eff30b97c13a9c1a3f6aeef5d2fd3ea05273061de62750","schema_version":"1.0","event_id":"sha256:4d88d774a561d80f28eff30b97c13a9c1a3f6aeef5d2fd3ea05273061de62750"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/GW3OUI7HWBXH6JZIFOK4KIGXPU/bundle.json","state_url":"https://pith.science/pith/GW3OUI7HWBXH6JZIFOK4KIGXPU/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/GW3OUI7HWBXH6JZIFOK4KIGXPU/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-02T01:27:42Z","links":{"resolver":"https://pith.science/pith/GW3OUI7HWBXH6JZIFOK4KIGXPU","bundle":"https://pith.science/pith/GW3OUI7HWBXH6JZIFOK4KIGXPU/bundle.json","state":"https://pith.science/pith/GW3OUI7HWBXH6JZIFOK4KIGXPU/state.json","well_known_bundle":"https://pith.science/.well-known/pith/GW3OUI7HWBXH6JZIFOK4KIGXPU/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2025:GW3OUI7HWBXH6JZIFOK4KIGXPU","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"78d82937124f656f5a309754837d1bd4bf7fee6d3213e1c40304ffe4fe509ee2","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2025-05-22T03:34:29Z","title_canon_sha256":"9e4b8cd92b823edd9f201b2f9912206b4ce49fa3f02551ed564df97488ab6e9b"},"schema_version":"1.0","source":{"id":"2505.16178","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2505.16178","created_at":"2026-05-29T01:04:53Z"},{"alias_kind":"arxiv_version","alias_value":"2505.16178v2","created_at":"2026-05-29T01:04:53Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2505.16178","created_at":"2026-05-29T01:04:53Z"},{"alias_kind":"pith_short_12","alias_value":"GW3OUI7HWBXH","created_at":"2026-05-29T01:04:53Z"},{"alias_kind":"pith_short_16","alias_value":"GW3OUI7HWBXH6JZI","created_at":"2026-05-29T01:04:53Z"},{"alias_kind":"pith_short_8","alias_value":"GW3OUI7H","created_at":"2026-05-29T01:04:53Z"}],"graph_snapshots":[{"event_id":"sha256:4d88d774a561d80f28eff30b97c13a9c1a3f6aeef5d2fd3ea05273061de62750","target":"graph","created_at":"2026-05-29T01:04:53Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2505.16178/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"While fine-tuning is the standard for injecting factual knowledge into large language models (LLMs), the mechanisms enabling reliable fact recall via unseen queries remain poorly understood. Common two-stage training strategies, which sequentially train on fact storage and query formats, often cause rote memorization. In contrast, mixed training jointly optimizes both formats and exhibits superior generalized recall. We investigate this success by comparing the two paradigms across 2.8$\\sim$4B LLMs and identify the core mechanism: the joint optimization objective in mixed training induces grad","authors_text":"Benjamin Heinzerling, Dongyuan Li, Kentaro Inui, Ying Zhang","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2025-05-22T03:34:29Z","title":"Understanding Fact Recall in Language Models: Why Two-Stage Training Encourages Memorization but Mixed Training Teaches Knowledge"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2505.16178","kind":"arxiv","version":2},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:925a22853da5645afe12ed46b14bde5c4c8ea310cb3a729267717a373c9c2130","target":"record","created_at":"2026-05-29T01:04:53Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"78d82937124f656f5a309754837d1bd4bf7fee6d3213e1c40304ffe4fe509ee2","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2025-05-22T03:34:29Z","title_canon_sha256":"9e4b8cd92b823edd9f201b2f9912206b4ce49fa3f02551ed564df97488ab6e9b"},"schema_version":"1.0","source":{"id":"2505.16178","kind":"arxiv","version":2}},"canonical_sha256":"35b6ea23e7b06e7f27282b95c520d77d1d3fb6e15c6d3c38409c2bf9f793d9d7","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"35b6ea23e7b06e7f27282b95c520d77d1d3fb6e15c6d3c38409c2bf9f793d9d7","first_computed_at":"2026-05-29T01:04:53.289805Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-29T01:04:53.289805Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"1gucMjjmwrWOAQgToZhOyFX9zjrshRLlkufvp0/QofAkhlTPBmRcQfYKFhipUgZe7srKMfVuar9jj8rsokUdDg==","signature_status":"signed_v1","signed_at":"2026-05-29T01:04:53.290256Z","signed_message":"canonical_sha256_bytes"},"source_id":"2505.16178","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:925a22853da5645afe12ed46b14bde5c4c8ea310cb3a729267717a373c9c2130","sha256:4d88d774a561d80f28eff30b97c13a9c1a3f6aeef5d2fd3ea05273061de62750"],"state_sha256":"6b18f5ba1230d3418a0e390a51772d585bfef305fd52c15f4001f20713ff9857"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"9BeH3oRz+KfeRGxvrwv+Xd92BvFVPYtBIBVS9O2vExvjrDrZnldk24rMr4NMR4mPwZiBlW5JkLaoeiaEnh+DAA==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-02T01:27:42.514598Z","bundle_sha256":"ce8abaf1790fe077d7032f0fd3e5bd00a4d6a7542cf949b1f8a1e55aa33b62ef"}}