{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:XAW366NP42VQE364EK27H34OZX","short_pith_number":"pith:XAW366NP","canonical_record":{"source":{"id":"2601.21725","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-01-29T13:48:43Z","cross_cats_sorted":["cs.LG"],"title_canon_sha256":"2220e88d330656d813945ac6c6ba6f2d3e3c3b04eed256c69c0aa39653b1f41d","abstract_canon_sha256":"674e7c60ca153f66799cf4b4e228fa4ead44fc3727b8d1a5d32e5417616fcb1e"},"schema_version":"1.0"},"canonical_sha256":"b82dbf79afe6ab026fdc22b5f3ef8ecdccd31b111204a086288adf69ad803152","source":{"kind":"arxiv","id":"2601.21725","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2601.21725","created_at":"2026-05-29T01:04:36Z"},{"alias_kind":"arxiv_version","alias_value":"2601.21725v2","created_at":"2026-05-29T01:04:36Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2601.21725","created_at":"2026-05-29T01:04:36Z"},{"alias_kind":"pith_short_12","alias_value":"XAW366NP42VQ","created_at":"2026-05-29T01:04:36Z"},{"alias_kind":"pith_short_16","alias_value":"XAW366NP42VQE364","created_at":"2026-05-29T01:04:36Z"},{"alias_kind":"pith_short_8","alias_value":"XAW366NP","created_at":"2026-05-29T01:04:36Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:XAW366NP42VQE364EK27H34OZX","target":"record","payload":{"canonical_record":{"source":{"id":"2601.21725","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-01-29T13:48:43Z","cross_cats_sorted":["cs.LG"],"title_canon_sha256":"2220e88d330656d813945ac6c6ba6f2d3e3c3b04eed256c69c0aa39653b1f41d","abstract_canon_sha256":"674e7c60ca153f66799cf4b4e228fa4ead44fc3727b8d1a5d32e5417616fcb1e"},"schema_version":"1.0"},"canonical_sha256":"b82dbf79afe6ab026fdc22b5f3ef8ecdccd31b111204a086288adf69ad803152","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-29T01:04:36.680197Z","signature_b64":"TNz9IKNmU7RniCtWWd2HU6Mc4oiNeTKstc1xvD/Z33GJth8DICrSjn/kWE90cl4ihZHjGxN3z2rRo23cIeO5CQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"b82dbf79afe6ab026fdc22b5f3ef8ecdccd31b111204a086288adf69ad803152","last_reissued_at":"2026-05-29T01:04:36.679609Z","signature_status":"signed_v1","first_computed_at":"2026-05-29T01:04:36.679609Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2601.21725","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-29T01:04:36Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"Nus/7bJZWevrqJ5iB0hUl7X3XPdbZuIG+IjESvmkaMylU/4Ip53qGZDVH32lFXTKGIOo5A2I9fqC2V8MGg5tCQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-30T08:45:35.098913Z"},"content_sha256":"1c1df8fbeaa4463d6d72b74c5c210e096b3ca23b9a9ae0d2dde3cf83e3cb7cd2","schema_version":"1.0","event_id":"sha256:1c1df8fbeaa4463d6d72b74c5c210e096b3ca23b9a9ae0d2dde3cf83e3cb7cd2"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:XAW366NP42VQE364EK27H34OZX","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Procedural Pretraining: Warming Up Language Models with Abstract Data","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.LG"],"primary_cat":"cs.CL","authors_text":"Anton van den Hengel, Damien Teney, Hemanth Saratchandran, Liangze Jiang, Zachary Shinnick","submitted_at":"2026-01-29T13:48:43Z","abstract_excerpt":"Pretraining language models directly on web-scale corpora is the de facto paradigm. We study an alternative where the model is initially exposed to abstract structured data to ease the subsequent acquisition of rich semantic knowledge, much like humans learning simple logic and mathematics before higher reasoning. We focus on procedural data, generated by formal languages and other simple algorithms, as such abstract data. We first diagnose the algorithmic skills that different forms of procedural data can improve, often significantly. For example, the accuracy of context recall (Needle-in-a-h"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2601.21725","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2601.21725/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-29T01:04:36Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"ryacVaV+QZyuGi7/3xbAklf9EWm7id43JZ7h2Q61X3bUTN7VjNcYQFB4YuD8mMHrivjdiSo4r0yhJ3dlwfXWAQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-30T08:45:35.099300Z"},"content_sha256":"a0d14c75d58412a3e68ae714ffc8f8a68c643a9ab0a0130bfc77190ea8c873d2","schema_version":"1.0","event_id":"sha256:a0d14c75d58412a3e68ae714ffc8f8a68c643a9ab0a0130bfc77190ea8c873d2"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/XAW366NP42VQE364EK27H34OZX/bundle.json","state_url":"https://pith.science/pith/XAW366NP42VQE364EK27H34OZX/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/XAW366NP42VQE364EK27H34OZX/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-30T08:45:35Z","links":{"resolver":"https://pith.science/pith/XAW366NP42VQE364EK27H34OZX","bundle":"https://pith.science/pith/XAW366NP42VQE364EK27H34OZX/bundle.json","state":"https://pith.science/pith/XAW366NP42VQE364EK27H34OZX/state.json","well_known_bundle":"https://pith.science/.well-known/pith/XAW366NP42VQE364EK27H34OZX/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:XAW366NP42VQE364EK27H34OZX","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"674e7c60ca153f66799cf4b4e228fa4ead44fc3727b8d1a5d32e5417616fcb1e","cross_cats_sorted":["cs.LG"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-01-29T13:48:43Z","title_canon_sha256":"2220e88d330656d813945ac6c6ba6f2d3e3c3b04eed256c69c0aa39653b1f41d"},"schema_version":"1.0","source":{"id":"2601.21725","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2601.21725","created_at":"2026-05-29T01:04:36Z"},{"alias_kind":"arxiv_version","alias_value":"2601.21725v2","created_at":"2026-05-29T01:04:36Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2601.21725","created_at":"2026-05-29T01:04:36Z"},{"alias_kind":"pith_short_12","alias_value":"XAW366NP42VQ","created_at":"2026-05-29T01:04:36Z"},{"alias_kind":"pith_short_16","alias_value":"XAW366NP42VQE364","created_at":"2026-05-29T01:04:36Z"},{"alias_kind":"pith_short_8","alias_value":"XAW366NP","created_at":"2026-05-29T01:04:36Z"}],"graph_snapshots":[{"event_id":"sha256:a0d14c75d58412a3e68ae714ffc8f8a68c643a9ab0a0130bfc77190ea8c873d2","target":"graph","created_at":"2026-05-29T01:04:36Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2601.21725/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Pretraining language models directly on web-scale corpora is the de facto paradigm. We study an alternative where the model is initially exposed to abstract structured data to ease the subsequent acquisition of rich semantic knowledge, much like humans learning simple logic and mathematics before higher reasoning. We focus on procedural data, generated by formal languages and other simple algorithms, as such abstract data. We first diagnose the algorithmic skills that different forms of procedural data can improve, often significantly. For example, the accuracy of context recall (Needle-in-a-h","authors_text":"Anton van den Hengel, Damien Teney, Hemanth Saratchandran, Liangze Jiang, Zachary Shinnick","cross_cats":["cs.LG"],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-01-29T13:48:43Z","title":"Procedural Pretraining: Warming Up Language Models with Abstract Data"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2601.21725","kind":"arxiv","version":2},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:1c1df8fbeaa4463d6d72b74c5c210e096b3ca23b9a9ae0d2dde3cf83e3cb7cd2","target":"record","created_at":"2026-05-29T01:04:36Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"674e7c60ca153f66799cf4b4e228fa4ead44fc3727b8d1a5d32e5417616fcb1e","cross_cats_sorted":["cs.LG"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-01-29T13:48:43Z","title_canon_sha256":"2220e88d330656d813945ac6c6ba6f2d3e3c3b04eed256c69c0aa39653b1f41d"},"schema_version":"1.0","source":{"id":"2601.21725","kind":"arxiv","version":2}},"canonical_sha256":"b82dbf79afe6ab026fdc22b5f3ef8ecdccd31b111204a086288adf69ad803152","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"b82dbf79afe6ab026fdc22b5f3ef8ecdccd31b111204a086288adf69ad803152","first_computed_at":"2026-05-29T01:04:36.679609Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-29T01:04:36.679609Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"TNz9IKNmU7RniCtWWd2HU6Mc4oiNeTKstc1xvD/Z33GJth8DICrSjn/kWE90cl4ihZHjGxN3z2rRo23cIeO5CQ==","signature_status":"signed_v1","signed_at":"2026-05-29T01:04:36.680197Z","signed_message":"canonical_sha256_bytes"},"source_id":"2601.21725","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:1c1df8fbeaa4463d6d72b74c5c210e096b3ca23b9a9ae0d2dde3cf83e3cb7cd2","sha256:a0d14c75d58412a3e68ae714ffc8f8a68c643a9ab0a0130bfc77190ea8c873d2"],"state_sha256":"c52dc335893aba419614ca76c466e03db613cef0906ef975dd60fd4f38e8152a"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"MZR+1EwTGC64jda9wPOYJRP1V1LHBkNyLJCdK+SONMlngfhnLNNyuJKqSA5vPFKk8PbCTwlLozQf6g7VY/BUBg==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-30T08:45:35.101365Z","bundle_sha256":"a3cd7c0e54151f7091a290903ee09bdf34d6f80148038c5d6e000bc823aa81ea"}}