{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2025:FUEQA3NZHCRMH7QM4MJMS2DM4G","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"00f30bab3c11f2ffea1607cb33b4d674b3be5e7f106e2ee59730db91fff3c936","cross_cats_sorted":["cs.AI","physics.chem-ph","q-bio.QM"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2025-02-03T09:21:49Z","title_canon_sha256":"15ea0e8490b681645031a7369cf98fa78ccf56020e8fce25a55646f07efbd399"},"schema_version":"1.0","source":{"id":"2502.01184","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2502.01184","created_at":"2026-05-26T02:03:48Z"},{"alias_kind":"arxiv_version","alias_value":"2502.01184v2","created_at":"2026-05-26T02:03:48Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2502.01184","created_at":"2026-05-26T02:03:48Z"},{"alias_kind":"pith_short_12","alias_value":"FUEQA3NZHCRM","created_at":"2026-05-26T02:03:48Z"},{"alias_kind":"pith_short_16","alias_value":"FUEQA3NZHCRMH7QM","created_at":"2026-05-26T02:03:48Z"},{"alias_kind":"pith_short_8","alias_value":"FUEQA3NZ","created_at":"2026-05-26T02:03:48Z"}],"graph_snapshots":[{"event_id":"sha256:85266367bf7869c40b28da9ce02db2d6169e31630483373d2aeac1004093931e","target":"graph","created_at":"2026-05-26T02:03:48Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2502.01184/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Molecular representation learning methods typically tokenize molecules as individual atoms or use rigid, rule-based fragment decompositions, limiting their ability to capture meaningful chemical substructure context. We introduce FragmentNet, a graph-to-sequence model built around a novel adaptive, learned tokenizer that decomposes molecular graphs into chemically valid fragments of adjustable granularity, complemented by chemically aware spatial positional encodings that preserve molecular topology in the resulting sequence. Extending masked pre-training strategies from natural language proce","authors_text":"Aditi Misra, Ankur Samanta, Christian McIntosh Clarke, Jayakumar Rajadas, Rohan Gupta","cross_cats":["cs.AI","physics.chem-ph","q-bio.QM"],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2025-02-03T09:21:49Z","title":"FragmentNet: Adaptive Graph Fragmentation for Graph-to-Sequence Molecular Representation Learning"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2502.01184","kind":"arxiv","version":2},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:a4d034af7b932bde9fe43ff1efc16a8c39810c115668727d139bfc41899f4ada","target":"record","created_at":"2026-05-26T02:03:48Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"00f30bab3c11f2ffea1607cb33b4d674b3be5e7f106e2ee59730db91fff3c936","cross_cats_sorted":["cs.AI","physics.chem-ph","q-bio.QM"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2025-02-03T09:21:49Z","title_canon_sha256":"15ea0e8490b681645031a7369cf98fa78ccf56020e8fce25a55646f07efbd399"},"schema_version":"1.0","source":{"id":"2502.01184","kind":"arxiv","version":2}},"canonical_sha256":"2d09006db938a2c3fe0ce312c9686ce1994faa93a9e96128dbe3644d9998a021","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"2d09006db938a2c3fe0ce312c9686ce1994faa93a9e96128dbe3644d9998a021","first_computed_at":"2026-05-26T02:03:48.402463Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-26T02:03:48.402463Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"pi0UMpPC5z4faSi3IGsFD3fwCIzz9KqFCgU9ZjqEqhlaLU0Ut3tdhZSfxyhEW8g2aOd76C/PoX3NIvVsIW22BQ==","signature_status":"signed_v1","signed_at":"2026-05-26T02:03:48.403409Z","signed_message":"canonical_sha256_bytes"},"source_id":"2502.01184","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:a4d034af7b932bde9fe43ff1efc16a8c39810c115668727d139bfc41899f4ada","sha256:85266367bf7869c40b28da9ce02db2d6169e31630483373d2aeac1004093931e"],"state_sha256":"a6ccb1a61747ef8e0e5056eb517abd8f6e179c76aeafb53d799957cea829b20c"}