{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:DGNFU6XGZCUE4OYM7HEFYNSFRZ","short_pith_number":"pith:DGNFU6XG","canonical_record":{"source":{"id":"2605.21625","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2026-05-20T18:36:57Z","cross_cats_sorted":["cs.AI","cs.CL"],"title_canon_sha256":"f6e8d7a9b2c46f8eb0e8cfaa02e86d1520aa8031adca37ca830f3c0e060afe86","abstract_canon_sha256":"8604e91700112d90e35a79ee79780b8735c019b37e13e09c565c2ec19c671418"},"schema_version":"1.0"},"canonical_sha256":"199a5a7ae6c8a84e3b0cf9c85c36458e774305f2018d6e372ceaf163508e4ec7","source":{"kind":"arxiv","id":"2605.21625","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.21625","created_at":"2026-05-22T01:03:25Z"},{"alias_kind":"arxiv_version","alias_value":"2605.21625v1","created_at":"2026-05-22T01:03:25Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.21625","created_at":"2026-05-22T01:03:25Z"},{"alias_kind":"pith_short_12","alias_value":"DGNFU6XGZCUE","created_at":"2026-05-22T01:03:25Z"},{"alias_kind":"pith_short_16","alias_value":"DGNFU6XGZCUE4OYM","created_at":"2026-05-22T01:03:25Z"},{"alias_kind":"pith_short_8","alias_value":"DGNFU6XG","created_at":"2026-05-22T01:03:25Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:DGNFU6XGZCUE4OYM7HEFYNSFRZ","target":"record","payload":{"canonical_record":{"source":{"id":"2605.21625","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2026-05-20T18:36:57Z","cross_cats_sorted":["cs.AI","cs.CL"],"title_canon_sha256":"f6e8d7a9b2c46f8eb0e8cfaa02e86d1520aa8031adca37ca830f3c0e060afe86","abstract_canon_sha256":"8604e91700112d90e35a79ee79780b8735c019b37e13e09c565c2ec19c671418"},"schema_version":"1.0"},"canonical_sha256":"199a5a7ae6c8a84e3b0cf9c85c36458e774305f2018d6e372ceaf163508e4ec7","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-22T01:03:25.935225Z","signature_b64":"KbME+9WFPMnf2kHdQFsaIzZkOkiZufjibemAPwRpIwXN+vDS9mKfVs4RlHLoFN4XxwHcCQvuUNprSODwSKEaBg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"199a5a7ae6c8a84e3b0cf9c85c36458e774305f2018d6e372ceaf163508e4ec7","last_reissued_at":"2026-05-22T01:03:25.934614Z","signature_status":"signed_v1","first_computed_at":"2026-05-22T01:03:25.934614Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2605.21625","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-22T01:03:25Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"589UYaHqI1eqYGufYUAXpqidhkGjD/xYNIxI9L51lg85TcxhpYew5ac5J1jCyBEsnGbnwY1TEKUzwfIkRv++AQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-23T17:58:02.119087Z"},"content_sha256":"6f57a9cf6e75b0d9b555f2fdacd4e0e7eba268339a81396734364b6644335437","schema_version":"1.0","event_id":"sha256:6f57a9cf6e75b0d9b555f2fdacd4e0e7eba268339a81396734364b6644335437"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:DGNFU6XGZCUE4OYM7HEFYNSFRZ","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Flat-Pack Bench: Evaluating Spatio-Temporal Understanding in Large Vision-Language Models through Furniture Assembly","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.AI","cs.CL"],"primary_cat":"cs.CV","authors_text":"Aditya Chetan, Bharath Hariharan, Bharath Raj Nagoor Kani, Eric Cai, Noah Snavely, Peeyush Kushwaha, Qianqian Wang, Utkarsh Mall","submitted_at":"2026-05-20T18:36:57Z","abstract_excerpt":"The emergence of Large Vision-Language Models (LVLMs) has significantly advanced video understanding capabilities. However, existing benchmarks focus predominantly on coarse-grained tasks such as action segmentation, classification, captioning, and retrieval. Furthermore, these benchmarks often rely on entities that can be easily identified verbally, like household objects, animals, human subjects, etc., limiting their applicability to complex, in-the-wild video scenarios. But, many applications such as furniture assembly, cooking, etc., require step-by-step fine-grained spatio-temporal unders"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.21625","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2605.21625/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-22T01:03:25Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"ranM51E0wnoDKZQUOH8b9NnwjXoO9A6FiD8nE1MDEtDBgc7RIGvkJxByt9dA1q5K4iic/nd4m9w7fkERmnVzCw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-23T17:58:02.119850Z"},"content_sha256":"829d4406034da2b1f903774ecd4b73be8d321d39ce5c843a9a578bd9663dfff0","schema_version":"1.0","event_id":"sha256:829d4406034da2b1f903774ecd4b73be8d321d39ce5c843a9a578bd9663dfff0"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/DGNFU6XGZCUE4OYM7HEFYNSFRZ/bundle.json","state_url":"https://pith.science/pith/DGNFU6XGZCUE4OYM7HEFYNSFRZ/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/DGNFU6XGZCUE4OYM7HEFYNSFRZ/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-23T17:58:02Z","links":{"resolver":"https://pith.science/pith/DGNFU6XGZCUE4OYM7HEFYNSFRZ","bundle":"https://pith.science/pith/DGNFU6XGZCUE4OYM7HEFYNSFRZ/bundle.json","state":"https://pith.science/pith/DGNFU6XGZCUE4OYM7HEFYNSFRZ/state.json","well_known_bundle":"https://pith.science/.well-known/pith/DGNFU6XGZCUE4OYM7HEFYNSFRZ/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:DGNFU6XGZCUE4OYM7HEFYNSFRZ","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"8604e91700112d90e35a79ee79780b8735c019b37e13e09c565c2ec19c671418","cross_cats_sorted":["cs.AI","cs.CL"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2026-05-20T18:36:57Z","title_canon_sha256":"f6e8d7a9b2c46f8eb0e8cfaa02e86d1520aa8031adca37ca830f3c0e060afe86"},"schema_version":"1.0","source":{"id":"2605.21625","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.21625","created_at":"2026-05-22T01:03:25Z"},{"alias_kind":"arxiv_version","alias_value":"2605.21625v1","created_at":"2026-05-22T01:03:25Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.21625","created_at":"2026-05-22T01:03:25Z"},{"alias_kind":"pith_short_12","alias_value":"DGNFU6XGZCUE","created_at":"2026-05-22T01:03:25Z"},{"alias_kind":"pith_short_16","alias_value":"DGNFU6XGZCUE4OYM","created_at":"2026-05-22T01:03:25Z"},{"alias_kind":"pith_short_8","alias_value":"DGNFU6XG","created_at":"2026-05-22T01:03:25Z"}],"graph_snapshots":[{"event_id":"sha256:829d4406034da2b1f903774ecd4b73be8d321d39ce5c843a9a578bd9663dfff0","target":"graph","created_at":"2026-05-22T01:03:25Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2605.21625/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"The emergence of Large Vision-Language Models (LVLMs) has significantly advanced video understanding capabilities. However, existing benchmarks focus predominantly on coarse-grained tasks such as action segmentation, classification, captioning, and retrieval. Furthermore, these benchmarks often rely on entities that can be easily identified verbally, like household objects, animals, human subjects, etc., limiting their applicability to complex, in-the-wild video scenarios. But, many applications such as furniture assembly, cooking, etc., require step-by-step fine-grained spatio-temporal unders","authors_text":"Aditya Chetan, Bharath Hariharan, Bharath Raj Nagoor Kani, Eric Cai, Noah Snavely, Peeyush Kushwaha, Qianqian Wang, Utkarsh Mall","cross_cats":["cs.AI","cs.CL"],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2026-05-20T18:36:57Z","title":"Flat-Pack Bench: Evaluating Spatio-Temporal Understanding in Large Vision-Language Models through Furniture Assembly"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.21625","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:6f57a9cf6e75b0d9b555f2fdacd4e0e7eba268339a81396734364b6644335437","target":"record","created_at":"2026-05-22T01:03:25Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"8604e91700112d90e35a79ee79780b8735c019b37e13e09c565c2ec19c671418","cross_cats_sorted":["cs.AI","cs.CL"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2026-05-20T18:36:57Z","title_canon_sha256":"f6e8d7a9b2c46f8eb0e8cfaa02e86d1520aa8031adca37ca830f3c0e060afe86"},"schema_version":"1.0","source":{"id":"2605.21625","kind":"arxiv","version":1}},"canonical_sha256":"199a5a7ae6c8a84e3b0cf9c85c36458e774305f2018d6e372ceaf163508e4ec7","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"199a5a7ae6c8a84e3b0cf9c85c36458e774305f2018d6e372ceaf163508e4ec7","first_computed_at":"2026-05-22T01:03:25.934614Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-22T01:03:25.934614Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"KbME+9WFPMnf2kHdQFsaIzZkOkiZufjibemAPwRpIwXN+vDS9mKfVs4RlHLoFN4XxwHcCQvuUNprSODwSKEaBg==","signature_status":"signed_v1","signed_at":"2026-05-22T01:03:25.935225Z","signed_message":"canonical_sha256_bytes"},"source_id":"2605.21625","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:6f57a9cf6e75b0d9b555f2fdacd4e0e7eba268339a81396734364b6644335437","sha256:829d4406034da2b1f903774ecd4b73be8d321d39ce5c843a9a578bd9663dfff0"],"state_sha256":"e9c797527d6035bc7afdd2f83700af5d1db691182e49bb05ff0723776ef7e5d7"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"QXxmHgiER83bZd7/gEZv+xRjhEEzzU4kQanawbVyZgonzRXldJqNrX7FCnf/8d3YPz5WbM/mNXd98v+MGNfKCg==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-23T17:58:02.123757Z","bundle_sha256":"db8932c671a54da6d7dcd301ed2aac95b7478b3f08a9a9886f23160a5200be11"}}