{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2023:5YFC77G3LPXN2NP53KT4DZ6C2L","short_pith_number":"pith:5YFC77G3","canonical_record":{"source":{"id":"2312.00869","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2023-12-01T19:00:17Z","cross_cats_sorted":[],"title_canon_sha256":"0f32bc8df1b651b07c56c14a8190cc79ab2b2c222252164c68da6471d02e81be","abstract_canon_sha256":"daff94fae34a9b44abbda2f3a4516b2af58d56e61fad6b021fd449284201cf46"},"schema_version":"1.0"},"canonical_sha256":"ee0a2ffcdb5beedd35fddaa7c1e7c2d2e3b67f345f997949df20d453c30a71d6","source":{"kind":"arxiv","id":"2312.00869","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2312.00869","created_at":"2026-07-05T08:00:39Z"},{"alias_kind":"arxiv_version","alias_value":"2312.00869v2","created_at":"2026-07-05T08:00:39Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2312.00869","created_at":"2026-07-05T08:00:39Z"},{"alias_kind":"pith_short_12","alias_value":"5YFC77G3LPXN","created_at":"2026-07-05T08:00:39Z"},{"alias_kind":"pith_short_16","alias_value":"5YFC77G3LPXN2NP5","created_at":"2026-07-05T08:00:39Z"},{"alias_kind":"pith_short_8","alias_value":"5YFC77G3","created_at":"2026-07-05T08:00:39Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2023:5YFC77G3LPXN2NP53KT4DZ6C2L","target":"record","payload":{"canonical_record":{"source":{"id":"2312.00869","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2023-12-01T19:00:17Z","cross_cats_sorted":[],"title_canon_sha256":"0f32bc8df1b651b07c56c14a8190cc79ab2b2c222252164c68da6471d02e81be","abstract_canon_sha256":"daff94fae34a9b44abbda2f3a4516b2af58d56e61fad6b021fd449284201cf46"},"schema_version":"1.0"},"canonical_sha256":"ee0a2ffcdb5beedd35fddaa7c1e7c2d2e3b67f345f997949df20d453c30a71d6","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-07-05T08:00:39.093277Z","signature_b64":"btqAyA0DaCtz3Iz6bWvLgLd1BU2gYh/1DK5QNJC7r0ZJXU733sTSPe+9gFWEyHnMvj/QFksURgkwC5gwoWJkCA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"ee0a2ffcdb5beedd35fddaa7c1e7c2d2e3b67f345f997949df20d453c30a71d6","last_reissued_at":"2026-07-05T08:00:39.092740Z","signature_status":"signed_v1","first_computed_at":"2026-07-05T08:00:39.092740Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2312.00869","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-07-05T08:00:39Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"SwzH4qJ51DdzhbPlj4Q13HiqZo2gwPaZz1DAQMwLxxB64zzXjCWP9Y8jTKL1CqXztjq70u/cjG+639Hx3RYIDQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-07-05T15:55:53.761454Z"},"content_sha256":"754e7200de63b0c1addb6f5405774bcb82c9ae652f81ce45f4ee061dfd5c5d23","schema_version":"1.0","event_id":"sha256:754e7200de63b0c1addb6f5405774bcb82c9ae652f81ce45f4ee061dfd5c5d23"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2023:5YFC77G3LPXN2NP53KT4DZ6C2L","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Segment and Caption Anything","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CV","authors_text":"Han Hu, Jianfeng Wang, Jiwen Lu, Lijuan Wang, Xiaoke Huang, Yansong Tang, Zheng Zhang, Zicheng Liu","submitted_at":"2023-12-01T19:00:17Z","abstract_excerpt":"We propose a method to efficiently equip the Segment Anything Model (SAM) with the ability to generate regional captions. SAM presents strong generalizability to segment anything while is short for semantic understanding. By introducing a lightweight query-based feature mixer, we align the region-specific features with the embedding space of language models for later caption generation. As the number of trainable parameters is small (typically in the order of tens of millions), it costs less computation, less memory usage, and less communication bandwidth, resulting in both fast and scalable t"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2312.00869","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2312.00869/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-07-05T08:00:39Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"7NrfdBZUbrKR6lWQPJTDfBkPIaz7M0Vfl/0IrYrAyZqyp8FCEx29uYww++MEY1NwHm73kyXO1NaAaNYOT/lKDQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-07-05T15:55:53.762053Z"},"content_sha256":"5ae12bdeafcb67cc21c1d76db5c846a6e4375b5a597816d4d4789d0ce02f6cff","schema_version":"1.0","event_id":"sha256:5ae12bdeafcb67cc21c1d76db5c846a6e4375b5a597816d4d4789d0ce02f6cff"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/5YFC77G3LPXN2NP53KT4DZ6C2L/bundle.json","state_url":"https://pith.science/pith/5YFC77G3LPXN2NP53KT4DZ6C2L/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/5YFC77G3LPXN2NP53KT4DZ6C2L/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-07-05T15:55:53Z","links":{"resolver":"https://pith.science/pith/5YFC77G3LPXN2NP53KT4DZ6C2L","bundle":"https://pith.science/pith/5YFC77G3LPXN2NP53KT4DZ6C2L/bundle.json","state":"https://pith.science/pith/5YFC77G3LPXN2NP53KT4DZ6C2L/state.json","well_known_bundle":"https://pith.science/.well-known/pith/5YFC77G3LPXN2NP53KT4DZ6C2L/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2023:5YFC77G3LPXN2NP53KT4DZ6C2L","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"daff94fae34a9b44abbda2f3a4516b2af58d56e61fad6b021fd449284201cf46","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2023-12-01T19:00:17Z","title_canon_sha256":"0f32bc8df1b651b07c56c14a8190cc79ab2b2c222252164c68da6471d02e81be"},"schema_version":"1.0","source":{"id":"2312.00869","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2312.00869","created_at":"2026-07-05T08:00:39Z"},{"alias_kind":"arxiv_version","alias_value":"2312.00869v2","created_at":"2026-07-05T08:00:39Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2312.00869","created_at":"2026-07-05T08:00:39Z"},{"alias_kind":"pith_short_12","alias_value":"5YFC77G3LPXN","created_at":"2026-07-05T08:00:39Z"},{"alias_kind":"pith_short_16","alias_value":"5YFC77G3LPXN2NP5","created_at":"2026-07-05T08:00:39Z"},{"alias_kind":"pith_short_8","alias_value":"5YFC77G3","created_at":"2026-07-05T08:00:39Z"}],"graph_snapshots":[{"event_id":"sha256:5ae12bdeafcb67cc21c1d76db5c846a6e4375b5a597816d4d4789d0ce02f6cff","target":"graph","created_at":"2026-07-05T08:00:39Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2312.00869/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"We propose a method to efficiently equip the Segment Anything Model (SAM) with the ability to generate regional captions. SAM presents strong generalizability to segment anything while is short for semantic understanding. By introducing a lightweight query-based feature mixer, we align the region-specific features with the embedding space of language models for later caption generation. As the number of trainable parameters is small (typically in the order of tens of millions), it costs less computation, less memory usage, and less communication bandwidth, resulting in both fast and scalable t","authors_text":"Han Hu, Jianfeng Wang, Jiwen Lu, Lijuan Wang, Xiaoke Huang, Yansong Tang, Zheng Zhang, Zicheng Liu","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2023-12-01T19:00:17Z","title":"Segment and Caption Anything"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2312.00869","kind":"arxiv","version":2},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:754e7200de63b0c1addb6f5405774bcb82c9ae652f81ce45f4ee061dfd5c5d23","target":"record","created_at":"2026-07-05T08:00:39Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"daff94fae34a9b44abbda2f3a4516b2af58d56e61fad6b021fd449284201cf46","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2023-12-01T19:00:17Z","title_canon_sha256":"0f32bc8df1b651b07c56c14a8190cc79ab2b2c222252164c68da6471d02e81be"},"schema_version":"1.0","source":{"id":"2312.00869","kind":"arxiv","version":2}},"canonical_sha256":"ee0a2ffcdb5beedd35fddaa7c1e7c2d2e3b67f345f997949df20d453c30a71d6","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"ee0a2ffcdb5beedd35fddaa7c1e7c2d2e3b67f345f997949df20d453c30a71d6","first_computed_at":"2026-07-05T08:00:39.092740Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-07-05T08:00:39.092740Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"btqAyA0DaCtz3Iz6bWvLgLd1BU2gYh/1DK5QNJC7r0ZJXU733sTSPe+9gFWEyHnMvj/QFksURgkwC5gwoWJkCA==","signature_status":"signed_v1","signed_at":"2026-07-05T08:00:39.093277Z","signed_message":"canonical_sha256_bytes"},"source_id":"2312.00869","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:754e7200de63b0c1addb6f5405774bcb82c9ae652f81ce45f4ee061dfd5c5d23","sha256:5ae12bdeafcb67cc21c1d76db5c846a6e4375b5a597816d4d4789d0ce02f6cff"],"state_sha256":"d367ea7b8078f9427b156adfb8f4c70bec24db007d9260662c735597e3084b72"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"DOo6ulF+4TvxLmlV9fyR+ictLwlqkmKU4GNG9KSrRHQx4WRB11FbWt8g6H095i7ttBtwCJUK8vSDIVwJy9O6BQ==","signed_message":"bundle_sha256_bytes","signed_at":"2026-07-05T15:55:53.765050Z","bundle_sha256":"ccb69ea0ece14d497705983fa1dc1ac07f2522b3421a063efa3185789ec8b0f6"}}