{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:64JAUUJVNDTMT5RKGQVS5KYZX3","short_pith_number":"pith:64JAUUJV","canonical_record":{"source":{"id":"2606.11602","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2026-06-10T02:59:13Z","cross_cats_sorted":[],"title_canon_sha256":"71f5a3d582ed5bcc2a498556ef532103103e26de0d708603be6699723059009f","abstract_canon_sha256":"58b7413214280a1fd7b3b928343c895671d9a43b5e91d44d5d705219fed3cbce"},"schema_version":"1.0"},"canonical_sha256":"f7120a513568e6c9f62a342b2eab19beee25d13805e365a12fba3b34510ff6dc","source":{"kind":"arxiv","id":"2606.11602","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2606.11602","created_at":"2026-06-11T01:09:58Z"},{"alias_kind":"arxiv_version","alias_value":"2606.11602v1","created_at":"2026-06-11T01:09:58Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.11602","created_at":"2026-06-11T01:09:58Z"},{"alias_kind":"pith_short_12","alias_value":"64JAUUJVNDTM","created_at":"2026-06-11T01:09:58Z"},{"alias_kind":"pith_short_16","alias_value":"64JAUUJVNDTMT5RK","created_at":"2026-06-11T01:09:58Z"},{"alias_kind":"pith_short_8","alias_value":"64JAUUJV","created_at":"2026-06-11T01:09:58Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:64JAUUJVNDTMT5RKGQVS5KYZX3","target":"record","payload":{"canonical_record":{"source":{"id":"2606.11602","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2026-06-10T02:59:13Z","cross_cats_sorted":[],"title_canon_sha256":"71f5a3d582ed5bcc2a498556ef532103103e26de0d708603be6699723059009f","abstract_canon_sha256":"58b7413214280a1fd7b3b928343c895671d9a43b5e91d44d5d705219fed3cbce"},"schema_version":"1.0"},"canonical_sha256":"f7120a513568e6c9f62a342b2eab19beee25d13805e365a12fba3b34510ff6dc","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-11T01:09:58.530274Z","signature_b64":"0jfRBM82xsoQgFdAkPlQpgkjcP5XzGuxmS1QUIJkiQdIG55SEYNA7bCvQkntXb0aCKZN3pWLDH1Q2fmTv8arAw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"f7120a513568e6c9f62a342b2eab19beee25d13805e365a12fba3b34510ff6dc","last_reissued_at":"2026-06-11T01:09:58.529435Z","signature_status":"signed_v1","first_computed_at":"2026-06-11T01:09:58.529435Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2606.11602","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-11T01:09:58Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"tz8PJvkCxd3YyJYRMXPI7nlf4A9iKcyjDn38agXbXZpuUD8qH4CKLzuqrTR+JgM70NqB0T+fvciLNkRO4J6qAQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-11T11:23:44.326046Z"},"content_sha256":"e09281c0c8a55b1b8f094cdc3cfd4164b2b42f91c28c11ad8c5edb9d22b0e1ab","schema_version":"1.0","event_id":"sha256:e09281c0c8a55b1b8f094cdc3cfd4164b2b42f91c28c11ad8c5edb9d22b0e1ab"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:64JAUUJVNDTMT5RKGQVS5KYZX3","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"On Aligning Hierarchical Standardized Embedding for Audio-visual Generalized Zero-shot Learning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CV","authors_text":"Jie Hong, Pengfei Fang, Siyuan Fan, Yanghao Zhou, Zihan Zhang","submitted_at":"2026-06-10T02:59:13Z","abstract_excerpt":"Audio-visual Generalized Zero-shot Learning (AV-GZSL) is a challenging task that aims to classify both seen and unseen objects or scenes by integrating data from audio and visual modalities. Recent studies primarily focus on fusing or aligning audio and visual features to generate more informative audio-visual embeddings. Also, aligning the audio-visual and textual features of most existing methods relies solely on the optimization objectives. However, those methods neglect the inherent distributional and structural differences between audio-visual and textual modalities. To address this limit"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.11602","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2606.11602/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-11T01:09:58Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"+DY+E1YdSaqEcWJs/oN388EPuW6Fn6g3Y1Z/m6Gf6+/Klygy7XUIiBqdKJEasCMscXAjLlstuFAhVEhuZXTZDQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-11T11:23:44.326471Z"},"content_sha256":"2dd655edd3eed2fea68a61d7364b5701f0889b44e769709d003277e860a299a3","schema_version":"1.0","event_id":"sha256:2dd655edd3eed2fea68a61d7364b5701f0889b44e769709d003277e860a299a3"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/64JAUUJVNDTMT5RKGQVS5KYZX3/bundle.json","state_url":"https://pith.science/pith/64JAUUJVNDTMT5RKGQVS5KYZX3/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/64JAUUJVNDTMT5RKGQVS5KYZX3/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-11T11:23:44Z","links":{"resolver":"https://pith.science/pith/64JAUUJVNDTMT5RKGQVS5KYZX3","bundle":"https://pith.science/pith/64JAUUJVNDTMT5RKGQVS5KYZX3/bundle.json","state":"https://pith.science/pith/64JAUUJVNDTMT5RKGQVS5KYZX3/state.json","well_known_bundle":"https://pith.science/.well-known/pith/64JAUUJVNDTMT5RKGQVS5KYZX3/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:64JAUUJVNDTMT5RKGQVS5KYZX3","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"58b7413214280a1fd7b3b928343c895671d9a43b5e91d44d5d705219fed3cbce","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2026-06-10T02:59:13Z","title_canon_sha256":"71f5a3d582ed5bcc2a498556ef532103103e26de0d708603be6699723059009f"},"schema_version":"1.0","source":{"id":"2606.11602","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2606.11602","created_at":"2026-06-11T01:09:58Z"},{"alias_kind":"arxiv_version","alias_value":"2606.11602v1","created_at":"2026-06-11T01:09:58Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.11602","created_at":"2026-06-11T01:09:58Z"},{"alias_kind":"pith_short_12","alias_value":"64JAUUJVNDTM","created_at":"2026-06-11T01:09:58Z"},{"alias_kind":"pith_short_16","alias_value":"64JAUUJVNDTMT5RK","created_at":"2026-06-11T01:09:58Z"},{"alias_kind":"pith_short_8","alias_value":"64JAUUJV","created_at":"2026-06-11T01:09:58Z"}],"graph_snapshots":[{"event_id":"sha256:2dd655edd3eed2fea68a61d7364b5701f0889b44e769709d003277e860a299a3","target":"graph","created_at":"2026-06-11T01:09:58Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2606.11602/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Audio-visual Generalized Zero-shot Learning (AV-GZSL) is a challenging task that aims to classify both seen and unseen objects or scenes by integrating data from audio and visual modalities. Recent studies primarily focus on fusing or aligning audio and visual features to generate more informative audio-visual embeddings. Also, aligning the audio-visual and textual features of most existing methods relies solely on the optimization objectives. However, those methods neglect the inherent distributional and structural differences between audio-visual and textual modalities. To address this limit","authors_text":"Jie Hong, Pengfei Fang, Siyuan Fan, Yanghao Zhou, Zihan Zhang","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2026-06-10T02:59:13Z","title":"On Aligning Hierarchical Standardized Embedding for Audio-visual Generalized Zero-shot Learning"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.11602","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:e09281c0c8a55b1b8f094cdc3cfd4164b2b42f91c28c11ad8c5edb9d22b0e1ab","target":"record","created_at":"2026-06-11T01:09:58Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"58b7413214280a1fd7b3b928343c895671d9a43b5e91d44d5d705219fed3cbce","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2026-06-10T02:59:13Z","title_canon_sha256":"71f5a3d582ed5bcc2a498556ef532103103e26de0d708603be6699723059009f"},"schema_version":"1.0","source":{"id":"2606.11602","kind":"arxiv","version":1}},"canonical_sha256":"f7120a513568e6c9f62a342b2eab19beee25d13805e365a12fba3b34510ff6dc","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"f7120a513568e6c9f62a342b2eab19beee25d13805e365a12fba3b34510ff6dc","first_computed_at":"2026-06-11T01:09:58.529435Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-06-11T01:09:58.529435Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"0jfRBM82xsoQgFdAkPlQpgkjcP5XzGuxmS1QUIJkiQdIG55SEYNA7bCvQkntXb0aCKZN3pWLDH1Q2fmTv8arAw==","signature_status":"signed_v1","signed_at":"2026-06-11T01:09:58.530274Z","signed_message":"canonical_sha256_bytes"},"source_id":"2606.11602","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:e09281c0c8a55b1b8f094cdc3cfd4164b2b42f91c28c11ad8c5edb9d22b0e1ab","sha256:2dd655edd3eed2fea68a61d7364b5701f0889b44e769709d003277e860a299a3"],"state_sha256":"ca687b9800ade279d5a07be402a8403078375b90bd66bd9a77386d4299708e63"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"PM8PKZm8Q/YmZIgZR5ECzRdg2D1o5kLc+u85MVZ+swDtihl/n9K71tk08y3omnO3KJSHgs8Pv0hjuqSIpBmJDg==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-11T11:23:44.328526Z","bundle_sha256":"43ed56a8c4089b0afa2802e57533860bf6b8b7fbcc2495cd4c5fa0749ae8ba48"}}