{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2025:UEFX2XFQV7VY3QTDAWQNVFNCSM","short_pith_number":"pith:UEFX2XFQ","canonical_record":{"source":{"id":"2508.08237","kind":"arxiv","version":4},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.MM","submitted_at":"2025-08-11T17:53:23Z","cross_cats_sorted":["cs.AI","cs.CV","cs.SD","eess.AS"],"title_canon_sha256":"a62da705add4e2b7a8caa04d55246e53ac4fa47f7a4b47fb5049a650a55966c9","abstract_canon_sha256":"5245ecaca821eea73d7cc9eb2741bb20a31913ee80b9e52742558d5ad95930e8"},"schema_version":"1.0"},"canonical_sha256":"a10b7d5cb0afeb8dc26305a0da95a2932f29965fa4277eabc2e9f37d6f603bdc","source":{"kind":"arxiv","id":"2508.08237","version":4},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2508.08237","created_at":"2026-06-04T01:09:38Z"},{"alias_kind":"arxiv_version","alias_value":"2508.08237v4","created_at":"2026-06-04T01:09:38Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2508.08237","created_at":"2026-06-04T01:09:38Z"},{"alias_kind":"pith_short_12","alias_value":"UEFX2XFQV7VY","created_at":"2026-06-04T01:09:38Z"},{"alias_kind":"pith_short_16","alias_value":"UEFX2XFQV7VY3QTD","created_at":"2026-06-04T01:09:38Z"},{"alias_kind":"pith_short_8","alias_value":"UEFX2XFQ","created_at":"2026-06-04T01:09:38Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2025:UEFX2XFQV7VY3QTDAWQNVFNCSM","target":"record","payload":{"canonical_record":{"source":{"id":"2508.08237","kind":"arxiv","version":4},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.MM","submitted_at":"2025-08-11T17:53:23Z","cross_cats_sorted":["cs.AI","cs.CV","cs.SD","eess.AS"],"title_canon_sha256":"a62da705add4e2b7a8caa04d55246e53ac4fa47f7a4b47fb5049a650a55966c9","abstract_canon_sha256":"5245ecaca821eea73d7cc9eb2741bb20a31913ee80b9e52742558d5ad95930e8"},"schema_version":"1.0"},"canonical_sha256":"a10b7d5cb0afeb8dc26305a0da95a2932f29965fa4277eabc2e9f37d6f603bdc","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-04T01:09:38.038975Z","signature_b64":"JBFglGdfnZUF9PBGN43zGjr7FxV10M6dufFb4ET5jPn2drILcFKlunducH5OYOKG32aJC2ZDR4VpJriCYmbBCQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"a10b7d5cb0afeb8dc26305a0da95a2932f29965fa4277eabc2e9f37d6f603bdc","last_reissued_at":"2026-06-04T01:09:38.038367Z","signature_status":"signed_v1","first_computed_at":"2026-06-04T01:09:38.038367Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2508.08237","source_version":4,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-04T01:09:38Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"LYyXgZi88yGZZpVPi7UE5OuOUwl2nl1VMP0y42es/9pMg8kskg6L2jh9dRuyX7ynej7CvJgtld9QC4M0PYvcDQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-07T22:12:34.592513Z"},"content_sha256":"0f33c896bc9b1131b25de5b039fef3b5f537276c411a6fc8338f27f286c2d838","schema_version":"1.0","event_id":"sha256:0f33c896bc9b1131b25de5b039fef3b5f537276c411a6fc8338f27f286c2d838"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2025:UEFX2XFQV7VY3QTDAWQNVFNCSM","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"VGGSounder: Audio-Visual Evaluations for Foundation Models","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.AI","cs.CV","cs.SD","eess.AS"],"primary_cat":"cs.MM","authors_text":"Ameya Prabhu, A. Sophia Koepke, Daniil Zverev, Matthias Bethge, Thadd\\\"aus Wiedemer, Wieland Brendel","submitted_at":"2025-08-11T17:53:23Z","abstract_excerpt":"The emergence of audio-visual foundation models underscores the importance of reliably assessing their multi-modal understanding. The VGGSound dataset is commonly used as a benchmark for evaluation audio-visual classification. However, our analysis identifies several limitations of VGGSound, including incomplete labelling, partially overlapping classes, and misaligned modalities. These lead to distorted evaluations of auditory and visual capabilities. To address these limitations, we introduce VGGSounder, a comprehensively re-annotated, multi-label test set that extends VGGSound and is specifi"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2508.08237","kind":"arxiv","version":4},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2508.08237/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-04T01:09:38Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"zL/wvSvMU8jJLHk0p1VcjYQP+p7EhTvllboTgBzWplFFfBaSwuZGQoNIASI8k1LX3W80dDkXU4BYwHgGnMtfAA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-07T22:12:34.593304Z"},"content_sha256":"b48ea6af5e463a3813b389448195018471bedff306485bb69b635f7f98cf6759","schema_version":"1.0","event_id":"sha256:b48ea6af5e463a3813b389448195018471bedff306485bb69b635f7f98cf6759"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/UEFX2XFQV7VY3QTDAWQNVFNCSM/bundle.json","state_url":"https://pith.science/pith/UEFX2XFQV7VY3QTDAWQNVFNCSM/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/UEFX2XFQV7VY3QTDAWQNVFNCSM/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-07T22:12:34Z","links":{"resolver":"https://pith.science/pith/UEFX2XFQV7VY3QTDAWQNVFNCSM","bundle":"https://pith.science/pith/UEFX2XFQV7VY3QTDAWQNVFNCSM/bundle.json","state":"https://pith.science/pith/UEFX2XFQV7VY3QTDAWQNVFNCSM/state.json","well_known_bundle":"https://pith.science/.well-known/pith/UEFX2XFQV7VY3QTDAWQNVFNCSM/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2025:UEFX2XFQV7VY3QTDAWQNVFNCSM","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"5245ecaca821eea73d7cc9eb2741bb20a31913ee80b9e52742558d5ad95930e8","cross_cats_sorted":["cs.AI","cs.CV","cs.SD","eess.AS"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.MM","submitted_at":"2025-08-11T17:53:23Z","title_canon_sha256":"a62da705add4e2b7a8caa04d55246e53ac4fa47f7a4b47fb5049a650a55966c9"},"schema_version":"1.0","source":{"id":"2508.08237","kind":"arxiv","version":4}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2508.08237","created_at":"2026-06-04T01:09:38Z"},{"alias_kind":"arxiv_version","alias_value":"2508.08237v4","created_at":"2026-06-04T01:09:38Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2508.08237","created_at":"2026-06-04T01:09:38Z"},{"alias_kind":"pith_short_12","alias_value":"UEFX2XFQV7VY","created_at":"2026-06-04T01:09:38Z"},{"alias_kind":"pith_short_16","alias_value":"UEFX2XFQV7VY3QTD","created_at":"2026-06-04T01:09:38Z"},{"alias_kind":"pith_short_8","alias_value":"UEFX2XFQ","created_at":"2026-06-04T01:09:38Z"}],"graph_snapshots":[{"event_id":"sha256:b48ea6af5e463a3813b389448195018471bedff306485bb69b635f7f98cf6759","target":"graph","created_at":"2026-06-04T01:09:38Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2508.08237/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"The emergence of audio-visual foundation models underscores the importance of reliably assessing their multi-modal understanding. The VGGSound dataset is commonly used as a benchmark for evaluation audio-visual classification. However, our analysis identifies several limitations of VGGSound, including incomplete labelling, partially overlapping classes, and misaligned modalities. These lead to distorted evaluations of auditory and visual capabilities. To address these limitations, we introduce VGGSounder, a comprehensively re-annotated, multi-label test set that extends VGGSound and is specifi","authors_text":"Ameya Prabhu, A. Sophia Koepke, Daniil Zverev, Matthias Bethge, Thadd\\\"aus Wiedemer, Wieland Brendel","cross_cats":["cs.AI","cs.CV","cs.SD","eess.AS"],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.MM","submitted_at":"2025-08-11T17:53:23Z","title":"VGGSounder: Audio-Visual Evaluations for Foundation Models"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2508.08237","kind":"arxiv","version":4},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:0f33c896bc9b1131b25de5b039fef3b5f537276c411a6fc8338f27f286c2d838","target":"record","created_at":"2026-06-04T01:09:38Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"5245ecaca821eea73d7cc9eb2741bb20a31913ee80b9e52742558d5ad95930e8","cross_cats_sorted":["cs.AI","cs.CV","cs.SD","eess.AS"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.MM","submitted_at":"2025-08-11T17:53:23Z","title_canon_sha256":"a62da705add4e2b7a8caa04d55246e53ac4fa47f7a4b47fb5049a650a55966c9"},"schema_version":"1.0","source":{"id":"2508.08237","kind":"arxiv","version":4}},"canonical_sha256":"a10b7d5cb0afeb8dc26305a0da95a2932f29965fa4277eabc2e9f37d6f603bdc","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"a10b7d5cb0afeb8dc26305a0da95a2932f29965fa4277eabc2e9f37d6f603bdc","first_computed_at":"2026-06-04T01:09:38.038367Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-06-04T01:09:38.038367Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"JBFglGdfnZUF9PBGN43zGjr7FxV10M6dufFb4ET5jPn2drILcFKlunducH5OYOKG32aJC2ZDR4VpJriCYmbBCQ==","signature_status":"signed_v1","signed_at":"2026-06-04T01:09:38.038975Z","signed_message":"canonical_sha256_bytes"},"source_id":"2508.08237","source_kind":"arxiv","source_version":4}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:0f33c896bc9b1131b25de5b039fef3b5f537276c411a6fc8338f27f286c2d838","sha256:b48ea6af5e463a3813b389448195018471bedff306485bb69b635f7f98cf6759"],"state_sha256":"d5056dc8b21a4edacba02539c4262de41bcdae34057b4430d85641736ed3337b"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"G7W13rBSGcqxqDLoDm6V8dygWc+2f5yeLoB41hJA0Woyk1tF+kCh2DkpvqklJuX/QxZ5MBIwFsW79jD8swbPDg==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-07T22:12:34.597288Z","bundle_sha256":"371536a651943a26326e9b8de7dd1c595140dad0a95048bd1916b6b3845f39b2"}}