{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2019:XPP56FCF7WP7JCZXM2EF2TUF4M","short_pith_number":"pith:XPP56FCF","canonical_record":{"source":{"id":"1907.09238","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.SD","submitted_at":"2019-07-22T11:21:08Z","cross_cats_sorted":["eess.AS"],"title_canon_sha256":"b91f99349de94884a3a8eccb7e248b52866cd7352387d91755cfee8a725b580e","abstract_canon_sha256":"8142468cd21cb9fa80265e9b38d3c37bd70e9b0110174306074a381cf2e8a4b8"},"schema_version":"1.0"},"canonical_sha256":"bbdfdf1445fd9ff48b3766885d4e85e301489fe981e7abe453f890deccc41a30","source":{"kind":"arxiv","id":"1907.09238","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1907.09238","created_at":"2026-05-17T23:39:59Z"},{"alias_kind":"arxiv_version","alias_value":"1907.09238v1","created_at":"2026-05-17T23:39:59Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1907.09238","created_at":"2026-05-17T23:39:59Z"},{"alias_kind":"pith_short_12","alias_value":"XPP56FCF7WP7","created_at":"2026-05-18T12:33:33Z"},{"alias_kind":"pith_short_16","alias_value":"XPP56FCF7WP7JCZX","created_at":"2026-05-18T12:33:33Z"},{"alias_kind":"pith_short_8","alias_value":"XPP56FCF","created_at":"2026-05-18T12:33:33Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2019:XPP56FCF7WP7JCZXM2EF2TUF4M","target":"record","payload":{"canonical_record":{"source":{"id":"1907.09238","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.SD","submitted_at":"2019-07-22T11:21:08Z","cross_cats_sorted":["eess.AS"],"title_canon_sha256":"b91f99349de94884a3a8eccb7e248b52866cd7352387d91755cfee8a725b580e","abstract_canon_sha256":"8142468cd21cb9fa80265e9b38d3c37bd70e9b0110174306074a381cf2e8a4b8"},"schema_version":"1.0"},"canonical_sha256":"bbdfdf1445fd9ff48b3766885d4e85e301489fe981e7abe453f890deccc41a30","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:39:59.758001Z","signature_b64":"zHKNN2cYwLYQnFtuKGCP30kb6SRQVb7yUwdbXNQU5+fS4SmZiW1BZrGUOrP+6fHYySwLmpgJrTcB0ylkZBErAA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"bbdfdf1445fd9ff48b3766885d4e85e301489fe981e7abe453f890deccc41a30","last_reissued_at":"2026-05-17T23:39:59.757499Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:39:59.757499Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1907.09238","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:39:59Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"4LDlmrhysRuDbD8LlEzqAQp8ERrFfT+NMBNnHVeEZTSHPHZGWjO6W/BN/It+Vt4UTYuqwh6y0LnDwDkHcawwDw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-30T07:44:47.817889Z"},"content_sha256":"679eeb319ba9640ffa4cef05ede93b33c88e208efd17d9ccb1d376769b747641","schema_version":"1.0","event_id":"sha256:679eeb319ba9640ffa4cef05ede93b33c88e208efd17d9ccb1d376769b747641"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2019:XPP56FCF7WP7JCZXM2EF2TUF4M","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Crowdsourcing a Dataset of Audio Captions","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["eess.AS"],"primary_cat":"cs.SD","authors_text":"Konstantinos Drossos, Samuel Lipping, Tuomas Virtanen","submitted_at":"2019-07-22T11:21:08Z","abstract_excerpt":"Audio captioning is a novel field of multi-modal translation and it is the task of creating a textual description of the content of an audio signal (e.g. \"people talking in a big room\"). The creation of a dataset for this task requires a considerable amount of work, rendering the crowdsourcing a very attractive option. In this paper we present a three steps based framework for crowdsourcing an audio captioning dataset, based on concepts and practises followed for the creation of widely used image captioning and machine translations datasets. During the first step initial captions are gathered."},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1907.09238","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:39:59Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"Q3H2VOXRFVBEwF3x+7mUrlMbRHa66J7zjgC+5x7igzZ2RiLSKW1bkeAMfwPGuNwxe+uyvAea7IpXozxMyRW0CQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-30T07:44:47.818240Z"},"content_sha256":"f5a2b4828ac1f1ac58d91e8719ea490d838b5463142f5ac27dc9653b24bb31c3","schema_version":"1.0","event_id":"sha256:f5a2b4828ac1f1ac58d91e8719ea490d838b5463142f5ac27dc9653b24bb31c3"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/XPP56FCF7WP7JCZXM2EF2TUF4M/bundle.json","state_url":"https://pith.science/pith/XPP56FCF7WP7JCZXM2EF2TUF4M/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/XPP56FCF7WP7JCZXM2EF2TUF4M/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-30T07:44:47Z","links":{"resolver":"https://pith.science/pith/XPP56FCF7WP7JCZXM2EF2TUF4M","bundle":"https://pith.science/pith/XPP56FCF7WP7JCZXM2EF2TUF4M/bundle.json","state":"https://pith.science/pith/XPP56FCF7WP7JCZXM2EF2TUF4M/state.json","well_known_bundle":"https://pith.science/.well-known/pith/XPP56FCF7WP7JCZXM2EF2TUF4M/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2019:XPP56FCF7WP7JCZXM2EF2TUF4M","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"8142468cd21cb9fa80265e9b38d3c37bd70e9b0110174306074a381cf2e8a4b8","cross_cats_sorted":["eess.AS"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.SD","submitted_at":"2019-07-22T11:21:08Z","title_canon_sha256":"b91f99349de94884a3a8eccb7e248b52866cd7352387d91755cfee8a725b580e"},"schema_version":"1.0","source":{"id":"1907.09238","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1907.09238","created_at":"2026-05-17T23:39:59Z"},{"alias_kind":"arxiv_version","alias_value":"1907.09238v1","created_at":"2026-05-17T23:39:59Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1907.09238","created_at":"2026-05-17T23:39:59Z"},{"alias_kind":"pith_short_12","alias_value":"XPP56FCF7WP7","created_at":"2026-05-18T12:33:33Z"},{"alias_kind":"pith_short_16","alias_value":"XPP56FCF7WP7JCZX","created_at":"2026-05-18T12:33:33Z"},{"alias_kind":"pith_short_8","alias_value":"XPP56FCF","created_at":"2026-05-18T12:33:33Z"}],"graph_snapshots":[{"event_id":"sha256:f5a2b4828ac1f1ac58d91e8719ea490d838b5463142f5ac27dc9653b24bb31c3","target":"graph","created_at":"2026-05-17T23:39:59Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Audio captioning is a novel field of multi-modal translation and it is the task of creating a textual description of the content of an audio signal (e.g. \"people talking in a big room\"). The creation of a dataset for this task requires a considerable amount of work, rendering the crowdsourcing a very attractive option. In this paper we present a three steps based framework for crowdsourcing an audio captioning dataset, based on concepts and practises followed for the creation of widely used image captioning and machine translations datasets. During the first step initial captions are gathered.","authors_text":"Konstantinos Drossos, Samuel Lipping, Tuomas Virtanen","cross_cats":["eess.AS"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.SD","submitted_at":"2019-07-22T11:21:08Z","title":"Crowdsourcing a Dataset of Audio Captions"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1907.09238","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:679eeb319ba9640ffa4cef05ede93b33c88e208efd17d9ccb1d376769b747641","target":"record","created_at":"2026-05-17T23:39:59Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"8142468cd21cb9fa80265e9b38d3c37bd70e9b0110174306074a381cf2e8a4b8","cross_cats_sorted":["eess.AS"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.SD","submitted_at":"2019-07-22T11:21:08Z","title_canon_sha256":"b91f99349de94884a3a8eccb7e248b52866cd7352387d91755cfee8a725b580e"},"schema_version":"1.0","source":{"id":"1907.09238","kind":"arxiv","version":1}},"canonical_sha256":"bbdfdf1445fd9ff48b3766885d4e85e301489fe981e7abe453f890deccc41a30","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"bbdfdf1445fd9ff48b3766885d4e85e301489fe981e7abe453f890deccc41a30","first_computed_at":"2026-05-17T23:39:59.757499Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:39:59.757499Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"zHKNN2cYwLYQnFtuKGCP30kb6SRQVb7yUwdbXNQU5+fS4SmZiW1BZrGUOrP+6fHYySwLmpgJrTcB0ylkZBErAA==","signature_status":"signed_v1","signed_at":"2026-05-17T23:39:59.758001Z","signed_message":"canonical_sha256_bytes"},"source_id":"1907.09238","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:679eeb319ba9640ffa4cef05ede93b33c88e208efd17d9ccb1d376769b747641","sha256:f5a2b4828ac1f1ac58d91e8719ea490d838b5463142f5ac27dc9653b24bb31c3"],"state_sha256":"c21ca3f5fa27e422b935ec9448ce971a70b749a8a1cbf22934b3a0955d6bbeae"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"hNTxRGeBsk30KbwazfKHAejHTnZCyxmHYZABxD5v+3i+78mThK0JKs4jdpgJ56Hd/hxgMi6wtzNrt297ymuzAg==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-30T07:44:47.820265Z","bundle_sha256":"6d9edcfd560f9b1da3e00cc13a287359c01ce1a514d207e561635e180f4b7ef5"}}