{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2017:4OLKQ32Q6P674EMJ6PBCSUCYRV","short_pith_number":"pith:4OLKQ32Q","canonical_record":{"source":{"id":"1711.03800","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2017-11-10T13:04:55Z","cross_cats_sorted":["cs.CL","cs.HC"],"title_canon_sha256":"81971ff780b9301ca6fab27f44ed3597c47c656040a25ac181eeefd87dad8e92","abstract_canon_sha256":"97f0b4a943516072d2c1f223e1407e865a26a79d8400df44324a1b0d05a1df26"},"schema_version":"1.0"},"canonical_sha256":"e396a86f50f3fdfe1189f3c22950588d5f17f1f8c3a893c0e33d9a9f9623b81b","source":{"kind":"arxiv","id":"1711.03800","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1711.03800","created_at":"2026-05-18T00:28:51Z"},{"alias_kind":"arxiv_version","alias_value":"1711.03800v2","created_at":"2026-05-18T00:28:51Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1711.03800","created_at":"2026-05-18T00:28:51Z"},{"alias_kind":"pith_short_12","alias_value":"4OLKQ32Q6P67","created_at":"2026-05-18T12:31:00Z"},{"alias_kind":"pith_short_16","alias_value":"4OLKQ32Q6P674EMJ","created_at":"2026-05-18T12:31:00Z"},{"alias_kind":"pith_short_8","alias_value":"4OLKQ32Q","created_at":"2026-05-18T12:31:00Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2017:4OLKQ32Q6P674EMJ6PBCSUCYRV","target":"record","payload":{"canonical_record":{"source":{"id":"1711.03800","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2017-11-10T13:04:55Z","cross_cats_sorted":["cs.CL","cs.HC"],"title_canon_sha256":"81971ff780b9301ca6fab27f44ed3597c47c656040a25ac181eeefd87dad8e92","abstract_canon_sha256":"97f0b4a943516072d2c1f223e1407e865a26a79d8400df44324a1b0d05a1df26"},"schema_version":"1.0"},"canonical_sha256":"e396a86f50f3fdfe1189f3c22950588d5f17f1f8c3a893c0e33d9a9f9623b81b","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:28:51.702989Z","signature_b64":"bbZI5FSCObewsNoF8HSmDDk6bIPTP2VFuFo6WZhRArdOCBqCLaBuxGyJqidFETRBIJwu4JrpTsc+h4grOiukCg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"e396a86f50f3fdfe1189f3c22950588d5f17f1f8c3a893c0e33d9a9f9623b81b","last_reissued_at":"2026-05-18T00:28:51.702328Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:28:51.702328Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1711.03800","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:28:51Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"+lG49561SqEulnvLoK4cJVTSc0Ji9yUO4+q+8oeb3Zd3QX2uG5wKhWQ0OFDE8L60YDJKfPx3+FSWUPlgmRz4BA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-31T09:45:09.009623Z"},"content_sha256":"2e7e57d5fe1e39624bdb2e2788a5e3835afe0c27b7c17ca7f5448c740fd11e81","schema_version":"1.0","event_id":"sha256:2e7e57d5fe1e39624bdb2e2788a5e3835afe0c27b7c17ca7f5448c740fd11e81"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2017:4OLKQ32Q6P674EMJ6PBCSUCYRV","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Object Referring in Visual Scene with Spoken Language","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.CL","cs.HC"],"primary_cat":"cs.CV","authors_text":"Arun Balajee Vasudevan, Dengxin Dai, Luc Van Gool","submitted_at":"2017-11-10T13:04:55Z","abstract_excerpt":"Object referring has important applications, especially for human-machine interaction. While having received great attention, the task is mainly attacked with written language (text) as input rather than spoken language (speech), which is more natural. This paper investigates Object Referring with Spoken Language (ORSpoken) by presenting two datasets and one novel approach. Objects are annotated with their locations in images, text descriptions and speech descriptions. This makes the datasets ideal for multi-modality learning. The approach is developed by carefully taking down ORSpoken problem"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1711.03800","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:28:51Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"gSCfhtVj1ghAQp0wU7+Jq+kXDdPfNsWXDIBIV6KSVx5X0TxN4XIAlDCdC2Eg72zVDbD10bcM5kFfscyPQXG2Bw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-31T09:45:09.009977Z"},"content_sha256":"5c0463b77c3b522126ead44244d8726024251ccc8553a6401cb553f1e7c12048","schema_version":"1.0","event_id":"sha256:5c0463b77c3b522126ead44244d8726024251ccc8553a6401cb553f1e7c12048"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/4OLKQ32Q6P674EMJ6PBCSUCYRV/bundle.json","state_url":"https://pith.science/pith/4OLKQ32Q6P674EMJ6PBCSUCYRV/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/4OLKQ32Q6P674EMJ6PBCSUCYRV/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-31T09:45:09Z","links":{"resolver":"https://pith.science/pith/4OLKQ32Q6P674EMJ6PBCSUCYRV","bundle":"https://pith.science/pith/4OLKQ32Q6P674EMJ6PBCSUCYRV/bundle.json","state":"https://pith.science/pith/4OLKQ32Q6P674EMJ6PBCSUCYRV/state.json","well_known_bundle":"https://pith.science/.well-known/pith/4OLKQ32Q6P674EMJ6PBCSUCYRV/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2017:4OLKQ32Q6P674EMJ6PBCSUCYRV","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"97f0b4a943516072d2c1f223e1407e865a26a79d8400df44324a1b0d05a1df26","cross_cats_sorted":["cs.CL","cs.HC"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2017-11-10T13:04:55Z","title_canon_sha256":"81971ff780b9301ca6fab27f44ed3597c47c656040a25ac181eeefd87dad8e92"},"schema_version":"1.0","source":{"id":"1711.03800","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1711.03800","created_at":"2026-05-18T00:28:51Z"},{"alias_kind":"arxiv_version","alias_value":"1711.03800v2","created_at":"2026-05-18T00:28:51Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1711.03800","created_at":"2026-05-18T00:28:51Z"},{"alias_kind":"pith_short_12","alias_value":"4OLKQ32Q6P67","created_at":"2026-05-18T12:31:00Z"},{"alias_kind":"pith_short_16","alias_value":"4OLKQ32Q6P674EMJ","created_at":"2026-05-18T12:31:00Z"},{"alias_kind":"pith_short_8","alias_value":"4OLKQ32Q","created_at":"2026-05-18T12:31:00Z"}],"graph_snapshots":[{"event_id":"sha256:5c0463b77c3b522126ead44244d8726024251ccc8553a6401cb553f1e7c12048","target":"graph","created_at":"2026-05-18T00:28:51Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Object referring has important applications, especially for human-machine interaction. While having received great attention, the task is mainly attacked with written language (text) as input rather than spoken language (speech), which is more natural. This paper investigates Object Referring with Spoken Language (ORSpoken) by presenting two datasets and one novel approach. Objects are annotated with their locations in images, text descriptions and speech descriptions. This makes the datasets ideal for multi-modality learning. The approach is developed by carefully taking down ORSpoken problem","authors_text":"Arun Balajee Vasudevan, Dengxin Dai, Luc Van Gool","cross_cats":["cs.CL","cs.HC"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2017-11-10T13:04:55Z","title":"Object Referring in Visual Scene with Spoken Language"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1711.03800","kind":"arxiv","version":2},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:2e7e57d5fe1e39624bdb2e2788a5e3835afe0c27b7c17ca7f5448c740fd11e81","target":"record","created_at":"2026-05-18T00:28:51Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"97f0b4a943516072d2c1f223e1407e865a26a79d8400df44324a1b0d05a1df26","cross_cats_sorted":["cs.CL","cs.HC"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2017-11-10T13:04:55Z","title_canon_sha256":"81971ff780b9301ca6fab27f44ed3597c47c656040a25ac181eeefd87dad8e92"},"schema_version":"1.0","source":{"id":"1711.03800","kind":"arxiv","version":2}},"canonical_sha256":"e396a86f50f3fdfe1189f3c22950588d5f17f1f8c3a893c0e33d9a9f9623b81b","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"e396a86f50f3fdfe1189f3c22950588d5f17f1f8c3a893c0e33d9a9f9623b81b","first_computed_at":"2026-05-18T00:28:51.702328Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:28:51.702328Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"bbZI5FSCObewsNoF8HSmDDk6bIPTP2VFuFo6WZhRArdOCBqCLaBuxGyJqidFETRBIJwu4JrpTsc+h4grOiukCg==","signature_status":"signed_v1","signed_at":"2026-05-18T00:28:51.702989Z","signed_message":"canonical_sha256_bytes"},"source_id":"1711.03800","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:2e7e57d5fe1e39624bdb2e2788a5e3835afe0c27b7c17ca7f5448c740fd11e81","sha256:5c0463b77c3b522126ead44244d8726024251ccc8553a6401cb553f1e7c12048"],"state_sha256":"762130c3e92bc3f27311a6f048fc23723a01d166bee20c9d85d69e31b07dea91"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"qmxGlbwFRTLYIpqXGh82ZZq5zjoxkEvHbqpW5xw8T0rh5rfpnN+tlTIs0LUa/goAvwWPHsSypHF6AWTqPudxAw==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-31T09:45:09.012020Z","bundle_sha256":"84af9338f9a0f346a020ec06c8b087e4b879c1ba56fd1aa8608a45da9d8fd7b0"}}