{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2015:VENLCNXZDRSPSEXBJMWHTBFB44","short_pith_number":"pith:VENLCNXZ","schema_version":"1.0","canonical_sha256":"a91ab136f91c64f912e14b2c7984a1e71b86992b165ccf1f638b223e2c349f36","source":{"kind":"arxiv","id":"1506.06724","version":1},"attestation_state":"computed","paper":{"title":"Aligning Books and Movies: Towards Story-like Visual Explanations by Watching Movies and Reading Books","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.CL"],"primary_cat":"cs.CV","authors_text":"Antonio Torralba, Raquel Urtasun, Richard Zemel, Ruslan Salakhutdinov, Ryan Kiros, Sanja Fidler, Yukun Zhu","submitted_at":"2015-06-22T19:26:56Z","abstract_excerpt":"Books are a rich source of both fine-grained information, how a character, an object or a scene looks like, as well as high-level semantics, what someone is thinking, feeling and how these states evolve through a story. This paper aims to align books to their movie releases in order to provide rich descriptive explanations for visual content that go semantically far beyond the captions available in current datasets. To align movies and books we exploit a neural sentence embedding that is trained in an unsupervised way from a large corpus of books, as well as a video-text neural embedding for c"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1506.06724","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2015-06-22T19:26:56Z","cross_cats_sorted":["cs.CL"],"title_canon_sha256":"b33b028d4241f9e4e76600a999cbe83e7a270c40bc074898fb36f8391807b935","abstract_canon_sha256":"e7fde1281624c4268a39007613b05e48ba61b09999ed3d059ae1aa6c5c5937d7"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T01:41:44.474561Z","signature_b64":"WOIKHfijBADVYy8FXkCBStibxrbV/vmBBsbE5In6johddrbzg7ye76OW1HOw7IghBQkwr8ktBDG9RAzSAb/LBg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"a91ab136f91c64f912e14b2c7984a1e71b86992b165ccf1f638b223e2c349f36","last_reissued_at":"2026-05-18T01:41:44.474006Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T01:41:44.474006Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Aligning Books and Movies: Towards Story-like Visual Explanations by Watching Movies and Reading Books","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.CL"],"primary_cat":"cs.CV","authors_text":"Antonio Torralba, Raquel Urtasun, Richard Zemel, Ruslan Salakhutdinov, Ryan Kiros, Sanja Fidler, Yukun Zhu","submitted_at":"2015-06-22T19:26:56Z","abstract_excerpt":"Books are a rich source of both fine-grained information, how a character, an object or a scene looks like, as well as high-level semantics, what someone is thinking, feeling and how these states evolve through a story. This paper aims to align books to their movie releases in order to provide rich descriptive explanations for visual content that go semantically far beyond the captions available in current datasets. To align movies and books we exploit a neural sentence embedding that is trained in an unsupervised way from a large corpus of books, as well as a video-text neural embedding for c"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1506.06724","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1506.06724","created_at":"2026-05-18T01:41:44.474074+00:00"},{"alias_kind":"arxiv_version","alias_value":"1506.06724v1","created_at":"2026-05-18T01:41:44.474074+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1506.06724","created_at":"2026-05-18T01:41:44.474074+00:00"},{"alias_kind":"pith_short_12","alias_value":"VENLCNXZDRSP","created_at":"2026-05-18T12:29:44.643036+00:00"},{"alias_kind":"pith_short_16","alias_value":"VENLCNXZDRSPSEXB","created_at":"2026-05-18T12:29:44.643036+00:00"},{"alias_kind":"pith_short_8","alias_value":"VENLCNXZ","created_at":"2026-05-18T12:29:44.643036+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":8,"internal_anchor_count":1,"sample":[{"citing_arxiv_id":"1908.10063","citing_title":"FinBERT: Financial Sentiment Analysis with Pre-trained Language Models","ref_index":35,"is_internal_anchor":true},{"citing_arxiv_id":"2311.12983","citing_title":"GAIA: a benchmark for General AI Assistants","ref_index":150,"is_internal_anchor":false},{"citing_arxiv_id":"2605.10119","citing_title":"Refresh-Scaling the Memory of Balanced Adam","ref_index":9,"is_internal_anchor":false},{"citing_arxiv_id":"2605.05902","citing_title":"Evaluating Non-English Developer Support in Machine Learning for Software Engineering","ref_index":35,"is_internal_anchor":false},{"citing_arxiv_id":"1905.07830","citing_title":"HellaSwag: Can a Machine Really Finish Your Sentence?","ref_index":19,"is_internal_anchor":false},{"citing_arxiv_id":"2205.01068","citing_title":"OPT: Open Pre-trained Transformer Language Models","ref_index":284,"is_internal_anchor":false},{"citing_arxiv_id":"2604.22838","citing_title":"Neural Network Optimization Reimagined: Decoupled Techniques for Scratch and Fine-Tuning","ref_index":54,"is_internal_anchor":false},{"citing_arxiv_id":"1907.11692","citing_title":"RoBERTa: A Robustly Optimized BERT Pretraining Approach","ref_index":51,"is_internal_anchor":false}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/VENLCNXZDRSPSEXBJMWHTBFB44","json":"https://pith.science/pith/VENLCNXZDRSPSEXBJMWHTBFB44.json","graph_json":"https://pith.science/api/pith-number/VENLCNXZDRSPSEXBJMWHTBFB44/graph.json","events_json":"https://pith.science/api/pith-number/VENLCNXZDRSPSEXBJMWHTBFB44/events.json","paper":"https://pith.science/paper/VENLCNXZ"},"agent_actions":{"view_html":"https://pith.science/pith/VENLCNXZDRSPSEXBJMWHTBFB44","download_json":"https://pith.science/pith/VENLCNXZDRSPSEXBJMWHTBFB44.json","view_paper":"https://pith.science/paper/VENLCNXZ","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1506.06724&json=true","fetch_graph":"https://pith.science/api/pith-number/VENLCNXZDRSPSEXBJMWHTBFB44/graph.json","fetch_events":"https://pith.science/api/pith-number/VENLCNXZDRSPSEXBJMWHTBFB44/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/VENLCNXZDRSPSEXBJMWHTBFB44/action/timestamp_anchor","attest_storage":"https://pith.science/pith/VENLCNXZDRSPSEXBJMWHTBFB44/action/storage_attestation","attest_author":"https://pith.science/pith/VENLCNXZDRSPSEXBJMWHTBFB44/action/author_attestation","sign_citation":"https://pith.science/pith/VENLCNXZDRSPSEXBJMWHTBFB44/action/citation_signature","submit_replication":"https://pith.science/pith/VENLCNXZDRSPSEXBJMWHTBFB44/action/replication_record"}},"created_at":"2026-05-18T01:41:44.474074+00:00","updated_at":"2026-05-18T01:41:44.474074+00:00"}