{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2019:B4ZQHKKG6JPAOLVQLBMRE4GPDY","short_pith_number":"pith:B4ZQHKKG","schema_version":"1.0","canonical_sha256":"0f3303a946f25e072eb058591270cf1e15d9c57b77902d4efc4d1ef6989022b2","source":{"kind":"arxiv","id":"1901.11390","version":1},"attestation_state":"computed","paper":{"title":"MONet: Unsupervised Scene Decomposition and Representation","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.LG","stat.ML"],"primary_cat":"cs.CV","authors_text":"Alexander Lerchner, Christopher P. Burgess, Irina Higgins, Loic Matthey, Matt Botvinick, Nicholas Watters, Rishabh Kabra","submitted_at":"2019-01-22T18:55:34Z","abstract_excerpt":"The ability to decompose scenes in terms of abstract building blocks is crucial for general intelligence. Where those basic building blocks share meaningful properties, interactions and other regularities across scenes, such decompositions can simplify reasoning and facilitate imagination of novel scenarios. In particular, representing perceptual observations in terms of entities should improve data efficiency and transfer performance on a wide range of tasks. Thus we need models capable of discovering useful decompositions of scenes by identifying units with such regularities and representing"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1901.11390","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2019-01-22T18:55:34Z","cross_cats_sorted":["cs.LG","stat.ML"],"title_canon_sha256":"2aea8cc1644f7a189d0cf08b15527af5b7d4e07ee8dd96fbc8cad10be5caa841","abstract_canon_sha256":"492ab8725e98cbe5becbc65df113a85e2acc8d3b8923e58a7764d1324c26fa88"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:55:01.991236Z","signature_b64":"H0sxwn5fNUYbEgk5ZWmhqRVzEN5XUzKAbVpJnO59agxvq0tOYbqAdgm7a2u3WitvHp/wp1Zv0xG/MimQNac2Cw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"0f3303a946f25e072eb058591270cf1e15d9c57b77902d4efc4d1ef6989022b2","last_reissued_at":"2026-05-17T23:55:01.990668Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:55:01.990668Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"MONet: Unsupervised Scene Decomposition and Representation","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.LG","stat.ML"],"primary_cat":"cs.CV","authors_text":"Alexander Lerchner, Christopher P. Burgess, Irina Higgins, Loic Matthey, Matt Botvinick, Nicholas Watters, Rishabh Kabra","submitted_at":"2019-01-22T18:55:34Z","abstract_excerpt":"The ability to decompose scenes in terms of abstract building blocks is crucial for general intelligence. Where those basic building blocks share meaningful properties, interactions and other regularities across scenes, such decompositions can simplify reasoning and facilitate imagination of novel scenarios. In particular, representing perceptual observations in terms of entities should improve data efficiency and transfer performance on a wide range of tasks. Thus we need models capable of discovering useful decompositions of scenes by identifying units with such regularities and representing"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1901.11390","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1901.11390","created_at":"2026-05-17T23:55:01.990753+00:00"},{"alias_kind":"arxiv_version","alias_value":"1901.11390v1","created_at":"2026-05-17T23:55:01.990753+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1901.11390","created_at":"2026-05-17T23:55:01.990753+00:00"},{"alias_kind":"pith_short_12","alias_value":"B4ZQHKKG6JPA","created_at":"2026-05-18T12:33:12.712433+00:00"},{"alias_kind":"pith_short_16","alias_value":"B4ZQHKKG6JPAOLVQ","created_at":"2026-05-18T12:33:12.712433+00:00"},{"alias_kind":"pith_short_8","alias_value":"B4ZQHKKG","created_at":"2026-05-18T12:33:12.712433+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":15,"internal_anchor_count":7,"sample":[{"citing_arxiv_id":"2605.15975","citing_title":"Learning Bilevel Policies over Symbolic World Models for Long-Horizon Planning","ref_index":16,"is_internal_anchor":true},{"citing_arxiv_id":"2605.17827","citing_title":"Content-Style Identification via Differential Independence","ref_index":87,"is_internal_anchor":true},{"citing_arxiv_id":"2308.08708","citing_title":"Consciousness in Artificial Intelligence: Insights from the Science of Consciousness","ref_index":1,"is_internal_anchor":true},{"citing_arxiv_id":"1910.01442","citing_title":"CLEVRER: CoLlision Events for Video REpresentation and Reasoning","ref_index":31,"is_internal_anchor":true},{"citing_arxiv_id":"2409.01652","citing_title":"ReKep: Spatio-Temporal Reasoning of Relational Keypoint Constraints for Robotic Manipulation","ref_index":27,"is_internal_anchor":true},{"citing_arxiv_id":"2602.22779","citing_title":"TrajTok: Learning Trajectory Tokens enables better Video Understanding","ref_index":7,"is_internal_anchor":true},{"citing_arxiv_id":"2605.12929","citing_title":"Anatomy-Slot: Unsupervised Anatomical Factorization for Homologous Bilateral Reasoning in Retinal Diagnosis","ref_index":2,"is_internal_anchor":true},{"citing_arxiv_id":"2605.12021","citing_title":"What-Where Transformer: A Slot-Centric Visual Backbone for Concurrent Representation and Localization","ref_index":2,"is_internal_anchor":false},{"citing_arxiv_id":"2605.03413","citing_title":"Learning to Theorize the World from Observation","ref_index":246,"is_internal_anchor":false},{"citing_arxiv_id":"2605.06481","citing_title":"OA-WAM: Object-Addressable World Action Model for Robust Robot Manipulation","ref_index":6,"is_internal_anchor":false},{"citing_arxiv_id":"2604.19683","citing_title":"Mask World Model: Predicting What Matters for Robust Robot Policy Learning","ref_index":6,"is_internal_anchor":false},{"citing_arxiv_id":"2604.19480","citing_title":"Deep sprite-based image models: An analysis","ref_index":2,"is_internal_anchor":false},{"citing_arxiv_id":"2604.07904","citing_title":"Kuramoto Oscillatory Phase Encoding: Neuro-inspired Synchronization for Improved Learning Efficiency","ref_index":2,"is_internal_anchor":false},{"citing_arxiv_id":"2604.07712","citing_title":"CausalVAE as a Plug-in for World Models: Towards Reliable Counterfactual Dynamics","ref_index":3,"is_internal_anchor":false},{"citing_arxiv_id":"2605.02323","citing_title":"When Attention Collapses: Residual Evidence Modeling for Compositional Inference","ref_index":29,"is_internal_anchor":false}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/B4ZQHKKG6JPAOLVQLBMRE4GPDY","json":"https://pith.science/pith/B4ZQHKKG6JPAOLVQLBMRE4GPDY.json","graph_json":"https://pith.science/api/pith-number/B4ZQHKKG6JPAOLVQLBMRE4GPDY/graph.json","events_json":"https://pith.science/api/pith-number/B4ZQHKKG6JPAOLVQLBMRE4GPDY/events.json","paper":"https://pith.science/paper/B4ZQHKKG"},"agent_actions":{"view_html":"https://pith.science/pith/B4ZQHKKG6JPAOLVQLBMRE4GPDY","download_json":"https://pith.science/pith/B4ZQHKKG6JPAOLVQLBMRE4GPDY.json","view_paper":"https://pith.science/paper/B4ZQHKKG","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1901.11390&json=true","fetch_graph":"https://pith.science/api/pith-number/B4ZQHKKG6JPAOLVQLBMRE4GPDY/graph.json","fetch_events":"https://pith.science/api/pith-number/B4ZQHKKG6JPAOLVQLBMRE4GPDY/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/B4ZQHKKG6JPAOLVQLBMRE4GPDY/action/timestamp_anchor","attest_storage":"https://pith.science/pith/B4ZQHKKG6JPAOLVQLBMRE4GPDY/action/storage_attestation","attest_author":"https://pith.science/pith/B4ZQHKKG6JPAOLVQLBMRE4GPDY/action/author_attestation","sign_citation":"https://pith.science/pith/B4ZQHKKG6JPAOLVQLBMRE4GPDY/action/citation_signature","submit_replication":"https://pith.science/pith/B4ZQHKKG6JPAOLVQLBMRE4GPDY/action/replication_record"}},"created_at":"2026-05-17T23:55:01.990753+00:00","updated_at":"2026-05-17T23:55:01.990753+00:00"}