{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2019:Y5MS6J3HTTM7YXO7DY2SZAOLGN","short_pith_number":"pith:Y5MS6J3H","schema_version":"1.0","canonical_sha256":"c7592f27679cd9fc5ddf1e352c81cb336f5111d6d139c21fa79c056a2f5e9dee","source":{"kind":"arxiv","id":"1903.10172","version":1},"attestation_state":"computed","paper":{"title":"Looking Fast and Slow: Memory-Guided Mobile Video Object Detection","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CV","authors_text":"Dmitry Kalenichenko, Marie White, Mason Liu, Menglong Zhu, Yinxiao Li","submitted_at":"2019-03-25T08:30:58Z","abstract_excerpt":"With a single eye fixation lasting a fraction of a second, the human visual system is capable of forming a rich representation of a complex environment, reaching a holistic understanding which facilitates object recognition and detection. This phenomenon is known as recognizing the \"gist\" of the scene and is accomplished by relying on relevant prior knowledge. This paper addresses the analogous question of whether using memory in computer vision systems can not only improve the accuracy of object detection in video streams, but also reduce the computation time. By interleaving conventional fea"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1903.10172","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2019-03-25T08:30:58Z","cross_cats_sorted":[],"title_canon_sha256":"b0d73cd2e63d9acdeb8f536c2e613cbe8c8a40d7196b476265b42fb821c29810","abstract_canon_sha256":"8b47481cad0c2471f8fe4c56903763b993e1e97675d7f4b985958dbbd4da5745"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:50:30.472847Z","signature_b64":"IZ/CM+eM7mVbt7oChKmConGCpLmUtOBko83fsco8zYm2IxUIyiamDOaa3eSUXR31zrdY7UM8xSHchNCaKyA9AQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"c7592f27679cd9fc5ddf1e352c81cb336f5111d6d139c21fa79c056a2f5e9dee","last_reissued_at":"2026-05-17T23:50:30.472087Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:50:30.472087Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Looking Fast and Slow: Memory-Guided Mobile Video Object Detection","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CV","authors_text":"Dmitry Kalenichenko, Marie White, Mason Liu, Menglong Zhu, Yinxiao Li","submitted_at":"2019-03-25T08:30:58Z","abstract_excerpt":"With a single eye fixation lasting a fraction of a second, the human visual system is capable of forming a rich representation of a complex environment, reaching a holistic understanding which facilitates object recognition and detection. This phenomenon is known as recognizing the \"gist\" of the scene and is accomplished by relying on relevant prior knowledge. This paper addresses the analogous question of whether using memory in computer vision systems can not only improve the accuracy of object detection in video streams, but also reduce the computation time. By interleaving conventional fea"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1903.10172","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1903.10172","created_at":"2026-05-17T23:50:30.472209+00:00"},{"alias_kind":"arxiv_version","alias_value":"1903.10172v1","created_at":"2026-05-17T23:50:30.472209+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1903.10172","created_at":"2026-05-17T23:50:30.472209+00:00"},{"alias_kind":"pith_short_12","alias_value":"Y5MS6J3HTTM7","created_at":"2026-05-18T12:33:33.725879+00:00"},{"alias_kind":"pith_short_16","alias_value":"Y5MS6J3HTTM7YXO7","created_at":"2026-05-18T12:33:33.725879+00:00"},{"alias_kind":"pith_short_8","alias_value":"Y5MS6J3H","created_at":"2026-05-18T12:33:33.725879+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":1,"internal_anchor_count":1,"sample":[{"citing_arxiv_id":"2605.15423","citing_title":"MR2-ByteTrack: CNN and Transformer-based Video Object Detection for AI-augmented Embedded Vision Sensor Nodes","ref_index":31,"is_internal_anchor":true}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/Y5MS6J3HTTM7YXO7DY2SZAOLGN","json":"https://pith.science/pith/Y5MS6J3HTTM7YXO7DY2SZAOLGN.json","graph_json":"https://pith.science/api/pith-number/Y5MS6J3HTTM7YXO7DY2SZAOLGN/graph.json","events_json":"https://pith.science/api/pith-number/Y5MS6J3HTTM7YXO7DY2SZAOLGN/events.json","paper":"https://pith.science/paper/Y5MS6J3H"},"agent_actions":{"view_html":"https://pith.science/pith/Y5MS6J3HTTM7YXO7DY2SZAOLGN","download_json":"https://pith.science/pith/Y5MS6J3HTTM7YXO7DY2SZAOLGN.json","view_paper":"https://pith.science/paper/Y5MS6J3H","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1903.10172&json=true","fetch_graph":"https://pith.science/api/pith-number/Y5MS6J3HTTM7YXO7DY2SZAOLGN/graph.json","fetch_events":"https://pith.science/api/pith-number/Y5MS6J3HTTM7YXO7DY2SZAOLGN/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/Y5MS6J3HTTM7YXO7DY2SZAOLGN/action/timestamp_anchor","attest_storage":"https://pith.science/pith/Y5MS6J3HTTM7YXO7DY2SZAOLGN/action/storage_attestation","attest_author":"https://pith.science/pith/Y5MS6J3HTTM7YXO7DY2SZAOLGN/action/author_attestation","sign_citation":"https://pith.science/pith/Y5MS6J3HTTM7YXO7DY2SZAOLGN/action/citation_signature","submit_replication":"https://pith.science/pith/Y5MS6J3HTTM7YXO7DY2SZAOLGN/action/replication_record"}},"created_at":"2026-05-17T23:50:30.472209+00:00","updated_at":"2026-05-17T23:50:30.472209+00:00"}