{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2017:ZWUQ2BTYWKO6BXSMX7ZZHVBUKL","short_pith_number":"pith:ZWUQ2BTY","schema_version":"1.0","canonical_sha256":"cda90d0678b29de0de4cbff393d43452e0652d41ec41f2868d278db84a44d031","source":{"kind":"arxiv","id":"1703.06870","version":3},"attestation_state":"computed","paper":{"title":"Mask R-CNN","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CV","authors_text":"Georgia Gkioxari, Kaiming He, Piotr Doll\\'ar, Ross Girshick","submitted_at":"2017-03-20T17:53:38Z","abstract_excerpt":"We present a conceptually simple, flexible, and general framework for object instance segmentation. Our approach efficiently detects objects in an image while simultaneously generating a high-quality segmentation mask for each instance. The method, called Mask R-CNN, extends Faster R-CNN by adding a branch for predicting an object mask in parallel with the existing branch for bounding box recognition. Mask R-CNN is simple to train and adds only a small overhead to Faster R-CNN, running at 5 fps. Moreover, Mask R-CNN is easy to generalize to other tasks, e.g., allowing us to estimate human pose"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1703.06870","kind":"arxiv","version":3},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2017-03-20T17:53:38Z","cross_cats_sorted":[],"title_canon_sha256":"9ebf51c93bb11e32f23c3bdd3b3c4ce7c9e3cf8d704ad7b8f205b4b177e773de","abstract_canon_sha256":"095b52e9f6c300bae1454543a2b00f315b5f2727d258c4498a8e708375d9dcbd"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:25:11.808469Z","signature_b64":"yJAzYlzWYrZ9ty+Af8FnR0qSXI+NPkseMIJ09Wr/dZZq8s1b4F+FEOCAolsGltqzCvtJe0NF4crLOnT5rKq+BA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"cda90d0678b29de0de4cbff393d43452e0652d41ec41f2868d278db84a44d031","last_reissued_at":"2026-05-18T00:25:11.807866Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:25:11.807866Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Mask R-CNN","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CV","authors_text":"Georgia Gkioxari, Kaiming He, Piotr Doll\\'ar, Ross Girshick","submitted_at":"2017-03-20T17:53:38Z","abstract_excerpt":"We present a conceptually simple, flexible, and general framework for object instance segmentation. Our approach efficiently detects objects in an image while simultaneously generating a high-quality segmentation mask for each instance. The method, called Mask R-CNN, extends Faster R-CNN by adding a branch for predicting an object mask in parallel with the existing branch for bounding box recognition. Mask R-CNN is simple to train and adds only a small overhead to Faster R-CNN, running at 5 fps. Moreover, Mask R-CNN is easy to generalize to other tasks, e.g., allowing us to estimate human pose"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1703.06870","kind":"arxiv","version":3},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1703.06870","created_at":"2026-05-18T00:25:11.807952+00:00"},{"alias_kind":"arxiv_version","alias_value":"1703.06870v3","created_at":"2026-05-18T00:25:11.807952+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1703.06870","created_at":"2026-05-18T00:25:11.807952+00:00"},{"alias_kind":"pith_short_12","alias_value":"ZWUQ2BTYWKO6","created_at":"2026-05-18T12:31:59.375834+00:00"},{"alias_kind":"pith_short_16","alias_value":"ZWUQ2BTYWKO6BXSM","created_at":"2026-05-18T12:31:59.375834+00:00"},{"alias_kind":"pith_short_8","alias_value":"ZWUQ2BTY","created_at":"2026-05-18T12:31:59.375834+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":19,"internal_anchor_count":14,"sample":[{"citing_arxiv_id":"1906.09266","citing_title":"A Multitask Network for Localization and Recognition of Text in Images","ref_index":8,"is_internal_anchor":true},{"citing_arxiv_id":"1906.12187","citing_title":"Deep Radar Detector","ref_index":4,"is_internal_anchor":true},{"citing_arxiv_id":"1906.11463","citing_title":"Automatic Colon Polyp Detection using Region based Deep CNN and Post Learning Approaches","ref_index":23,"is_internal_anchor":true},{"citing_arxiv_id":"1907.04444","citing_title":"A review on deep learning techniques for 3D sensed data classification","ref_index":38,"is_internal_anchor":true},{"citing_arxiv_id":"1907.07676","citing_title":"Lung Nodules Detection and Segmentation Using 3D Mask-RCNN","ref_index":10,"is_internal_anchor":true},{"citing_arxiv_id":"2406.17323","citing_title":"XAMI -- A Benchmark Dataset for Artefact Detection in XMM-Newton Optical Images","ref_index":28,"is_internal_anchor":true},{"citing_arxiv_id":"2410.07442","citing_title":"Self-Supervised Learning for Real-World Object Detection: a Survey","ref_index":70,"is_internal_anchor":true},{"citing_arxiv_id":"2605.22098","citing_title":"TextTeacher: What Can Language Teach About Images?","ref_index":25,"is_internal_anchor":true},{"citing_arxiv_id":"2510.06194","citing_title":"Overlap-aware segmentation for topological reconstruction of obscured objects","ref_index":9,"is_internal_anchor":true},{"citing_arxiv_id":"2605.17630","citing_title":"SegRAG: Training-Free Retrieval-Augmented Semantic Segmentation","ref_index":12,"is_internal_anchor":true},{"citing_arxiv_id":"2605.17630","citing_title":"SegRAG: Training-Free Retrieval-Augmented Semantic Segmentation","ref_index":13,"is_internal_anchor":true},{"citing_arxiv_id":"2605.18052","citing_title":"Efficient 3D Content Reconstruction and Generation","ref_index":92,"is_internal_anchor":true},{"citing_arxiv_id":"2507.22512","citing_title":"AlphaDent: A dataset for automated tooth pathology detection","ref_index":16,"is_internal_anchor":true},{"citing_arxiv_id":"2601.10931","citing_title":"Sparse Data Tree Canopy Segmentation: Fine-Tuning Leading Pretrained Models on Only 150 Images","ref_index":16,"is_internal_anchor":true},{"citing_arxiv_id":"2605.03259","citing_title":"CropVLM: A Domain-Adapted Vision-Language Model for Open-Set Crop Analysis","ref_index":17,"is_internal_anchor":false},{"citing_arxiv_id":"2604.26869","citing_title":"KAYRA: A Microservice Architecture for AI-Assisted Karyotyping with Cloud and On-Premise Deployment","ref_index":2,"is_internal_anchor":false},{"citing_arxiv_id":"1706.02677","citing_title":"Accurate, Large Minibatch SGD: Training ImageNet in 1 Hour","ref_index":14,"is_internal_anchor":false},{"citing_arxiv_id":"2604.20030","citing_title":"Learning to count small and clustered objects with application to bacterial colonies","ref_index":34,"is_internal_anchor":false},{"citing_arxiv_id":"2604.05265","citing_title":"Semantic Reality: Interactive Context-Aware Visualization of Inter-Object Relationships in Augmented Reality","ref_index":29,"is_internal_anchor":false}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/ZWUQ2BTYWKO6BXSMX7ZZHVBUKL","json":"https://pith.science/pith/ZWUQ2BTYWKO6BXSMX7ZZHVBUKL.json","graph_json":"https://pith.science/api/pith-number/ZWUQ2BTYWKO6BXSMX7ZZHVBUKL/graph.json","events_json":"https://pith.science/api/pith-number/ZWUQ2BTYWKO6BXSMX7ZZHVBUKL/events.json","paper":"https://pith.science/paper/ZWUQ2BTY"},"agent_actions":{"view_html":"https://pith.science/pith/ZWUQ2BTYWKO6BXSMX7ZZHVBUKL","download_json":"https://pith.science/pith/ZWUQ2BTYWKO6BXSMX7ZZHVBUKL.json","view_paper":"https://pith.science/paper/ZWUQ2BTY","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1703.06870&json=true","fetch_graph":"https://pith.science/api/pith-number/ZWUQ2BTYWKO6BXSMX7ZZHVBUKL/graph.json","fetch_events":"https://pith.science/api/pith-number/ZWUQ2BTYWKO6BXSMX7ZZHVBUKL/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/ZWUQ2BTYWKO6BXSMX7ZZHVBUKL/action/timestamp_anchor","attest_storage":"https://pith.science/pith/ZWUQ2BTYWKO6BXSMX7ZZHVBUKL/action/storage_attestation","attest_author":"https://pith.science/pith/ZWUQ2BTYWKO6BXSMX7ZZHVBUKL/action/author_attestation","sign_citation":"https://pith.science/pith/ZWUQ2BTYWKO6BXSMX7ZZHVBUKL/action/citation_signature","submit_replication":"https://pith.science/pith/ZWUQ2BTYWKO6BXSMX7ZZHVBUKL/action/replication_record"}},"created_at":"2026-05-18T00:25:11.807952+00:00","updated_at":"2026-05-18T00:25:11.807952+00:00"}