{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2016:ZO6ZBDKBZBGCCQMQWR3XJF6RXL","short_pith_number":"pith:ZO6ZBDKB","schema_version":"1.0","canonical_sha256":"cbbd908d41c84c214190b4777497d1bafe27665fdf5caedda9ce615af66199cd","source":{"kind":"arxiv","id":"1612.01105","version":2},"attestation_state":"computed","paper":{"title":"Pyramid Scene Parsing Network","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CV","authors_text":"Hengshuang Zhao, Jianping Shi, Jiaya Jia, Xiaogang Wang, Xiaojuan Qi","submitted_at":"2016-12-04T11:46:22Z","abstract_excerpt":"Scene parsing is challenging for unrestricted open vocabulary and diverse scenes. In this paper, we exploit the capability of global context information by different-region-based context aggregation through our pyramid pooling module together with the proposed pyramid scene parsing network (PSPNet). Our global prior representation is effective to produce good quality results on the scene parsing task, while PSPNet provides a superior framework for pixel-level prediction tasks. The proposed approach achieves state-of-the-art performance on various datasets. It came first in ImageNet scene parsi"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1612.01105","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2016-12-04T11:46:22Z","cross_cats_sorted":[],"title_canon_sha256":"b3f4bd47f0a1361cb5f415834724839138bb8d1572eee8d8fdae726234ace234","abstract_canon_sha256":"249533ac8fa8811449bafb7f6db06aa1db91aa20196dbcbc36c1247d05127f3f"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:45:30.215127Z","signature_b64":"q6lY/6tr/xMkvx/oxDZ2vDc/KPiod6fSwYpNCsOP0ij8AgK5xyPfaYLRLHY7z6/Qpe5AtP6cLOBA97HeTgFVCg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"cbbd908d41c84c214190b4777497d1bafe27665fdf5caedda9ce615af66199cd","last_reissued_at":"2026-05-18T00:45:30.214702Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:45:30.214702Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Pyramid Scene Parsing Network","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CV","authors_text":"Hengshuang Zhao, Jianping Shi, Jiaya Jia, Xiaogang Wang, Xiaojuan Qi","submitted_at":"2016-12-04T11:46:22Z","abstract_excerpt":"Scene parsing is challenging for unrestricted open vocabulary and diverse scenes. In this paper, we exploit the capability of global context information by different-region-based context aggregation through our pyramid pooling module together with the proposed pyramid scene parsing network (PSPNet). Our global prior representation is effective to produce good quality results on the scene parsing task, while PSPNet provides a superior framework for pixel-level prediction tasks. The proposed approach achieves state-of-the-art performance on various datasets. It came first in ImageNet scene parsi"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1612.01105","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1612.01105","created_at":"2026-05-18T00:45:30.214772+00:00"},{"alias_kind":"arxiv_version","alias_value":"1612.01105v2","created_at":"2026-05-18T00:45:30.214772+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1612.01105","created_at":"2026-05-18T00:45:30.214772+00:00"},{"alias_kind":"pith_short_12","alias_value":"ZO6ZBDKBZBGC","created_at":"2026-05-18T12:30:55.937587+00:00"},{"alias_kind":"pith_short_16","alias_value":"ZO6ZBDKBZBGCCQMQ","created_at":"2026-05-18T12:30:55.937587+00:00"},{"alias_kind":"pith_short_8","alias_value":"ZO6ZBDKB","created_at":"2026-05-18T12:30:55.937587+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":4,"internal_anchor_count":3,"sample":[{"citing_arxiv_id":"1907.06876","citing_title":"Separable Convolutional LSTMs for Faster Video Segmentation","ref_index":25,"is_internal_anchor":true},{"citing_arxiv_id":"1907.07061","citing_title":"How much real data do we actually need: Analyzing object detection performance using synthetic and real data","ref_index":20,"is_internal_anchor":true},{"citing_arxiv_id":"2602.23024","citing_title":"InCoM: Intent-Driven Perception and Structured Coordination for Mobile Manipulation","ref_index":33,"is_internal_anchor":true},{"citing_arxiv_id":"1706.05587","citing_title":"Rethinking Atrous Convolution for Semantic Image Segmentation","ref_index":95,"is_internal_anchor":false}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/ZO6ZBDKBZBGCCQMQWR3XJF6RXL","json":"https://pith.science/pith/ZO6ZBDKBZBGCCQMQWR3XJF6RXL.json","graph_json":"https://pith.science/api/pith-number/ZO6ZBDKBZBGCCQMQWR3XJF6RXL/graph.json","events_json":"https://pith.science/api/pith-number/ZO6ZBDKBZBGCCQMQWR3XJF6RXL/events.json","paper":"https://pith.science/paper/ZO6ZBDKB"},"agent_actions":{"view_html":"https://pith.science/pith/ZO6ZBDKBZBGCCQMQWR3XJF6RXL","download_json":"https://pith.science/pith/ZO6ZBDKBZBGCCQMQWR3XJF6RXL.json","view_paper":"https://pith.science/paper/ZO6ZBDKB","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1612.01105&json=true","fetch_graph":"https://pith.science/api/pith-number/ZO6ZBDKBZBGCCQMQWR3XJF6RXL/graph.json","fetch_events":"https://pith.science/api/pith-number/ZO6ZBDKBZBGCCQMQWR3XJF6RXL/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/ZO6ZBDKBZBGCCQMQWR3XJF6RXL/action/timestamp_anchor","attest_storage":"https://pith.science/pith/ZO6ZBDKBZBGCCQMQWR3XJF6RXL/action/storage_attestation","attest_author":"https://pith.science/pith/ZO6ZBDKBZBGCCQMQWR3XJF6RXL/action/author_attestation","sign_citation":"https://pith.science/pith/ZO6ZBDKBZBGCCQMQWR3XJF6RXL/action/citation_signature","submit_replication":"https://pith.science/pith/ZO6ZBDKBZBGCCQMQWR3XJF6RXL/action/replication_record"}},"created_at":"2026-05-18T00:45:30.214772+00:00","updated_at":"2026-05-18T00:45:30.214772+00:00"}