{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2016:NBBRFES7EDOL6KZHDKY5CZT5JL","short_pith_number":"pith:NBBRFES7","schema_version":"1.0","canonical_sha256":"684312925f20dcbf2b271ab1d1667d4ad7a235b8136cfcb6a3e81aa577ffaee8","source":{"kind":"arxiv","id":"1603.07396","version":1},"attestation_state":"computed","paper":{"title":"A Diagram Is Worth A Dozen Images","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.CV","authors_text":"Ali Farhadi, Aniruddha Kembhavi, Eric Kolve, Hannaneh Hajishirzi, Mike Salvato, Minjoon Seo","submitted_at":"2016-03-24T00:02:58Z","abstract_excerpt":"Diagrams are common tools for representing complex concepts, relationships and events, often when it would be difficult to portray the same information with natural images. Understanding natural images has been extensively studied in computer vision, while diagram understanding has received little attention. In this paper, we study the problem of diagram interpretation and reasoning, the challenging task of identifying the structure of a diagram and the semantics of its constituents and their relationships. We introduce Diagram Parse Graphs (DPG) as our representation to model the structure of"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1603.07396","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2016-03-24T00:02:58Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"2c81280c66038b495367e8dce96eaae4ddebe0403e26ce45198fceead9fb4753","abstract_canon_sha256":"1ee0af321006e014f0cc933465f96b88c9bc51ab8ab04647261259079f6ebb34"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T01:18:21.092369Z","signature_b64":"/nGIxWMh6OenZpZ4G4WpsjUsRw9Hfq4Cmbtcu0V/BM+LLRHJ74nq3V8gKWhczS8ZtcU/dCdEBRFsIsX4dsFJDQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"684312925f20dcbf2b271ab1d1667d4ad7a235b8136cfcb6a3e81aa577ffaee8","last_reissued_at":"2026-05-18T01:18:21.091863Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T01:18:21.091863Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"A Diagram Is Worth A Dozen Images","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.CV","authors_text":"Ali Farhadi, Aniruddha Kembhavi, Eric Kolve, Hannaneh Hajishirzi, Mike Salvato, Minjoon Seo","submitted_at":"2016-03-24T00:02:58Z","abstract_excerpt":"Diagrams are common tools for representing complex concepts, relationships and events, often when it would be difficult to portray the same information with natural images. Understanding natural images has been extensively studied in computer vision, while diagram understanding has received little attention. In this paper, we study the problem of diagram interpretation and reasoning, the challenging task of identifying the structure of a diagram and the semantics of its constituents and their relationships. We introduce Diagram Parse Graphs (DPG) as our representation to model the structure of"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1603.07396","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1603.07396","created_at":"2026-05-18T01:18:21.091927+00:00"},{"alias_kind":"arxiv_version","alias_value":"1603.07396v1","created_at":"2026-05-18T01:18:21.091927+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1603.07396","created_at":"2026-05-18T01:18:21.091927+00:00"},{"alias_kind":"pith_short_12","alias_value":"NBBRFES7EDOL","created_at":"2026-05-18T12:30:32.724797+00:00"},{"alias_kind":"pith_short_16","alias_value":"NBBRFES7EDOL6KZH","created_at":"2026-05-18T12:30:32.724797+00:00"},{"alias_kind":"pith_short_8","alias_value":"NBBRFES7","created_at":"2026-05-18T12:30:32.724797+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":12,"internal_anchor_count":6,"sample":[{"citing_arxiv_id":"2503.19786","citing_title":"Gemma 3 Technical Report","ref_index":29,"is_internal_anchor":true},{"citing_arxiv_id":"2605.16941","citing_title":"Roll Out and Roll Back: Diffusion LLMs are Their Own Efficiency Teachers","ref_index":36,"is_internal_anchor":true},{"citing_arxiv_id":"2510.21122","citing_title":"NoisyGRPO: Incentivizing Multimodal CoT Reasoning via Noise Injection and Bayesian Estimation","ref_index":17,"is_internal_anchor":true},{"citing_arxiv_id":"2407.12772","citing_title":"LMMs-Eval: Reality Check on the Evaluation of Large Multimodal Models","ref_index":5,"is_internal_anchor":true},{"citing_arxiv_id":"2503.07536","citing_title":"LMM-R1: Empowering 3B LMMs with Strong Reasoning Abilities Through Two-Stage Rule-Based RL","ref_index":28,"is_internal_anchor":true},{"citing_arxiv_id":"2605.12034","citing_title":"Boosting Omni-Modal Language Models: Staged Post-Training with Visually Debiased Evaluation","ref_index":48,"is_internal_anchor":true},{"citing_arxiv_id":"2604.14198","citing_title":"MixAtlas: Uncertainty-aware Data Mixture Optimization for Multimodal LLM Midtraining","ref_index":9,"is_internal_anchor":false},{"citing_arxiv_id":"2605.12034","citing_title":"Boosting Omni-Modal Language Models: Staged Post-Training with Visually Debiased Evaluation","ref_index":48,"is_internal_anchor":false},{"citing_arxiv_id":"2403.20330","citing_title":"Are We on the Right Way for Evaluating Large Vision-Language Models?","ref_index":20,"is_internal_anchor":false},{"citing_arxiv_id":"2604.10228","citing_title":"SVSR: A Self-Verification and Self-Rectification Paradigm for Multimodal Reasoning","ref_index":35,"is_internal_anchor":false},{"citing_arxiv_id":"2604.08212","citing_title":"Vision-Language Foundation Models for Comprehensive Automated Pavement Condition Assessment","ref_index":49,"is_internal_anchor":false},{"citing_arxiv_id":"2604.17475","citing_title":"Waking Up Blind: Cold-Start Optimization of Supervision-Free Agentic Trajectories for Grounded Visual Perception","ref_index":2,"is_internal_anchor":false}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/NBBRFES7EDOL6KZHDKY5CZT5JL","json":"https://pith.science/pith/NBBRFES7EDOL6KZHDKY5CZT5JL.json","graph_json":"https://pith.science/api/pith-number/NBBRFES7EDOL6KZHDKY5CZT5JL/graph.json","events_json":"https://pith.science/api/pith-number/NBBRFES7EDOL6KZHDKY5CZT5JL/events.json","paper":"https://pith.science/paper/NBBRFES7"},"agent_actions":{"view_html":"https://pith.science/pith/NBBRFES7EDOL6KZHDKY5CZT5JL","download_json":"https://pith.science/pith/NBBRFES7EDOL6KZHDKY5CZT5JL.json","view_paper":"https://pith.science/paper/NBBRFES7","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1603.07396&json=true","fetch_graph":"https://pith.science/api/pith-number/NBBRFES7EDOL6KZHDKY5CZT5JL/graph.json","fetch_events":"https://pith.science/api/pith-number/NBBRFES7EDOL6KZHDKY5CZT5JL/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/NBBRFES7EDOL6KZHDKY5CZT5JL/action/timestamp_anchor","attest_storage":"https://pith.science/pith/NBBRFES7EDOL6KZHDKY5CZT5JL/action/storage_attestation","attest_author":"https://pith.science/pith/NBBRFES7EDOL6KZHDKY5CZT5JL/action/author_attestation","sign_citation":"https://pith.science/pith/NBBRFES7EDOL6KZHDKY5CZT5JL/action/citation_signature","submit_replication":"https://pith.science/pith/NBBRFES7EDOL6KZHDKY5CZT5JL/action/replication_record"}},"created_at":"2026-05-18T01:18:21.091927+00:00","updated_at":"2026-05-18T01:18:21.091927+00:00"}