{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2021:RIAJYRWKAP2ICYR452DABEXYOH","short_pith_number":"pith:RIAJYRWK","schema_version":"1.0","canonical_sha256":"8a009c46ca03f481623cee860092f871ea3c6b0a4650f45ba62b773038b135aa","source":{"kind":"arxiv","id":"2101.11911","version":1},"attestation_state":"computed","paper":{"title":"The Role of Syntactic Planning in Compositional Image Captioning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.CV"],"primary_cat":"cs.CL","authors_text":"Desmond Elliott, Emanuele Bugliarello","submitted_at":"2021-01-28T10:26:08Z","abstract_excerpt":"Image captioning has focused on generalizing to images drawn from the same distribution as the training set, and not to the more challenging problem of generalizing to different distributions of images. Recently, Nikolaus et al. (2019) introduced a dataset to assess compositional generalization in image captioning, where models are evaluated on their ability to describe images with unseen adjective-noun and noun-verb compositions. In this work, we investigate different methods to improve compositional generalization by planning the syntactic structure of a caption. Our experiments show that jo"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2101.11911","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2021-01-28T10:26:08Z","cross_cats_sorted":["cs.CV"],"title_canon_sha256":"18c6bc1ace80daacbfcf1788e956752e5363cd6ece24dfdec74a4205befabb0f","abstract_canon_sha256":"4cfabbd4908c4bce8a19228abca4a45844a18ee0a1523d18c47f4e40db749986"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-07-05T02:10:32.109535Z","signature_b64":"aMyYaWB2+DX6m7p1TPYLZKPWXeacgMsIQcUHPtkjFIanRJtdm4vDUCZV7X1DoWgNyoELBjYUEvsvUDwtquerBQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"8a009c46ca03f481623cee860092f871ea3c6b0a4650f45ba62b773038b135aa","last_reissued_at":"2026-07-05T02:10:32.109078Z","signature_status":"signed_v1","first_computed_at":"2026-07-05T02:10:32.109078Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"The Role of Syntactic Planning in Compositional Image Captioning","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.CV"],"primary_cat":"cs.CL","authors_text":"Desmond Elliott, Emanuele Bugliarello","submitted_at":"2021-01-28T10:26:08Z","abstract_excerpt":"Image captioning has focused on generalizing to images drawn from the same distribution as the training set, and not to the more challenging problem of generalizing to different distributions of images. Recently, Nikolaus et al. (2019) introduced a dataset to assess compositional generalization in image captioning, where models are evaluated on their ability to describe images with unseen adjective-noun and noun-verb compositions. In this work, we investigate different methods to improve compositional generalization by planning the syntactic structure of a caption. Our experiments show that jo"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2101.11911","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2101.11911/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2101.11911","created_at":"2026-07-05T02:10:32.109134+00:00"},{"alias_kind":"arxiv_version","alias_value":"2101.11911v1","created_at":"2026-07-05T02:10:32.109134+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2101.11911","created_at":"2026-07-05T02:10:32.109134+00:00"},{"alias_kind":"pith_short_12","alias_value":"RIAJYRWKAP2I","created_at":"2026-07-05T02:10:32.109134+00:00"},{"alias_kind":"pith_short_16","alias_value":"RIAJYRWKAP2ICYR4","created_at":"2026-07-05T02:10:32.109134+00:00"},{"alias_kind":"pith_short_8","alias_value":"RIAJYRWK","created_at":"2026-07-05T02:10:32.109134+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":2,"internal_anchor_count":0,"sample":[{"citing_arxiv_id":"2606.19941","citing_title":"Compositionality Emerges in a Narrow Depth-Connectivity Regime: Architecture Constraints and Solution Manifolds","ref_index":58,"is_internal_anchor":false},{"citing_arxiv_id":"2606.07568","citing_title":"A Systematic Study of Behavioral Cloning for Scientific Data Annotation","ref_index":228,"is_internal_anchor":false}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/RIAJYRWKAP2ICYR452DABEXYOH","json":"https://pith.science/pith/RIAJYRWKAP2ICYR452DABEXYOH.json","graph_json":"https://pith.science/api/pith-number/RIAJYRWKAP2ICYR452DABEXYOH/graph.json","events_json":"https://pith.science/api/pith-number/RIAJYRWKAP2ICYR452DABEXYOH/events.json","paper":"https://pith.science/paper/RIAJYRWK"},"agent_actions":{"view_html":"https://pith.science/pith/RIAJYRWKAP2ICYR452DABEXYOH","download_json":"https://pith.science/pith/RIAJYRWKAP2ICYR452DABEXYOH.json","view_paper":"https://pith.science/paper/RIAJYRWK","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2101.11911&json=true","fetch_graph":"https://pith.science/api/pith-number/RIAJYRWKAP2ICYR452DABEXYOH/graph.json","fetch_events":"https://pith.science/api/pith-number/RIAJYRWKAP2ICYR452DABEXYOH/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/RIAJYRWKAP2ICYR452DABEXYOH/action/timestamp_anchor","attest_storage":"https://pith.science/pith/RIAJYRWKAP2ICYR452DABEXYOH/action/storage_attestation","attest_author":"https://pith.science/pith/RIAJYRWKAP2ICYR452DABEXYOH/action/author_attestation","sign_citation":"https://pith.science/pith/RIAJYRWKAP2ICYR452DABEXYOH/action/citation_signature","submit_replication":"https://pith.science/pith/RIAJYRWKAP2ICYR452DABEXYOH/action/replication_record"}},"created_at":"2026-07-05T02:10:32.109134+00:00","updated_at":"2026-07-05T02:10:32.109134+00:00"}