{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2023:YWXZK3YDFQT2CQXXGLT67ATJJY","short_pith_number":"pith:YWXZK3YD","schema_version":"1.0","canonical_sha256":"c5af956f032c27a142f732e7ef82694e0447d334ea403a38002c00bfbb60f46f","source":{"kind":"arxiv","id":"2301.13688","version":2},"attestation_state":"computed","paper":{"title":"The Flan Collection: Designing Data and Methods for Effective Instruction Tuning","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.CL","cs.LG"],"primary_cat":"cs.AI","authors_text":"Adam Roberts, Albert Webson, Barret Zoph, Denny Zhou, Hyung Won Chung, Jason Wei, Le Hou, Quoc V. Le, Shayne Longpre, Tu Vu, Yi Tay","submitted_at":"2023-01-31T15:03:44Z","abstract_excerpt":"We study the design decisions of publicly available instruction tuning methods, and break down the development of Flan 2022 (Chung et al., 2022). Through careful ablation studies on the Flan Collection of tasks and methods, we tease apart the effect of design decisions which enable Flan-T5 to outperform prior work by 3-17%+ across evaluation settings. We find task balancing and enrichment techniques are overlooked but critical to effective instruction tuning, and in particular, training with mixed prompt settings (zero-shot, few-shot, and chain-of-thought) actually yields stronger (2%+) perfor"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2301.13688","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.AI","submitted_at":"2023-01-31T15:03:44Z","cross_cats_sorted":["cs.CL","cs.LG"],"title_canon_sha256":"90940a5f1df7454b91f8f414b7aaa380448cba7addc9f6bdfa3fdda7dfc9ee7d","abstract_canon_sha256":"e035a31bdedf432d05820c3c1801287b1966a2e9f910f7758dc06aaa37682ad3"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:38:13.942152Z","signature_b64":"9RjKi0oPip1q2qrjYCJQhFRKra6dmaZaXExVfrSWcMBy+Hw0J5yk8NPRmuqZaf2HlWXAHbkTQM6erM9XxRtQCw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"c5af956f032c27a142f732e7ef82694e0447d334ea403a38002c00bfbb60f46f","last_reissued_at":"2026-05-17T23:38:13.941480Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:38:13.941480Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"The Flan Collection: Designing Data and Methods for Effective Instruction Tuning","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.CL","cs.LG"],"primary_cat":"cs.AI","authors_text":"Adam Roberts, Albert Webson, Barret Zoph, Denny Zhou, Hyung Won Chung, Jason Wei, Le Hou, Quoc V. Le, Shayne Longpre, Tu Vu, Yi Tay","submitted_at":"2023-01-31T15:03:44Z","abstract_excerpt":"We study the design decisions of publicly available instruction tuning methods, and break down the development of Flan 2022 (Chung et al., 2022). Through careful ablation studies on the Flan Collection of tasks and methods, we tease apart the effect of design decisions which enable Flan-T5 to outperform prior work by 3-17%+ across evaluation settings. We find task balancing and enrichment techniques are overlooked but critical to effective instruction tuning, and in particular, training with mixed prompt settings (zero-shot, few-shot, and chain-of-thought) actually yields stronger (2%+) perfor"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2301.13688","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2301.13688","created_at":"2026-05-17T23:38:13.941585+00:00"},{"alias_kind":"arxiv_version","alias_value":"2301.13688v2","created_at":"2026-05-17T23:38:13.941585+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2301.13688","created_at":"2026-05-17T23:38:13.941585+00:00"},{"alias_kind":"pith_short_12","alias_value":"YWXZK3YDFQT2","created_at":"2026-05-18T12:33:37.589309+00:00"},{"alias_kind":"pith_short_16","alias_value":"YWXZK3YDFQT2CQXX","created_at":"2026-05-18T12:33:37.589309+00:00"},{"alias_kind":"pith_short_8","alias_value":"YWXZK3YD","created_at":"2026-05-18T12:33:37.589309+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":20,"internal_anchor_count":4,"sample":[{"citing_arxiv_id":"2503.08223","citing_title":"Will LLMs Scaling Hit the Wall? Breaking Barriers via Distributed Resources on Massive Edge Devices","ref_index":163,"is_internal_anchor":true},{"citing_arxiv_id":"2508.04149","citing_title":"Difficulty-Based Preference Data Selection by DPO Implicit Reward Gap","ref_index":35,"is_internal_anchor":true},{"citing_arxiv_id":"2305.16264","citing_title":"Scaling Data-Constrained Language Models","ref_index":67,"is_internal_anchor":true},{"citing_arxiv_id":"2402.13116","citing_title":"A Survey on Knowledge Distillation of Large Language Models","ref_index":57,"is_internal_anchor":true},{"citing_arxiv_id":"2312.13771","citing_title":"AppAgent: Multimodal Agents as Smartphone Users","ref_index":62,"is_internal_anchor":false},{"citing_arxiv_id":"2505.05472","citing_title":"Mogao: An Omni Foundation Model for Interleaved Multi-Modal Generation","ref_index":53,"is_internal_anchor":false},{"citing_arxiv_id":"2311.16867","citing_title":"The Falcon Series of Open Language Models","ref_index":208,"is_internal_anchor":false},{"citing_arxiv_id":"2302.14045","citing_title":"Language Is Not All You Need: Aligning Perception with Language Models","ref_index":12,"is_internal_anchor":false},{"citing_arxiv_id":"2305.14233","citing_title":"Enhancing Chat Language Models by Scaling High-quality Instructional Conversations","ref_index":246,"is_internal_anchor":false},{"citing_arxiv_id":"2605.14289","citing_title":"MetaMoE: Diversity-Aware Proxy Selection for Privacy-Preserving Mixture-of-Experts Unification","ref_index":3,"is_internal_anchor":false},{"citing_arxiv_id":"2303.17760","citing_title":"CAMEL: Communicative Agents for \"Mind\" Exploration of Large Language Model Society","ref_index":71,"is_internal_anchor":false},{"citing_arxiv_id":"2303.17580","citing_title":"HuggingGPT: Solving AI Tasks with ChatGPT and its Friends in Hugging Face","ref_index":14,"is_internal_anchor":false},{"citing_arxiv_id":"2304.12244","citing_title":"WizardLM: Empowering large pre-trained language models to follow complex instructions","ref_index":27,"is_internal_anchor":false},{"citing_arxiv_id":"2305.10403","citing_title":"PaLM 2 Technical Report","ref_index":92,"is_internal_anchor":false},{"citing_arxiv_id":"2306.14824","citing_title":"Kosmos-2: Grounding Multimodal Large Language Models to the World","ref_index":10,"is_internal_anchor":false},{"citing_arxiv_id":"2305.14314","citing_title":"QLoRA: Efficient Finetuning of Quantized LLMs","ref_index":39,"is_internal_anchor":false},{"citing_arxiv_id":"2303.18223","citing_title":"A Survey of Large Language Models","ref_index":201,"is_internal_anchor":false},{"citing_arxiv_id":"2306.05685","citing_title":"Judging LLM-as-a-Judge with MT-Bench and Chatbot Arena","ref_index":27,"is_internal_anchor":false},{"citing_arxiv_id":"2604.13846","citing_title":"Beyond Static Personas: Situational Personality Steering for Large Language Models","ref_index":3,"is_internal_anchor":false},{"citing_arxiv_id":"2605.05227","citing_title":"Rethinking Data Curation in LLM Training: Online Reweighting Offers Better Generalization than Offline Methods","ref_index":27,"is_internal_anchor":false}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/YWXZK3YDFQT2CQXXGLT67ATJJY","json":"https://pith.science/pith/YWXZK3YDFQT2CQXXGLT67ATJJY.json","graph_json":"https://pith.science/api/pith-number/YWXZK3YDFQT2CQXXGLT67ATJJY/graph.json","events_json":"https://pith.science/api/pith-number/YWXZK3YDFQT2CQXXGLT67ATJJY/events.json","paper":"https://pith.science/paper/YWXZK3YD"},"agent_actions":{"view_html":"https://pith.science/pith/YWXZK3YDFQT2CQXXGLT67ATJJY","download_json":"https://pith.science/pith/YWXZK3YDFQT2CQXXGLT67ATJJY.json","view_paper":"https://pith.science/paper/YWXZK3YD","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2301.13688&json=true","fetch_graph":"https://pith.science/api/pith-number/YWXZK3YDFQT2CQXXGLT67ATJJY/graph.json","fetch_events":"https://pith.science/api/pith-number/YWXZK3YDFQT2CQXXGLT67ATJJY/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/YWXZK3YDFQT2CQXXGLT67ATJJY/action/timestamp_anchor","attest_storage":"https://pith.science/pith/YWXZK3YDFQT2CQXXGLT67ATJJY/action/storage_attestation","attest_author":"https://pith.science/pith/YWXZK3YDFQT2CQXXGLT67ATJJY/action/author_attestation","sign_citation":"https://pith.science/pith/YWXZK3YDFQT2CQXXGLT67ATJJY/action/citation_signature","submit_replication":"https://pith.science/pith/YWXZK3YDFQT2CQXXGLT67ATJJY/action/replication_record"}},"created_at":"2026-05-17T23:38:13.941585+00:00","updated_at":"2026-05-17T23:38:13.941585+00:00"}