{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2024:EXHXNSIJB6EQ4IEVICOEX55IXP","short_pith_number":"pith:EXHXNSIJ","canonical_record":{"source":{"id":"2402.06619","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2024-02-09T18:51:49Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"d4052ad4c87e320f051a3fee0ec27988fd13805354ae41f332647c33125bb129","abstract_canon_sha256":"07122a83dcfe58901e8efbbcfeea1bcb9016e856d3c237ba8ff2ef0d590a78c5"},"schema_version":"1.0"},"canonical_sha256":"25cf76c9090f890e2095409c4bf7a8bbf8c0937bc21c2cc9c4ecc5c5807788af","source":{"kind":"arxiv","id":"2402.06619","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2402.06619","created_at":"2026-07-05T07:43:27Z"},{"alias_kind":"arxiv_version","alias_value":"2402.06619v1","created_at":"2026-07-05T07:43:27Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2402.06619","created_at":"2026-07-05T07:43:27Z"},{"alias_kind":"pith_short_12","alias_value":"EXHXNSIJB6EQ","created_at":"2026-07-05T07:43:27Z"},{"alias_kind":"pith_short_16","alias_value":"EXHXNSIJB6EQ4IEV","created_at":"2026-07-05T07:43:27Z"},{"alias_kind":"pith_short_8","alias_value":"EXHXNSIJ","created_at":"2026-07-05T07:43:27Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2024:EXHXNSIJB6EQ4IEVICOEX55IXP","target":"record","payload":{"canonical_record":{"source":{"id":"2402.06619","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2024-02-09T18:51:49Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"d4052ad4c87e320f051a3fee0ec27988fd13805354ae41f332647c33125bb129","abstract_canon_sha256":"07122a83dcfe58901e8efbbcfeea1bcb9016e856d3c237ba8ff2ef0d590a78c5"},"schema_version":"1.0"},"canonical_sha256":"25cf76c9090f890e2095409c4bf7a8bbf8c0937bc21c2cc9c4ecc5c5807788af","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-07-05T07:43:27.996259Z","signature_b64":"x8hkvSs28Dtyxev0w2JIc56Z+xK/Q2Ts+lOlon/GSpPan+ziVJDSLJpKO4uSiyf7h4NhrM6BLO90IgID4ObrAQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"25cf76c9090f890e2095409c4bf7a8bbf8c0937bc21c2cc9c4ecc5c5807788af","last_reissued_at":"2026-07-05T07:43:27.995665Z","signature_status":"signed_v1","first_computed_at":"2026-07-05T07:43:27.995665Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2402.06619","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-07-05T07:43:27Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"JOpzDRuSGcjP++VbOM6Kona018Z8MXlw0FLBfAUK22FIzKnxaLjONd2MWvICkhjMXHXfzITsEDeBWCdONMdNDg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-07-05T15:42:07.128209Z"},"content_sha256":"185c326a418990c04e8061f6f6b0a61bf341a0a69dd686ad7b1743108e06fa55","schema_version":"1.0","event_id":"sha256:185c326a418990c04e8061f6f6b0a61bf341a0a69dd686ad7b1743108e06fa55"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2024:EXHXNSIJB6EQ4IEVICOEX55IXP","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Aya Dataset: An Open-Access Collection for Multilingual Instruction Tuning","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.CL","authors_text":"Abinaya Mahendiran, Ahmet \\\"Ust\\\"un, Aisha Alaagib, B\\\"orje F. Karlsson, Daniel Dsouza, Deividas Mataciunas, Dominik Krzemi\\'nski, Emad A. Alghamdi, Freddie Vargus, Hakimeh Fadaei, Herumb Shandilya, Ifeoma Okoh, Irem Erg\\\"un, Jay Patel, Joseph Wilson, Julia Kreutzer, Laura OMahony, Luisa Souza Moura, Marina Machado, Marzieh Fadaee, Max Bartolo, Mike Zhang, Niklas Muennighoff, Oshan Mudannayake, Ramith Hettiarachchi, Sara Hooker, Sebastian Gehrmann, Sebastian Ruder, Shivalika Singh, Surya Guthikonda, Vu Minh Chien, Wei-Yin Ko, Zaid Alyafeai","submitted_at":"2024-02-09T18:51:49Z","abstract_excerpt":"Datasets are foundational to many breakthroughs in modern artificial intelligence. Many recent achievements in the space of natural language processing (NLP) can be attributed to the finetuning of pre-trained models on a diverse set of tasks that enables a large language model (LLM) to respond to instructions. Instruction fine-tuning (IFT) requires specifically constructed and annotated datasets. However, existing datasets are almost all in the English language. In this work, our primary goal is to bridge the language gap by building a human-curated instruction-following dataset spanning 65 la"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2402.06619","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2402.06619/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-07-05T07:43:27Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"zvCWt3qJ1BgRsyeWWza0E+bFW7QN5BoIoJsj5Ri+rNfBmFsMVXDxvMXWuuF+ji2M/9Kl7l8VODSCNOK3a9aKCw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-07-05T15:42:07.128919Z"},"content_sha256":"2a6fb4dc910c76399a7b1ddcd0d456e7faaf7ed5efab61ce4fd9ece4910e1cf2","schema_version":"1.0","event_id":"sha256:2a6fb4dc910c76399a7b1ddcd0d456e7faaf7ed5efab61ce4fd9ece4910e1cf2"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/EXHXNSIJB6EQ4IEVICOEX55IXP/bundle.json","state_url":"https://pith.science/pith/EXHXNSIJB6EQ4IEVICOEX55IXP/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/EXHXNSIJB6EQ4IEVICOEX55IXP/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-07-05T15:42:07Z","links":{"resolver":"https://pith.science/pith/EXHXNSIJB6EQ4IEVICOEX55IXP","bundle":"https://pith.science/pith/EXHXNSIJB6EQ4IEVICOEX55IXP/bundle.json","state":"https://pith.science/pith/EXHXNSIJB6EQ4IEVICOEX55IXP/state.json","well_known_bundle":"https://pith.science/.well-known/pith/EXHXNSIJB6EQ4IEVICOEX55IXP/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2024:EXHXNSIJB6EQ4IEVICOEX55IXP","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"07122a83dcfe58901e8efbbcfeea1bcb9016e856d3c237ba8ff2ef0d590a78c5","cross_cats_sorted":["cs.AI"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2024-02-09T18:51:49Z","title_canon_sha256":"d4052ad4c87e320f051a3fee0ec27988fd13805354ae41f332647c33125bb129"},"schema_version":"1.0","source":{"id":"2402.06619","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2402.06619","created_at":"2026-07-05T07:43:27Z"},{"alias_kind":"arxiv_version","alias_value":"2402.06619v1","created_at":"2026-07-05T07:43:27Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2402.06619","created_at":"2026-07-05T07:43:27Z"},{"alias_kind":"pith_short_12","alias_value":"EXHXNSIJB6EQ","created_at":"2026-07-05T07:43:27Z"},{"alias_kind":"pith_short_16","alias_value":"EXHXNSIJB6EQ4IEV","created_at":"2026-07-05T07:43:27Z"},{"alias_kind":"pith_short_8","alias_value":"EXHXNSIJ","created_at":"2026-07-05T07:43:27Z"}],"graph_snapshots":[{"event_id":"sha256:2a6fb4dc910c76399a7b1ddcd0d456e7faaf7ed5efab61ce4fd9ece4910e1cf2","target":"graph","created_at":"2026-07-05T07:43:27Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2402.06619/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Datasets are foundational to many breakthroughs in modern artificial intelligence. Many recent achievements in the space of natural language processing (NLP) can be attributed to the finetuning of pre-trained models on a diverse set of tasks that enables a large language model (LLM) to respond to instructions. Instruction fine-tuning (IFT) requires specifically constructed and annotated datasets. However, existing datasets are almost all in the English language. In this work, our primary goal is to bridge the language gap by building a human-curated instruction-following dataset spanning 65 la","authors_text":"Abinaya Mahendiran, Ahmet \\\"Ust\\\"un, Aisha Alaagib, B\\\"orje F. Karlsson, Daniel Dsouza, Deividas Mataciunas, Dominik Krzemi\\'nski, Emad A. Alghamdi, Freddie Vargus, Hakimeh Fadaei, Herumb Shandilya, Ifeoma Okoh, Irem Erg\\\"un, Jay Patel, Joseph Wilson, Julia Kreutzer, Laura OMahony, Luisa Souza Moura, Marina Machado, Marzieh Fadaee, Max Bartolo, Mike Zhang, Niklas Muennighoff, Oshan Mudannayake, Ramith Hettiarachchi, Sara Hooker, Sebastian Gehrmann, Sebastian Ruder, Shivalika Singh, Surya Guthikonda, Vu Minh Chien, Wei-Yin Ko, Zaid Alyafeai","cross_cats":["cs.AI"],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2024-02-09T18:51:49Z","title":"Aya Dataset: An Open-Access Collection for Multilingual Instruction Tuning"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2402.06619","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:185c326a418990c04e8061f6f6b0a61bf341a0a69dd686ad7b1743108e06fa55","target":"record","created_at":"2026-07-05T07:43:27Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"07122a83dcfe58901e8efbbcfeea1bcb9016e856d3c237ba8ff2ef0d590a78c5","cross_cats_sorted":["cs.AI"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2024-02-09T18:51:49Z","title_canon_sha256":"d4052ad4c87e320f051a3fee0ec27988fd13805354ae41f332647c33125bb129"},"schema_version":"1.0","source":{"id":"2402.06619","kind":"arxiv","version":1}},"canonical_sha256":"25cf76c9090f890e2095409c4bf7a8bbf8c0937bc21c2cc9c4ecc5c5807788af","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"25cf76c9090f890e2095409c4bf7a8bbf8c0937bc21c2cc9c4ecc5c5807788af","first_computed_at":"2026-07-05T07:43:27.995665Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-07-05T07:43:27.995665Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"x8hkvSs28Dtyxev0w2JIc56Z+xK/Q2Ts+lOlon/GSpPan+ziVJDSLJpKO4uSiyf7h4NhrM6BLO90IgID4ObrAQ==","signature_status":"signed_v1","signed_at":"2026-07-05T07:43:27.996259Z","signed_message":"canonical_sha256_bytes"},"source_id":"2402.06619","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:185c326a418990c04e8061f6f6b0a61bf341a0a69dd686ad7b1743108e06fa55","sha256:2a6fb4dc910c76399a7b1ddcd0d456e7faaf7ed5efab61ce4fd9ece4910e1cf2"],"state_sha256":"c2bbd5904fc2b25da265f38d7feced2dae8cb8543890962a1cb2456049c02dd0"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"D704ULhZ0Zi03aoy6xX63S9vGetk9NSnZU9vpH7psSzOLYANxWc9ivsRuxkL/nZtsZWLMw/xlxvDRYlfOS4KBA==","signed_message":"bundle_sha256_bytes","signed_at":"2026-07-05T15:42:07.132605Z","bundle_sha256":"ada862971f1e33787d3e2741f2455df6c31205f4fe7690cbc0288bc351e5340e"}}