{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2021:4ERRQCUZQSTKGPZU73UL5DHW4E","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"e8bbe77e8299fad208c011be998f72a63411a86e9a441e89359e7d3a78277dbd","cross_cats_sorted":["cs.AI","cs.LG"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2021-02-02T18:42:05Z","title_canon_sha256":"b43393e3473623e2b14443a9a5d46e5f7a4509c6ac1654d28a361c46bc1fcf68"},"schema_version":"1.0","source":{"id":"2102.01672","kind":"arxiv","version":3}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2102.01672","created_at":"2026-07-05T02:28:20Z"},{"alias_kind":"arxiv_version","alias_value":"2102.01672v3","created_at":"2026-07-05T02:28:20Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2102.01672","created_at":"2026-07-05T02:28:20Z"},{"alias_kind":"pith_short_12","alias_value":"4ERRQCUZQSTK","created_at":"2026-07-05T02:28:20Z"},{"alias_kind":"pith_short_16","alias_value":"4ERRQCUZQSTKGPZU","created_at":"2026-07-05T02:28:20Z"},{"alias_kind":"pith_short_8","alias_value":"4ERRQCUZ","created_at":"2026-07-05T02:28:20Z"}],"graph_snapshots":[{"event_id":"sha256:96f181e62716feb0aa820577c0fc380fcaf9f3a296fecd50fdbe3a22931da674","target":"graph","created_at":"2026-07-05T02:28:20Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2102.01672/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"We introduce GEM, a living benchmark for natural language Generation (NLG), its Evaluation, and Metrics. Measuring progress in NLG relies on a constantly evolving ecosystem of automated metrics, datasets, and human evaluation standards. Due to this moving target, new models often still evaluate on divergent anglo-centric corpora with well-established, but flawed, metrics. This disconnect makes it challenging to identify the limitations of current models and opportunities for progress. Addressing this limitation, GEM provides an environment in which models can easily be applied to a wide set of","authors_text":"Akhila Yerukola, Aman Madaan, Anastasia Shimorina, Angelina McMillan-Major, Ankur Parikh, Antoine Bosselut, Aremu Anuoluwapo, Bodhisattwa Prasad Majumder, Chris Emezue, Cristina Garbacea, Dhruv Kumar, Dipanjan Das, Diyi Yang, Emiel van Miltenburg, Esin Durmus, Faisal Ladhak, Harsh Jhamtani, Hendrik Strobelt, Jiawei Zhou, Jo\\~ao Sedoc, Juan Diego Rodriguez, Karmanya Aggarwal, Kaustubh D. Dhole, Khyathi Raghavi Chandu, Khyati Mahajan, Laura Perez-Beltrachini, Marco Antonio Sobrevilla Cabezudo, Mihir Kale, Miruna Clinciu, Moin Nadeem, Mounica Maddela, Niranjan Ramesh Rao, Nishant Subramani, Ond\\v{r}ej Du\\v{s}ek, Pawan Sasanka Ammanamanchi, Pedro Henrique Martins, Rubungo Andre Niyongabo, Saad Mahamood, Salomey Osei, Samira Shaikh, Sashank Santhanam, Sebastian Gehrmann, Shailza Jolly, Shashi Narayan, Simon Mille, Tatsunori Hashimoto, Thibault Sellam, Tosin Adewumi, Varun Gangal, Vikas Raunak, Vitaly Nikolaev, Wanyu Du, Wei Xu, Yacine Jernite, Yangfeng Ji, Yufang Hou","cross_cats":["cs.AI","cs.LG"],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2021-02-02T18:42:05Z","title":"The GEM Benchmark: Natural Language Generation, its Evaluation and Metrics"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2102.01672","kind":"arxiv","version":3},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:5619f64e9bb7f7680fa4f067a2a3f0b81b8a8e88dec4b001ab893162fad823a7","target":"record","created_at":"2026-07-05T02:28:20Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"e8bbe77e8299fad208c011be998f72a63411a86e9a441e89359e7d3a78277dbd","cross_cats_sorted":["cs.AI","cs.LG"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2021-02-02T18:42:05Z","title_canon_sha256":"b43393e3473623e2b14443a9a5d46e5f7a4509c6ac1654d28a361c46bc1fcf68"},"schema_version":"1.0","source":{"id":"2102.01672","kind":"arxiv","version":3}},"canonical_sha256":"e123180a9984a6a33f34fee8be8cf6e112d5aea6b6cdb25929df495862b989c1","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"e123180a9984a6a33f34fee8be8cf6e112d5aea6b6cdb25929df495862b989c1","first_computed_at":"2026-07-05T02:28:20.310491Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-07-05T02:28:20.310491Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"wMjC/o/XAko+d3V68Y9dDvihcsqZPBXizU4qPlSyGCfORrRzNFg8Pm+tGf9OgMx9vcFz5GVZ+xfcdX1teTQJAQ==","signature_status":"signed_v1","signed_at":"2026-07-05T02:28:20.311000Z","signed_message":"canonical_sha256_bytes"},"source_id":"2102.01672","source_kind":"arxiv","source_version":3}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:5619f64e9bb7f7680fa4f067a2a3f0b81b8a8e88dec4b001ab893162fad823a7","sha256:96f181e62716feb0aa820577c0fc380fcaf9f3a296fecd50fdbe3a22931da674"],"state_sha256":"4b66266ec6e14ecc58da946435ac215ae7cdee8dd0197581d23b12fc9db702b5"}