{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2021:G3GYNY453DU6MDFE6YR3MF3ZDU","short_pith_number":"pith:G3GYNY45","canonical_record":{"source":{"id":"2112.03529","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2021-12-07T06:58:22Z","cross_cats_sorted":[],"title_canon_sha256":"e4c6a06df4c08364269eab7c8a6d0c1f30c97da503c3bceaf07f8cc63f9c23be","abstract_canon_sha256":"8386ee0fa1ed3d79222d8e842528edc023442695d713942b3d6a95dfc8d33135"},"schema_version":"1.0"},"canonical_sha256":"36cd86e39dd8e9e60ca4f623b617791d06e25986cd5f577ffed8184f275303dd","source":{"kind":"arxiv","id":"2112.03529","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2112.03529","created_at":"2026-07-05T03:38:46Z"},{"alias_kind":"arxiv_version","alias_value":"2112.03529v1","created_at":"2026-07-05T03:38:46Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2112.03529","created_at":"2026-07-05T03:38:46Z"},{"alias_kind":"pith_short_12","alias_value":"G3GYNY453DU6","created_at":"2026-07-05T03:38:46Z"},{"alias_kind":"pith_short_16","alias_value":"G3GYNY453DU6MDFE","created_at":"2026-07-05T03:38:46Z"},{"alias_kind":"pith_short_8","alias_value":"G3GYNY45","created_at":"2026-07-05T03:38:46Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2021:G3GYNY453DU6MDFE6YR3MF3ZDU","target":"record","payload":{"canonical_record":{"source":{"id":"2112.03529","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2021-12-07T06:58:22Z","cross_cats_sorted":[],"title_canon_sha256":"e4c6a06df4c08364269eab7c8a6d0c1f30c97da503c3bceaf07f8cc63f9c23be","abstract_canon_sha256":"8386ee0fa1ed3d79222d8e842528edc023442695d713942b3d6a95dfc8d33135"},"schema_version":"1.0"},"canonical_sha256":"36cd86e39dd8e9e60ca4f623b617791d06e25986cd5f577ffed8184f275303dd","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-07-05T03:38:46.555327Z","signature_b64":"n/9LDxV8a1bL/dR2rCklLSpb9T19WG2+IcE08BpbbhLyJKJei63Ki5FywCS9xwCRqo4vp1ZvJiPkfCJh4ij2Cw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"36cd86e39dd8e9e60ca4f623b617791d06e25986cd5f577ffed8184f275303dd","last_reissued_at":"2026-07-05T03:38:46.554888Z","signature_status":"signed_v1","first_computed_at":"2026-07-05T03:38:46.554888Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2112.03529","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-07-05T03:38:46Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"zbgT0rZ/mUr1+wjagX5ISHpQqU5eI3kGH7pWWE+SK1Y3AHTKswMvfRAjQhoqdYdVArTvQtU38xHZYNn5r+zyBA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-07-05T13:34:10.489047Z"},"content_sha256":"cbdc0bffeaaae08e1f45b4a0d64a7c2a6572a2a30a4306724f6db4d483702a3b","schema_version":"1.0","event_id":"sha256:cbdc0bffeaaae08e1f45b4a0d64a7c2a6572a2a30a4306724f6db4d483702a3b"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2021:G3GYNY453DU6MDFE6YR3MF3ZDU","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Ground-Truth, Whose Truth? -- Examining the Challenges with Annotating Toxic Text Datasets","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Dennis Wei, Ioana Baldini, Karthikeyan Natesan Ramamurthy, Kofi Arhin, Moninder Singh","submitted_at":"2021-12-07T06:58:22Z","abstract_excerpt":"The use of machine learning (ML)-based language models (LMs) to monitor content online is on the rise. For toxic text identification, task-specific fine-tuning of these models are performed using datasets labeled by annotators who provide ground-truth labels in an effort to distinguish between offensive and normal content. These projects have led to the development, improvement, and expansion of large datasets over time, and have contributed immensely to research on natural language. Despite the achievements, existing evidence suggests that ML models built on these datasets do not always resul"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2112.03529","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2112.03529/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-07-05T03:38:46Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"FT0n3cCmFYBpTgkARCTF2F9MVA0QKEtwz/8yPm3n9hTKXid09aI7k4DELw9RyYrpvao38oaNM63ZX6kzBU5jCg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-07-05T13:34:10.489698Z"},"content_sha256":"324a73c61e6ee8ac38eb55e4da338f8bc00b956e1cb53c30701d69c2f7abd392","schema_version":"1.0","event_id":"sha256:324a73c61e6ee8ac38eb55e4da338f8bc00b956e1cb53c30701d69c2f7abd392"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/G3GYNY453DU6MDFE6YR3MF3ZDU/bundle.json","state_url":"https://pith.science/pith/G3GYNY453DU6MDFE6YR3MF3ZDU/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/G3GYNY453DU6MDFE6YR3MF3ZDU/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-07-05T13:34:10Z","links":{"resolver":"https://pith.science/pith/G3GYNY453DU6MDFE6YR3MF3ZDU","bundle":"https://pith.science/pith/G3GYNY453DU6MDFE6YR3MF3ZDU/bundle.json","state":"https://pith.science/pith/G3GYNY453DU6MDFE6YR3MF3ZDU/state.json","well_known_bundle":"https://pith.science/.well-known/pith/G3GYNY453DU6MDFE6YR3MF3ZDU/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2021:G3GYNY453DU6MDFE6YR3MF3ZDU","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"8386ee0fa1ed3d79222d8e842528edc023442695d713942b3d6a95dfc8d33135","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2021-12-07T06:58:22Z","title_canon_sha256":"e4c6a06df4c08364269eab7c8a6d0c1f30c97da503c3bceaf07f8cc63f9c23be"},"schema_version":"1.0","source":{"id":"2112.03529","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2112.03529","created_at":"2026-07-05T03:38:46Z"},{"alias_kind":"arxiv_version","alias_value":"2112.03529v1","created_at":"2026-07-05T03:38:46Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2112.03529","created_at":"2026-07-05T03:38:46Z"},{"alias_kind":"pith_short_12","alias_value":"G3GYNY453DU6","created_at":"2026-07-05T03:38:46Z"},{"alias_kind":"pith_short_16","alias_value":"G3GYNY453DU6MDFE","created_at":"2026-07-05T03:38:46Z"},{"alias_kind":"pith_short_8","alias_value":"G3GYNY45","created_at":"2026-07-05T03:38:46Z"}],"graph_snapshots":[{"event_id":"sha256:324a73c61e6ee8ac38eb55e4da338f8bc00b956e1cb53c30701d69c2f7abd392","target":"graph","created_at":"2026-07-05T03:38:46Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2112.03529/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"The use of machine learning (ML)-based language models (LMs) to monitor content online is on the rise. For toxic text identification, task-specific fine-tuning of these models are performed using datasets labeled by annotators who provide ground-truth labels in an effort to distinguish between offensive and normal content. These projects have led to the development, improvement, and expansion of large datasets over time, and have contributed immensely to research on natural language. Despite the achievements, existing evidence suggests that ML models built on these datasets do not always resul","authors_text":"Dennis Wei, Ioana Baldini, Karthikeyan Natesan Ramamurthy, Kofi Arhin, Moninder Singh","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2021-12-07T06:58:22Z","title":"Ground-Truth, Whose Truth? -- Examining the Challenges with Annotating Toxic Text Datasets"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2112.03529","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:cbdc0bffeaaae08e1f45b4a0d64a7c2a6572a2a30a4306724f6db4d483702a3b","target":"record","created_at":"2026-07-05T03:38:46Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"8386ee0fa1ed3d79222d8e842528edc023442695d713942b3d6a95dfc8d33135","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2021-12-07T06:58:22Z","title_canon_sha256":"e4c6a06df4c08364269eab7c8a6d0c1f30c97da503c3bceaf07f8cc63f9c23be"},"schema_version":"1.0","source":{"id":"2112.03529","kind":"arxiv","version":1}},"canonical_sha256":"36cd86e39dd8e9e60ca4f623b617791d06e25986cd5f577ffed8184f275303dd","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"36cd86e39dd8e9e60ca4f623b617791d06e25986cd5f577ffed8184f275303dd","first_computed_at":"2026-07-05T03:38:46.554888Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-07-05T03:38:46.554888Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"n/9LDxV8a1bL/dR2rCklLSpb9T19WG2+IcE08BpbbhLyJKJei63Ki5FywCS9xwCRqo4vp1ZvJiPkfCJh4ij2Cw==","signature_status":"signed_v1","signed_at":"2026-07-05T03:38:46.555327Z","signed_message":"canonical_sha256_bytes"},"source_id":"2112.03529","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:cbdc0bffeaaae08e1f45b4a0d64a7c2a6572a2a30a4306724f6db4d483702a3b","sha256:324a73c61e6ee8ac38eb55e4da338f8bc00b956e1cb53c30701d69c2f7abd392"],"state_sha256":"8f27f9322e260c7da9d93d67d89e26b643db659fb5f879b5c4978343338f28f7"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"UDKArieLUbonNQmYK4RwlfBAd3yD4eHJnYy8UtTe8SaTj2YpDDAoFNVhAlY17oJJg4z5QOVA3a0ccKauTB1NAw==","signed_message":"bundle_sha256_bytes","signed_at":"2026-07-05T13:34:10.493014Z","bundle_sha256":"e666602b8c6ff2b3014d7997bc387d6d36d7b7034b06bca7017df63d94121b93"}}