{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2018:FNBHHYUDNXE5LGO63Q6T33GQ3D","short_pith_number":"pith:FNBHHYUD","canonical_record":{"source":{"id":"1803.09337","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2018-03-25T20:53:40Z","cross_cats_sorted":[],"title_canon_sha256":"565302f7dc90b1b896920d952b7ca53d86579f461dd5cd5084c549b08412e6a7","abstract_canon_sha256":"2946e28d2db413f33dee184cf03ed715fc4060413b09546accabd15566550cc7"},"schema_version":"1.0"},"canonical_sha256":"2b4273e2836dc9d599dedc3d3decd0d8cf628e8049256dd64c23d83500681711","source":{"kind":"arxiv","id":"1803.09337","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1803.09337","created_at":"2026-05-18T00:20:11Z"},{"alias_kind":"arxiv_version","alias_value":"1803.09337v1","created_at":"2026-05-18T00:20:11Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1803.09337","created_at":"2026-05-18T00:20:11Z"},{"alias_kind":"pith_short_12","alias_value":"FNBHHYUDNXE5","created_at":"2026-05-18T12:32:25Z"},{"alias_kind":"pith_short_16","alias_value":"FNBHHYUDNXE5LGO6","created_at":"2026-05-18T12:32:25Z"},{"alias_kind":"pith_short_8","alias_value":"FNBHHYUD","created_at":"2026-05-18T12:32:25Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2018:FNBHHYUDNXE5LGO63Q6T33GQ3D","target":"record","payload":{"canonical_record":{"source":{"id":"1803.09337","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2018-03-25T20:53:40Z","cross_cats_sorted":[],"title_canon_sha256":"565302f7dc90b1b896920d952b7ca53d86579f461dd5cd5084c549b08412e6a7","abstract_canon_sha256":"2946e28d2db413f33dee184cf03ed715fc4060413b09546accabd15566550cc7"},"schema_version":"1.0"},"canonical_sha256":"2b4273e2836dc9d599dedc3d3decd0d8cf628e8049256dd64c23d83500681711","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:20:11.969378Z","signature_b64":"39PB3fqa919zs7GZF0MvsQKLv0JmP2mNT2jX1NzJg3x+SfYyJYayPDaZ7xE+U10T9hffGmde4TT5a09b+TX9BQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"2b4273e2836dc9d599dedc3d3decd0d8cf628e8049256dd64c23d83500681711","last_reissued_at":"2026-05-18T00:20:11.967679Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:20:11.967679Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1803.09337","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:20:11Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"URbPoFUVokFGkmwqLcBBb/3b2a017CvjVpxvMOoDUNgVZaPg/KTK1tI8J5wN0sjxMXSVXfKz7kOhKum/3kdOCQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-31T05:26:37.843665Z"},"content_sha256":"9ccb9dbadbb8428397730ffbfe11025a5359ce847be1cfe9ce98e2baac246441","schema_version":"1.0","event_id":"sha256:9ccb9dbadbb8428397730ffbfe11025a5359ce847be1cfe9ce98e2baac246441"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2018:FNBHHYUDNXE5LGO63Q6T33GQ3D","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Text Segmentation as a Supervised Learning Task","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Adir Cohen, Jonathan Berant, Michael Rotman, Noam Mor, Omri Koshorek","submitted_at":"2018-03-25T20:53:40Z","abstract_excerpt":"Text segmentation, the task of dividing a document into contiguous segments based on its semantic structure, is a longstanding challenge in language understanding. Previous work on text segmentation focused on unsupervised methods such as clustering or graph search, due to the paucity in labeled data. In this work, we formulate text segmentation as a supervised learning problem, and present a large new dataset for text segmentation that is automatically extracted and labeled from Wikipedia. Moreover, we develop a segmentation model based on this dataset and show that it generalizes well to uns"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1803.09337","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:20:11Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"daaENC5/FIIreDbrfKbWhIjZkst4ys8dvCZbM0gwWfyDHHDt1ybWJO7uMGesWN3GK4wdn6DjJUIbN+BTWFTRAw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-31T05:26:37.844382Z"},"content_sha256":"80e8263f424f473659f5d05f8b708803f88814193db8f405635623350836cc03","schema_version":"1.0","event_id":"sha256:80e8263f424f473659f5d05f8b708803f88814193db8f405635623350836cc03"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/FNBHHYUDNXE5LGO63Q6T33GQ3D/bundle.json","state_url":"https://pith.science/pith/FNBHHYUDNXE5LGO63Q6T33GQ3D/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/FNBHHYUDNXE5LGO63Q6T33GQ3D/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-31T05:26:37Z","links":{"resolver":"https://pith.science/pith/FNBHHYUDNXE5LGO63Q6T33GQ3D","bundle":"https://pith.science/pith/FNBHHYUDNXE5LGO63Q6T33GQ3D/bundle.json","state":"https://pith.science/pith/FNBHHYUDNXE5LGO63Q6T33GQ3D/state.json","well_known_bundle":"https://pith.science/.well-known/pith/FNBHHYUDNXE5LGO63Q6T33GQ3D/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2018:FNBHHYUDNXE5LGO63Q6T33GQ3D","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"2946e28d2db413f33dee184cf03ed715fc4060413b09546accabd15566550cc7","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2018-03-25T20:53:40Z","title_canon_sha256":"565302f7dc90b1b896920d952b7ca53d86579f461dd5cd5084c549b08412e6a7"},"schema_version":"1.0","source":{"id":"1803.09337","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1803.09337","created_at":"2026-05-18T00:20:11Z"},{"alias_kind":"arxiv_version","alias_value":"1803.09337v1","created_at":"2026-05-18T00:20:11Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1803.09337","created_at":"2026-05-18T00:20:11Z"},{"alias_kind":"pith_short_12","alias_value":"FNBHHYUDNXE5","created_at":"2026-05-18T12:32:25Z"},{"alias_kind":"pith_short_16","alias_value":"FNBHHYUDNXE5LGO6","created_at":"2026-05-18T12:32:25Z"},{"alias_kind":"pith_short_8","alias_value":"FNBHHYUD","created_at":"2026-05-18T12:32:25Z"}],"graph_snapshots":[{"event_id":"sha256:80e8263f424f473659f5d05f8b708803f88814193db8f405635623350836cc03","target":"graph","created_at":"2026-05-18T00:20:11Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Text segmentation, the task of dividing a document into contiguous segments based on its semantic structure, is a longstanding challenge in language understanding. Previous work on text segmentation focused on unsupervised methods such as clustering or graph search, due to the paucity in labeled data. In this work, we formulate text segmentation as a supervised learning problem, and present a large new dataset for text segmentation that is automatically extracted and labeled from Wikipedia. Moreover, we develop a segmentation model based on this dataset and show that it generalizes well to uns","authors_text":"Adir Cohen, Jonathan Berant, Michael Rotman, Noam Mor, Omri Koshorek","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2018-03-25T20:53:40Z","title":"Text Segmentation as a Supervised Learning Task"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1803.09337","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:9ccb9dbadbb8428397730ffbfe11025a5359ce847be1cfe9ce98e2baac246441","target":"record","created_at":"2026-05-18T00:20:11Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"2946e28d2db413f33dee184cf03ed715fc4060413b09546accabd15566550cc7","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2018-03-25T20:53:40Z","title_canon_sha256":"565302f7dc90b1b896920d952b7ca53d86579f461dd5cd5084c549b08412e6a7"},"schema_version":"1.0","source":{"id":"1803.09337","kind":"arxiv","version":1}},"canonical_sha256":"2b4273e2836dc9d599dedc3d3decd0d8cf628e8049256dd64c23d83500681711","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"2b4273e2836dc9d599dedc3d3decd0d8cf628e8049256dd64c23d83500681711","first_computed_at":"2026-05-18T00:20:11.967679Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:20:11.967679Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"39PB3fqa919zs7GZF0MvsQKLv0JmP2mNT2jX1NzJg3x+SfYyJYayPDaZ7xE+U10T9hffGmde4TT5a09b+TX9BQ==","signature_status":"signed_v1","signed_at":"2026-05-18T00:20:11.969378Z","signed_message":"canonical_sha256_bytes"},"source_id":"1803.09337","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:9ccb9dbadbb8428397730ffbfe11025a5359ce847be1cfe9ce98e2baac246441","sha256:80e8263f424f473659f5d05f8b708803f88814193db8f405635623350836cc03"],"state_sha256":"118e324a9119e2f6a40f0792b7d24a70d963fa8fc01ab2abbeb715d8ba09a411"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"InsK/9Z4Dg0isG84xMdP6w4WKHLWFLkhIHFYvXm0EMSt/m3G/0mr5GJrBVbC9Y2gISVtG+FAyO4CdTp2jCnVBQ==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-31T05:26:37.847186Z","bundle_sha256":"fc03ee5f5e0013822380a6605f90fc2220d711ddaa23ed00f9847b515b15818e"}}