{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2017:WYEZJRMFLKI57LVU76JVR7YOUP","short_pith_number":"pith:WYEZJRMF","canonical_record":{"source":{"id":"1709.01888","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2017-09-05T02:38:44Z","cross_cats_sorted":["cs.LG"],"title_canon_sha256":"fdeb8630190fb5881082c150b81776264c4082d64457db12c1f1c876cd8f1c29","abstract_canon_sha256":"2b569a5231185ccf746083cbba3f65a9f7dabbd59861907f8c233ec685d2bd6d"},"schema_version":"1.0"},"canonical_sha256":"b60994c5855a91dfaeb4ff9358ff0ea3c73f48d6d268e7e9962d54e5e09bbe77","source":{"kind":"arxiv","id":"1709.01888","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1709.01888","created_at":"2026-05-18T00:35:54Z"},{"alias_kind":"arxiv_version","alias_value":"1709.01888v1","created_at":"2026-05-18T00:35:54Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1709.01888","created_at":"2026-05-18T00:35:54Z"},{"alias_kind":"pith_short_12","alias_value":"WYEZJRMFLKI5","created_at":"2026-05-18T12:31:53Z"},{"alias_kind":"pith_short_16","alias_value":"WYEZJRMFLKI57LVU","created_at":"2026-05-18T12:31:53Z"},{"alias_kind":"pith_short_8","alias_value":"WYEZJRMF","created_at":"2026-05-18T12:31:53Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2017:WYEZJRMFLKI57LVU76JVR7YOUP","target":"record","payload":{"canonical_record":{"source":{"id":"1709.01888","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2017-09-05T02:38:44Z","cross_cats_sorted":["cs.LG"],"title_canon_sha256":"fdeb8630190fb5881082c150b81776264c4082d64457db12c1f1c876cd8f1c29","abstract_canon_sha256":"2b569a5231185ccf746083cbba3f65a9f7dabbd59861907f8c233ec685d2bd6d"},"schema_version":"1.0"},"canonical_sha256":"b60994c5855a91dfaeb4ff9358ff0ea3c73f48d6d268e7e9962d54e5e09bbe77","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:35:54.115447Z","signature_b64":"dL0a1/LvbsiF79LBxmBQ6Y0XCrhFhmpxHbVLWklZ0KMPNA3/lfn35r6m/KSRuFzkUVEXspSoCeEPU57gCdslBA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"b60994c5855a91dfaeb4ff9358ff0ea3c73f48d6d268e7e9962d54e5e09bbe77","last_reissued_at":"2026-05-18T00:35:54.114816Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:35:54.114816Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1709.01888","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:35:54Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"DmKYlf3rM1fKxmFgyfhxjQvKOlzhuZWUJ3qtnx2bdgUYV7ByuFhchtUCokDAQxqi9UWJx8CMnlRCo4D/b1ocAg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-06T11:16:25.454138Z"},"content_sha256":"f17e05db1df1060fa4688f4f7bd99b9619a0496259a9f6ed8797a03191750a66","schema_version":"1.0","event_id":"sha256:f17e05db1df1060fa4688f4f7bd99b9619a0496259a9f6ed8797a03191750a66"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2017:WYEZJRMFLKI57LVU76JVR7YOUP","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Language Modeling by Clustering with Word Embeddings for Text Readability Assessment","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.LG"],"primary_cat":"cs.CL","authors_text":"H.T. Kung, Miriam Cha, Youngjune Gwon","submitted_at":"2017-09-05T02:38:44Z","abstract_excerpt":"We present a clustering-based language model using word embeddings for text readability prediction. Presumably, an Euclidean semantic space hypothesis holds true for word embeddings whose training is done by observing word co-occurrences. We argue that clustering with word embeddings in the metric space should yield feature representations in a higher semantic space appropriate for text regression. Also, by representing features in terms of histograms, our approach can naturally address documents of varying lengths. An empirical evaluation using the Common Core Standards corpus reveals that th"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1709.01888","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:35:54Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"qK2OQn/w1dOFj2RnmyfBIm8M8v/hCVu2fE2u/UG+fQikusDamXqlihxh64PaCWkq/pgrAyuovyi22WK1jmc3CQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-06T11:16:25.454580Z"},"content_sha256":"110bf5f4db84b58de3bca4a02e6299a297ca968a3908af920e5b502f2a2eafb0","schema_version":"1.0","event_id":"sha256:110bf5f4db84b58de3bca4a02e6299a297ca968a3908af920e5b502f2a2eafb0"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/WYEZJRMFLKI57LVU76JVR7YOUP/bundle.json","state_url":"https://pith.science/pith/WYEZJRMFLKI57LVU76JVR7YOUP/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/WYEZJRMFLKI57LVU76JVR7YOUP/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-06T11:16:25Z","links":{"resolver":"https://pith.science/pith/WYEZJRMFLKI57LVU76JVR7YOUP","bundle":"https://pith.science/pith/WYEZJRMFLKI57LVU76JVR7YOUP/bundle.json","state":"https://pith.science/pith/WYEZJRMFLKI57LVU76JVR7YOUP/state.json","well_known_bundle":"https://pith.science/.well-known/pith/WYEZJRMFLKI57LVU76JVR7YOUP/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2017:WYEZJRMFLKI57LVU76JVR7YOUP","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"2b569a5231185ccf746083cbba3f65a9f7dabbd59861907f8c233ec685d2bd6d","cross_cats_sorted":["cs.LG"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2017-09-05T02:38:44Z","title_canon_sha256":"fdeb8630190fb5881082c150b81776264c4082d64457db12c1f1c876cd8f1c29"},"schema_version":"1.0","source":{"id":"1709.01888","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1709.01888","created_at":"2026-05-18T00:35:54Z"},{"alias_kind":"arxiv_version","alias_value":"1709.01888v1","created_at":"2026-05-18T00:35:54Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1709.01888","created_at":"2026-05-18T00:35:54Z"},{"alias_kind":"pith_short_12","alias_value":"WYEZJRMFLKI5","created_at":"2026-05-18T12:31:53Z"},{"alias_kind":"pith_short_16","alias_value":"WYEZJRMFLKI57LVU","created_at":"2026-05-18T12:31:53Z"},{"alias_kind":"pith_short_8","alias_value":"WYEZJRMF","created_at":"2026-05-18T12:31:53Z"}],"graph_snapshots":[{"event_id":"sha256:110bf5f4db84b58de3bca4a02e6299a297ca968a3908af920e5b502f2a2eafb0","target":"graph","created_at":"2026-05-18T00:35:54Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"We present a clustering-based language model using word embeddings for text readability prediction. Presumably, an Euclidean semantic space hypothesis holds true for word embeddings whose training is done by observing word co-occurrences. We argue that clustering with word embeddings in the metric space should yield feature representations in a higher semantic space appropriate for text regression. Also, by representing features in terms of histograms, our approach can naturally address documents of varying lengths. An empirical evaluation using the Common Core Standards corpus reveals that th","authors_text":"H.T. Kung, Miriam Cha, Youngjune Gwon","cross_cats":["cs.LG"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2017-09-05T02:38:44Z","title":"Language Modeling by Clustering with Word Embeddings for Text Readability Assessment"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1709.01888","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:f17e05db1df1060fa4688f4f7bd99b9619a0496259a9f6ed8797a03191750a66","target":"record","created_at":"2026-05-18T00:35:54Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"2b569a5231185ccf746083cbba3f65a9f7dabbd59861907f8c233ec685d2bd6d","cross_cats_sorted":["cs.LG"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2017-09-05T02:38:44Z","title_canon_sha256":"fdeb8630190fb5881082c150b81776264c4082d64457db12c1f1c876cd8f1c29"},"schema_version":"1.0","source":{"id":"1709.01888","kind":"arxiv","version":1}},"canonical_sha256":"b60994c5855a91dfaeb4ff9358ff0ea3c73f48d6d268e7e9962d54e5e09bbe77","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"b60994c5855a91dfaeb4ff9358ff0ea3c73f48d6d268e7e9962d54e5e09bbe77","first_computed_at":"2026-05-18T00:35:54.114816Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:35:54.114816Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"dL0a1/LvbsiF79LBxmBQ6Y0XCrhFhmpxHbVLWklZ0KMPNA3/lfn35r6m/KSRuFzkUVEXspSoCeEPU57gCdslBA==","signature_status":"signed_v1","signed_at":"2026-05-18T00:35:54.115447Z","signed_message":"canonical_sha256_bytes"},"source_id":"1709.01888","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:f17e05db1df1060fa4688f4f7bd99b9619a0496259a9f6ed8797a03191750a66","sha256:110bf5f4db84b58de3bca4a02e6299a297ca968a3908af920e5b502f2a2eafb0"],"state_sha256":"382a78244368b5fc9957bc4e8ff969e370b3bd981c1136db1b6da062f6d0537a"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"HzyynF2MN5dybeFN9iwTREAyDhKWoV5uaVNgAVYZ7HRyc9oSQUKCClnPYR1q8X+3XiCdEhZunJp77H/+QY4VAw==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-06T11:16:25.457090Z","bundle_sha256":"d9dfdc480eb8d6ae9c2fa2d2824ed3875338d961c2898e44017bbe9a6cda1edd"}}