{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2017:MYVTTN3JKFYI34TYHVL535VPBK","short_pith_number":"pith:MYVTTN3J","canonical_record":{"source":{"id":"1702.04457","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2017-02-15T03:35:03Z","cross_cats_sorted":[],"title_canon_sha256":"d3f1b566820bbbd342787882e49ce3ae8d0ca20d0039b008dbd6868cdd1e9723","abstract_canon_sha256":"bce7023a9e854b958350bd095c4db388802831b2c1e41b5b3f650588bf5454e1"},"schema_version":"1.0"},"canonical_sha256":"662b39b76951708df2783d57ddf6af0a93c60813f90ff19e830bc623cbe33342","source":{"kind":"arxiv","id":"1702.04457","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1702.04457","created_at":"2026-05-18T00:48:53Z"},{"alias_kind":"arxiv_version","alias_value":"1702.04457v2","created_at":"2026-05-18T00:48:53Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1702.04457","created_at":"2026-05-18T00:48:53Z"},{"alias_kind":"pith_short_12","alias_value":"MYVTTN3JKFYI","created_at":"2026-05-18T12:31:31Z"},{"alias_kind":"pith_short_16","alias_value":"MYVTTN3JKFYI34TY","created_at":"2026-05-18T12:31:31Z"},{"alias_kind":"pith_short_8","alias_value":"MYVTTN3J","created_at":"2026-05-18T12:31:31Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2017:MYVTTN3JKFYI34TYHVL535VPBK","target":"record","payload":{"canonical_record":{"source":{"id":"1702.04457","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2017-02-15T03:35:03Z","cross_cats_sorted":[],"title_canon_sha256":"d3f1b566820bbbd342787882e49ce3ae8d0ca20d0039b008dbd6868cdd1e9723","abstract_canon_sha256":"bce7023a9e854b958350bd095c4db388802831b2c1e41b5b3f650588bf5454e1"},"schema_version":"1.0"},"canonical_sha256":"662b39b76951708df2783d57ddf6af0a93c60813f90ff19e830bc623cbe33342","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:48:53.449139Z","signature_b64":"8Ya05iQAxHtVu5YfKZlrq25LerST7ywncaX7TxuBpR6MEaVGod7lVAnqlSNyit/8gFIodX++ERGZde0LN1VnDg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"662b39b76951708df2783d57ddf6af0a93c60813f90ff19e830bc623cbe33342","last_reissued_at":"2026-05-18T00:48:53.448243Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:48:53.448243Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1702.04457","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:48:53Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"Q6K32TAfJwciIDg8pyrhyjCdvDm/a31ZEm7rv69JFYgQaM6XNkapTALVKa0pJDxyOsErCVzIaCdoAturks8fDg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-26T11:21:23.023090Z"},"content_sha256":"37a54976667a94efea9643a139e25cf2788d4d762ac31bca5c570626eba3498c","schema_version":"1.0","event_id":"sha256:37a54976667a94efea9643a139e25cf2788d4d762ac31bca5c570626eba3498c"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2017:MYVTTN3JKFYI34TYHVL535VPBK","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Automated Phrase Mining from Massive Text Corpora","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Clare R Voss, Jialu Liu, Jiawei Han, Jingbo Shang, Meng Jiang, Xiang Ren","submitted_at":"2017-02-15T03:35:03Z","abstract_excerpt":"As one of the fundamental tasks in text analysis, phrase mining aims at extracting quality phrases from a text corpus. Phrase mining is important in various tasks such as information extraction/retrieval, taxonomy construction, and topic modeling. Most existing methods rely on complex, trained linguistic analyzers, and thus likely have unsatisfactory performance on text corpora of new domains and genres without extra but expensive adaption. Recently, a few data-driven methods have been developed successfully for extraction of phrases from massive domain-specific text. However, none of the stat"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1702.04457","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:48:53Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"lzfL9AAFYL1mxFZkmrQfkmFFG0/iTwptoBqZ4oecrFhTmTiCNY/SGQz/gw/strEuvQ1RxFDl+ip7vCNM+VL1AA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-26T11:21:23.023756Z"},"content_sha256":"fab8fd2d133e2bd24af978d4e5b4761db16daafd03a821c14f123e374be7cfe0","schema_version":"1.0","event_id":"sha256:fab8fd2d133e2bd24af978d4e5b4761db16daafd03a821c14f123e374be7cfe0"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/MYVTTN3JKFYI34TYHVL535VPBK/bundle.json","state_url":"https://pith.science/pith/MYVTTN3JKFYI34TYHVL535VPBK/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/MYVTTN3JKFYI34TYHVL535VPBK/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-26T11:21:23Z","links":{"resolver":"https://pith.science/pith/MYVTTN3JKFYI34TYHVL535VPBK","bundle":"https://pith.science/pith/MYVTTN3JKFYI34TYHVL535VPBK/bundle.json","state":"https://pith.science/pith/MYVTTN3JKFYI34TYHVL535VPBK/state.json","well_known_bundle":"https://pith.science/.well-known/pith/MYVTTN3JKFYI34TYHVL535VPBK/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2017:MYVTTN3JKFYI34TYHVL535VPBK","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"bce7023a9e854b958350bd095c4db388802831b2c1e41b5b3f650588bf5454e1","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2017-02-15T03:35:03Z","title_canon_sha256":"d3f1b566820bbbd342787882e49ce3ae8d0ca20d0039b008dbd6868cdd1e9723"},"schema_version":"1.0","source":{"id":"1702.04457","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1702.04457","created_at":"2026-05-18T00:48:53Z"},{"alias_kind":"arxiv_version","alias_value":"1702.04457v2","created_at":"2026-05-18T00:48:53Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1702.04457","created_at":"2026-05-18T00:48:53Z"},{"alias_kind":"pith_short_12","alias_value":"MYVTTN3JKFYI","created_at":"2026-05-18T12:31:31Z"},{"alias_kind":"pith_short_16","alias_value":"MYVTTN3JKFYI34TY","created_at":"2026-05-18T12:31:31Z"},{"alias_kind":"pith_short_8","alias_value":"MYVTTN3J","created_at":"2026-05-18T12:31:31Z"}],"graph_snapshots":[{"event_id":"sha256:fab8fd2d133e2bd24af978d4e5b4761db16daafd03a821c14f123e374be7cfe0","target":"graph","created_at":"2026-05-18T00:48:53Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"As one of the fundamental tasks in text analysis, phrase mining aims at extracting quality phrases from a text corpus. Phrase mining is important in various tasks such as information extraction/retrieval, taxonomy construction, and topic modeling. Most existing methods rely on complex, trained linguistic analyzers, and thus likely have unsatisfactory performance on text corpora of new domains and genres without extra but expensive adaption. Recently, a few data-driven methods have been developed successfully for extraction of phrases from massive domain-specific text. However, none of the stat","authors_text":"Clare R Voss, Jialu Liu, Jiawei Han, Jingbo Shang, Meng Jiang, Xiang Ren","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2017-02-15T03:35:03Z","title":"Automated Phrase Mining from Massive Text Corpora"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1702.04457","kind":"arxiv","version":2},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:37a54976667a94efea9643a139e25cf2788d4d762ac31bca5c570626eba3498c","target":"record","created_at":"2026-05-18T00:48:53Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"bce7023a9e854b958350bd095c4db388802831b2c1e41b5b3f650588bf5454e1","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2017-02-15T03:35:03Z","title_canon_sha256":"d3f1b566820bbbd342787882e49ce3ae8d0ca20d0039b008dbd6868cdd1e9723"},"schema_version":"1.0","source":{"id":"1702.04457","kind":"arxiv","version":2}},"canonical_sha256":"662b39b76951708df2783d57ddf6af0a93c60813f90ff19e830bc623cbe33342","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"662b39b76951708df2783d57ddf6af0a93c60813f90ff19e830bc623cbe33342","first_computed_at":"2026-05-18T00:48:53.448243Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:48:53.448243Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"8Ya05iQAxHtVu5YfKZlrq25LerST7ywncaX7TxuBpR6MEaVGod7lVAnqlSNyit/8gFIodX++ERGZde0LN1VnDg==","signature_status":"signed_v1","signed_at":"2026-05-18T00:48:53.449139Z","signed_message":"canonical_sha256_bytes"},"source_id":"1702.04457","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:37a54976667a94efea9643a139e25cf2788d4d762ac31bca5c570626eba3498c","sha256:fab8fd2d133e2bd24af978d4e5b4761db16daafd03a821c14f123e374be7cfe0"],"state_sha256":"c06c22b842806f2ead1093a81edaf9d78d37fe818d1f6e47bbbf5c7c4988014f"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"l2czYbPL8wnqu3b7DSeoJd1B3Uas3erHjSLTeCWGSMUdBpxsEZoNyuZ2q/k5lfnKwv/L3GlloRgOdneTT06eBQ==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-26T11:21:23.027199Z","bundle_sha256":"2deed836061fa235af2fa1ee0487e635d2c3f0d7c8fc434a897e22715004a555"}}