{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2017:5CIA5BZYKIY6OHSEXZTFLTJ3LO","short_pith_number":"pith:5CIA5BZY","canonical_record":{"source":{"id":"1708.07935","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.IR","submitted_at":"2017-08-26T05:56:32Z","cross_cats_sorted":[],"title_canon_sha256":"59a5193d10a0dc9f2ddfd09487114a702b628eb9ebb9900b4c97d51136cb4445","abstract_canon_sha256":"0fa5bc2aecc8a32dc6b82153ceb2b4ea7a4e4838eaf39b9c74c69b9073f4f788"},"schema_version":"1.0"},"canonical_sha256":"e8900e87385231e71e44be6655cd3b5b954711b6a15a8afefd0b036603d3b8ab","source":{"kind":"arxiv","id":"1708.07935","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1708.07935","created_at":"2026-05-18T00:36:37Z"},{"alias_kind":"arxiv_version","alias_value":"1708.07935v1","created_at":"2026-05-18T00:36:37Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1708.07935","created_at":"2026-05-18T00:36:37Z"},{"alias_kind":"pith_short_12","alias_value":"5CIA5BZYKIY6","created_at":"2026-05-18T12:31:00Z"},{"alias_kind":"pith_short_16","alias_value":"5CIA5BZYKIY6OHSE","created_at":"2026-05-18T12:31:00Z"},{"alias_kind":"pith_short_8","alias_value":"5CIA5BZY","created_at":"2026-05-18T12:31:00Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2017:5CIA5BZYKIY6OHSEXZTFLTJ3LO","target":"record","payload":{"canonical_record":{"source":{"id":"1708.07935","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.IR","submitted_at":"2017-08-26T05:56:32Z","cross_cats_sorted":[],"title_canon_sha256":"59a5193d10a0dc9f2ddfd09487114a702b628eb9ebb9900b4c97d51136cb4445","abstract_canon_sha256":"0fa5bc2aecc8a32dc6b82153ceb2b4ea7a4e4838eaf39b9c74c69b9073f4f788"},"schema_version":"1.0"},"canonical_sha256":"e8900e87385231e71e44be6655cd3b5b954711b6a15a8afefd0b036603d3b8ab","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:36:37.595214Z","signature_b64":"GKFopUKpCLxuHTSdaofrQ74/nZDK0d8yBgiI2cSBslO4Ht+ZkdXY8sDfB/9gmzOALEK+cnnBOTH9c0MJXEaUAw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"e8900e87385231e71e44be6655cd3b5b954711b6a15a8afefd0b036603d3b8ab","last_reissued_at":"2026-05-18T00:36:37.594700Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:36:37.594700Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1708.07935","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:36:37Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"QW+kXnqh6GZiNR8aaj/mONb93/pXXCxuzlY5R9ifGn9ChhCnrCWZ8ZPlWMym010gakfSNsLMyF0aL7iuT7H5Bw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-10T11:52:53.378461Z"},"content_sha256":"302884b1b55b47401911662c703bbf6d7beed83ea1905e6cb3514a35186cabd7","schema_version":"1.0","event_id":"sha256:302884b1b55b47401911662c703bbf6d7beed83ea1905e6cb3514a35186cabd7"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2017:5CIA5BZYKIY6OHSEXZTFLTJ3LO","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Effective Blog Pages Extractor for Better UGC Accessing","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.IR","authors_text":"Can Wang, Kui Zhao, Xia Hu, Yi Wang","submitted_at":"2017-08-26T05:56:32Z","abstract_excerpt":"Blog is becoming an increasingly popular media for information publishing. Besides the main content, most of blog pages nowadays also contain noisy information such as advertisements etc. Removing these unrelated elements can improves user experience, but also can better adapt the content to various devices such as mobile phones. Though template-based extractors are highly accurate, they may incur expensive cost in that a large number of template need to be developed and they will fail once the template is updated. To address these issues, we present a novel template-independent content extrac"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1708.07935","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:36:37Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"8gNoAbM8mIu6oU5f6gtifqTnFvmHAYYgZNItKslVq9Qs0pUtvPliVHbNPEJMjKwyrn2fNArpBU6T6NLPJmqLCA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-10T11:52:53.379139Z"},"content_sha256":"ffde73b75bd0da871ebedcdcb69337144f3643b39e885d50a570ec6618e7c557","schema_version":"1.0","event_id":"sha256:ffde73b75bd0da871ebedcdcb69337144f3643b39e885d50a570ec6618e7c557"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/5CIA5BZYKIY6OHSEXZTFLTJ3LO/bundle.json","state_url":"https://pith.science/pith/5CIA5BZYKIY6OHSEXZTFLTJ3LO/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/5CIA5BZYKIY6OHSEXZTFLTJ3LO/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-10T11:52:53Z","links":{"resolver":"https://pith.science/pith/5CIA5BZYKIY6OHSEXZTFLTJ3LO","bundle":"https://pith.science/pith/5CIA5BZYKIY6OHSEXZTFLTJ3LO/bundle.json","state":"https://pith.science/pith/5CIA5BZYKIY6OHSEXZTFLTJ3LO/state.json","well_known_bundle":"https://pith.science/.well-known/pith/5CIA5BZYKIY6OHSEXZTFLTJ3LO/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2017:5CIA5BZYKIY6OHSEXZTFLTJ3LO","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"0fa5bc2aecc8a32dc6b82153ceb2b4ea7a4e4838eaf39b9c74c69b9073f4f788","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.IR","submitted_at":"2017-08-26T05:56:32Z","title_canon_sha256":"59a5193d10a0dc9f2ddfd09487114a702b628eb9ebb9900b4c97d51136cb4445"},"schema_version":"1.0","source":{"id":"1708.07935","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1708.07935","created_at":"2026-05-18T00:36:37Z"},{"alias_kind":"arxiv_version","alias_value":"1708.07935v1","created_at":"2026-05-18T00:36:37Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1708.07935","created_at":"2026-05-18T00:36:37Z"},{"alias_kind":"pith_short_12","alias_value":"5CIA5BZYKIY6","created_at":"2026-05-18T12:31:00Z"},{"alias_kind":"pith_short_16","alias_value":"5CIA5BZYKIY6OHSE","created_at":"2026-05-18T12:31:00Z"},{"alias_kind":"pith_short_8","alias_value":"5CIA5BZY","created_at":"2026-05-18T12:31:00Z"}],"graph_snapshots":[{"event_id":"sha256:ffde73b75bd0da871ebedcdcb69337144f3643b39e885d50a570ec6618e7c557","target":"graph","created_at":"2026-05-18T00:36:37Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Blog is becoming an increasingly popular media for information publishing. Besides the main content, most of blog pages nowadays also contain noisy information such as advertisements etc. Removing these unrelated elements can improves user experience, but also can better adapt the content to various devices such as mobile phones. Though template-based extractors are highly accurate, they may incur expensive cost in that a large number of template need to be developed and they will fail once the template is updated. To address these issues, we present a novel template-independent content extrac","authors_text":"Can Wang, Kui Zhao, Xia Hu, Yi Wang","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.IR","submitted_at":"2017-08-26T05:56:32Z","title":"Effective Blog Pages Extractor for Better UGC Accessing"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1708.07935","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:302884b1b55b47401911662c703bbf6d7beed83ea1905e6cb3514a35186cabd7","target":"record","created_at":"2026-05-18T00:36:37Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"0fa5bc2aecc8a32dc6b82153ceb2b4ea7a4e4838eaf39b9c74c69b9073f4f788","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.IR","submitted_at":"2017-08-26T05:56:32Z","title_canon_sha256":"59a5193d10a0dc9f2ddfd09487114a702b628eb9ebb9900b4c97d51136cb4445"},"schema_version":"1.0","source":{"id":"1708.07935","kind":"arxiv","version":1}},"canonical_sha256":"e8900e87385231e71e44be6655cd3b5b954711b6a15a8afefd0b036603d3b8ab","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"e8900e87385231e71e44be6655cd3b5b954711b6a15a8afefd0b036603d3b8ab","first_computed_at":"2026-05-18T00:36:37.594700Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:36:37.594700Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"GKFopUKpCLxuHTSdaofrQ74/nZDK0d8yBgiI2cSBslO4Ht+ZkdXY8sDfB/9gmzOALEK+cnnBOTH9c0MJXEaUAw==","signature_status":"signed_v1","signed_at":"2026-05-18T00:36:37.595214Z","signed_message":"canonical_sha256_bytes"},"source_id":"1708.07935","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:302884b1b55b47401911662c703bbf6d7beed83ea1905e6cb3514a35186cabd7","sha256:ffde73b75bd0da871ebedcdcb69337144f3643b39e885d50a570ec6618e7c557"],"state_sha256":"4c85d9d37005c8d8ae2d6b8e625a11342fa7394deb793256487478b3e16f4e5c"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"VrF6V+C10ZNsnVjK29caW1FVbbskFBDBwt+RkjVu+S+d9tjobIATW6XCyhjw1aqbQibvrQSH4h6DAnv7TzUtBw==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-10T11:52:53.383433Z","bundle_sha256":"e977a8b9c976a39d06ae5776a7fa0c209e680bfce7b16e5591b00cc39cf63c56"}}