{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2015:CBEUSRA7FAU2BJYQV6CRXQ35WD","short_pith_number":"pith:CBEUSRA7","canonical_record":{"source":{"id":"1507.08396","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2015-07-30T06:44:37Z","cross_cats_sorted":["cs.IR","cs.LG","stat.ML"],"title_canon_sha256":"9d66616dd453e854dc7e531c4755737045724b268365905f1af8ffb80ed489d4","abstract_canon_sha256":"76bc2ffe3ec18dbf582af0e1332f01133285be17c9a5aaa631e0648d808ccfdf"},"schema_version":"1.0"},"canonical_sha256":"104949441f2829a0a710af851bc37db0dda00842143c1260ecf6b4589af7f568","source":{"kind":"arxiv","id":"1507.08396","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1507.08396","created_at":"2026-05-18T01:36:06Z"},{"alias_kind":"arxiv_version","alias_value":"1507.08396v1","created_at":"2026-05-18T01:36:06Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1507.08396","created_at":"2026-05-18T01:36:06Z"},{"alias_kind":"pith_short_12","alias_value":"CBEUSRA7FAU2","created_at":"2026-05-18T12:29:14Z"},{"alias_kind":"pith_short_16","alias_value":"CBEUSRA7FAU2BJYQ","created_at":"2026-05-18T12:29:14Z"},{"alias_kind":"pith_short_8","alias_value":"CBEUSRA7","created_at":"2026-05-18T12:29:14Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2015:CBEUSRA7FAU2BJYQV6CRXQ35WD","target":"record","payload":{"canonical_record":{"source":{"id":"1507.08396","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2015-07-30T06:44:37Z","cross_cats_sorted":["cs.IR","cs.LG","stat.ML"],"title_canon_sha256":"9d66616dd453e854dc7e531c4755737045724b268365905f1af8ffb80ed489d4","abstract_canon_sha256":"76bc2ffe3ec18dbf582af0e1332f01133285be17c9a5aaa631e0648d808ccfdf"},"schema_version":"1.0"},"canonical_sha256":"104949441f2829a0a710af851bc37db0dda00842143c1260ecf6b4589af7f568","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T01:36:06.583020Z","signature_b64":"VvmQMUitat6/2Gn83dlc4WE3WD4b+jo9J731EWaGuCOWKWm6k9Omfca809i+h+kve/gWT+OeNtfmODvIDV0QDw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"104949441f2829a0a710af851bc37db0dda00842143c1260ecf6b4589af7f568","last_reissued_at":"2026-05-18T01:36:06.582424Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T01:36:06.582424Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1507.08396","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T01:36:06Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"5g5442j3gixG2xJ+DssoMmYvLmiR5CgeVovEtKF4w+Jmapr885Kv6sx6zT2qSHW9ROU/4QGwxTe/+Ln2JI9OCg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-28T12:39:57.890326Z"},"content_sha256":"ebde74e57d5904915aaf96c51fdfe8b31e1ad230db550cf6e4f6a2c1e89ae3c9","schema_version":"1.0","event_id":"sha256:ebde74e57d5904915aaf96c51fdfe8b31e1ad230db550cf6e4f6a2c1e89ae3c9"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2015:CBEUSRA7FAU2BJYQV6CRXQ35WD","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Tag-Weighted Topic Model For Large-scale Semi-Structured Documents","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.IR","cs.LG","stat.ML"],"primary_cat":"cs.CL","authors_text":"Guan Huang, Jiefei Li, Rong Pan, Ruiyang Tan, Shuangyin Li","submitted_at":"2015-07-30T06:44:37Z","abstract_excerpt":"To date, there have been massive Semi-Structured Documents (SSDs) during the evolution of the Internet. These SSDs contain both unstructured features (e.g., plain text) and metadata (e.g., tags). Most previous works focused on modeling the unstructured text, and recently, some other methods have been proposed to model the unstructured text with specific tags. To build a general model for SSDs remains an important problem in terms of both model fitness and efficiency. We propose a novel method to model the SSDs by a so-called Tag-Weighted Topic Model (TWTM). TWTM is a framework that leverages b"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1507.08396","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T01:36:06Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"WykfXU8b9kZLUds7AseCqFym6bVAU2cw1aU606FhCVv/ujrbNoFjYY0t+ZImrHEZbP3TlOPSZTvWXtsy2z0nAQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-28T12:39:57.890677Z"},"content_sha256":"c40a42bde364c03340b50e28bbc08c51fd3887c7d23ce80dc6de6a9a4252294d","schema_version":"1.0","event_id":"sha256:c40a42bde364c03340b50e28bbc08c51fd3887c7d23ce80dc6de6a9a4252294d"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/CBEUSRA7FAU2BJYQV6CRXQ35WD/bundle.json","state_url":"https://pith.science/pith/CBEUSRA7FAU2BJYQV6CRXQ35WD/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/CBEUSRA7FAU2BJYQV6CRXQ35WD/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-28T12:39:57Z","links":{"resolver":"https://pith.science/pith/CBEUSRA7FAU2BJYQV6CRXQ35WD","bundle":"https://pith.science/pith/CBEUSRA7FAU2BJYQV6CRXQ35WD/bundle.json","state":"https://pith.science/pith/CBEUSRA7FAU2BJYQV6CRXQ35WD/state.json","well_known_bundle":"https://pith.science/.well-known/pith/CBEUSRA7FAU2BJYQV6CRXQ35WD/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2015:CBEUSRA7FAU2BJYQV6CRXQ35WD","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"76bc2ffe3ec18dbf582af0e1332f01133285be17c9a5aaa631e0648d808ccfdf","cross_cats_sorted":["cs.IR","cs.LG","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2015-07-30T06:44:37Z","title_canon_sha256":"9d66616dd453e854dc7e531c4755737045724b268365905f1af8ffb80ed489d4"},"schema_version":"1.0","source":{"id":"1507.08396","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1507.08396","created_at":"2026-05-18T01:36:06Z"},{"alias_kind":"arxiv_version","alias_value":"1507.08396v1","created_at":"2026-05-18T01:36:06Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1507.08396","created_at":"2026-05-18T01:36:06Z"},{"alias_kind":"pith_short_12","alias_value":"CBEUSRA7FAU2","created_at":"2026-05-18T12:29:14Z"},{"alias_kind":"pith_short_16","alias_value":"CBEUSRA7FAU2BJYQ","created_at":"2026-05-18T12:29:14Z"},{"alias_kind":"pith_short_8","alias_value":"CBEUSRA7","created_at":"2026-05-18T12:29:14Z"}],"graph_snapshots":[{"event_id":"sha256:c40a42bde364c03340b50e28bbc08c51fd3887c7d23ce80dc6de6a9a4252294d","target":"graph","created_at":"2026-05-18T01:36:06Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"To date, there have been massive Semi-Structured Documents (SSDs) during the evolution of the Internet. These SSDs contain both unstructured features (e.g., plain text) and metadata (e.g., tags). Most previous works focused on modeling the unstructured text, and recently, some other methods have been proposed to model the unstructured text with specific tags. To build a general model for SSDs remains an important problem in terms of both model fitness and efficiency. We propose a novel method to model the SSDs by a so-called Tag-Weighted Topic Model (TWTM). TWTM is a framework that leverages b","authors_text":"Guan Huang, Jiefei Li, Rong Pan, Ruiyang Tan, Shuangyin Li","cross_cats":["cs.IR","cs.LG","stat.ML"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2015-07-30T06:44:37Z","title":"Tag-Weighted Topic Model For Large-scale Semi-Structured Documents"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1507.08396","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:ebde74e57d5904915aaf96c51fdfe8b31e1ad230db550cf6e4f6a2c1e89ae3c9","target":"record","created_at":"2026-05-18T01:36:06Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"76bc2ffe3ec18dbf582af0e1332f01133285be17c9a5aaa631e0648d808ccfdf","cross_cats_sorted":["cs.IR","cs.LG","stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2015-07-30T06:44:37Z","title_canon_sha256":"9d66616dd453e854dc7e531c4755737045724b268365905f1af8ffb80ed489d4"},"schema_version":"1.0","source":{"id":"1507.08396","kind":"arxiv","version":1}},"canonical_sha256":"104949441f2829a0a710af851bc37db0dda00842143c1260ecf6b4589af7f568","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"104949441f2829a0a710af851bc37db0dda00842143c1260ecf6b4589af7f568","first_computed_at":"2026-05-18T01:36:06.582424Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T01:36:06.582424Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"VvmQMUitat6/2Gn83dlc4WE3WD4b+jo9J731EWaGuCOWKWm6k9Omfca809i+h+kve/gWT+OeNtfmODvIDV0QDw==","signature_status":"signed_v1","signed_at":"2026-05-18T01:36:06.583020Z","signed_message":"canonical_sha256_bytes"},"source_id":"1507.08396","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:ebde74e57d5904915aaf96c51fdfe8b31e1ad230db550cf6e4f6a2c1e89ae3c9","sha256:c40a42bde364c03340b50e28bbc08c51fd3887c7d23ce80dc6de6a9a4252294d"],"state_sha256":"939fecd6006c03565f2e1deb5815c789db96e1d1d1d0bf801d89799a3892cf1d"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"LKB7w6OZzJ3jaayb3C9s4afiwT84AkXKJa/ZzHGa/lBXL74ch98/XYNYKzhnVpRc5xXx4L1te6mhWfCixbbaDQ==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-28T12:39:57.892744Z","bundle_sha256":"ce4fc1da2b5b376f2be54f60de866f3bfc045824da6e0c911b3c8392f9a20583"}}