{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2016:7I43PKGGLOCROQZ3AAP56CEKUZ","short_pith_number":"pith:7I43PKGG","schema_version":"1.0","canonical_sha256":"fa39b7a8c65b8517433b001fdf088aa67bb146d695868a964bfcce5491e60882","source":{"kind":"arxiv","id":"1608.00104","version":1},"attestation_state":"computed","paper":{"title":"World Knowledge as Indirect Supervision for Document Clustering","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.CL","cs.IR"],"primary_cat":"cs.LG","authors_text":"Chenguang Wang, Dan Roth, Jiawei Han, Ming Zhang, Yangqiu Song","submitted_at":"2016-07-30T11:53:04Z","abstract_excerpt":"One of the key obstacles in making learning protocols realistic in applications is the need to supervise them, a costly process that often requires hiring domain experts. We consider the framework to use the world knowledge as indirect supervision. World knowledge is general-purpose knowledge, which is not designed for any specific domain. Then the key challenges are how to adapt the world knowledge to domains and how to represent it for learning. In this paper, we provide an example of using world knowledge for domain dependent document clustering. We provide three ways to specify the world k"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1608.00104","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2016-07-30T11:53:04Z","cross_cats_sorted":["cs.CL","cs.IR"],"title_canon_sha256":"94650b2c491227137d0c60d7dab1d8b43b5a7c73143265b38e984917191be631","abstract_canon_sha256":"a9cab3ab1e1918eaa3bbb8dccab1e8d43a497419d5f85fe58c90fe02768c8968"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T01:10:13.814151Z","signature_b64":"pshJqnidsc4npztrGFmXFt16JjSDATHjz24lwMTo//c0r7E9c2WYmH1oWMS8QxszU8HxZSitVXpkeV78qwHHBw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"fa39b7a8c65b8517433b001fdf088aa67bb146d695868a964bfcce5491e60882","last_reissued_at":"2026-05-18T01:10:13.813621Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T01:10:13.813621Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"World Knowledge as Indirect Supervision for Document Clustering","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.CL","cs.IR"],"primary_cat":"cs.LG","authors_text":"Chenguang Wang, Dan Roth, Jiawei Han, Ming Zhang, Yangqiu Song","submitted_at":"2016-07-30T11:53:04Z","abstract_excerpt":"One of the key obstacles in making learning protocols realistic in applications is the need to supervise them, a costly process that often requires hiring domain experts. We consider the framework to use the world knowledge as indirect supervision. World knowledge is general-purpose knowledge, which is not designed for any specific domain. Then the key challenges are how to adapt the world knowledge to domains and how to represent it for learning. In this paper, we provide an example of using world knowledge for domain dependent document clustering. We provide three ways to specify the world k"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1608.00104","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1608.00104","created_at":"2026-05-18T01:10:13.813722+00:00"},{"alias_kind":"arxiv_version","alias_value":"1608.00104v1","created_at":"2026-05-18T01:10:13.813722+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1608.00104","created_at":"2026-05-18T01:10:13.813722+00:00"},{"alias_kind":"pith_short_12","alias_value":"7I43PKGGLOCR","created_at":"2026-05-18T12:30:04.600751+00:00"},{"alias_kind":"pith_short_16","alias_value":"7I43PKGGLOCROQZ3","created_at":"2026-05-18T12:30:04.600751+00:00"},{"alias_kind":"pith_short_8","alias_value":"7I43PKGG","created_at":"2026-05-18T12:30:04.600751+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/7I43PKGGLOCROQZ3AAP56CEKUZ","json":"https://pith.science/pith/7I43PKGGLOCROQZ3AAP56CEKUZ.json","graph_json":"https://pith.science/api/pith-number/7I43PKGGLOCROQZ3AAP56CEKUZ/graph.json","events_json":"https://pith.science/api/pith-number/7I43PKGGLOCROQZ3AAP56CEKUZ/events.json","paper":"https://pith.science/paper/7I43PKGG"},"agent_actions":{"view_html":"https://pith.science/pith/7I43PKGGLOCROQZ3AAP56CEKUZ","download_json":"https://pith.science/pith/7I43PKGGLOCROQZ3AAP56CEKUZ.json","view_paper":"https://pith.science/paper/7I43PKGG","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1608.00104&json=true","fetch_graph":"https://pith.science/api/pith-number/7I43PKGGLOCROQZ3AAP56CEKUZ/graph.json","fetch_events":"https://pith.science/api/pith-number/7I43PKGGLOCROQZ3AAP56CEKUZ/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/7I43PKGGLOCROQZ3AAP56CEKUZ/action/timestamp_anchor","attest_storage":"https://pith.science/pith/7I43PKGGLOCROQZ3AAP56CEKUZ/action/storage_attestation","attest_author":"https://pith.science/pith/7I43PKGGLOCROQZ3AAP56CEKUZ/action/author_attestation","sign_citation":"https://pith.science/pith/7I43PKGGLOCROQZ3AAP56CEKUZ/action/citation_signature","submit_replication":"https://pith.science/pith/7I43PKGGLOCROQZ3AAP56CEKUZ/action/replication_record"}},"created_at":"2026-05-18T01:10:13.813722+00:00","updated_at":"2026-05-18T01:10:13.813722+00:00"}