{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2016:WZ2PCDFJKHDG6X5IOCP5I3BNKI","short_pith_number":"pith:WZ2PCDFJ","schema_version":"1.0","canonical_sha256":"b674f10ca951c66f5fa8709fd46c2d5218e8ffa7e0e486797b0f5f3bd0841613","source":{"kind":"arxiv","id":"1607.04883","version":4},"attestation_state":"computed","paper":{"title":"Statistical Industry Classification","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["q-fin.ST"],"primary_cat":"q-fin.PM","authors_text":"Willie Yu, Zura Kakushadze","submitted_at":"2016-07-17T15:49:24Z","abstract_excerpt":"We give complete algorithms and source code for constructing (multilevel) statistical industry classifications, including methods for fixing the number of clusters at each level (and the number of levels). Under the hood there are clustering algorithms (e.g., k-means). However, what should we cluster? Correlations? Returns? The answer turns out to be neither and our backtests suggest that these details make a sizable difference. We also give an algorithm and source code for building \"hybrid\" industry classifications by improving off-the-shelf \"fundamental\" industry classifications by applying "},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1607.04883","kind":"arxiv","version":4},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"q-fin.PM","submitted_at":"2016-07-17T15:49:24Z","cross_cats_sorted":["q-fin.ST"],"title_canon_sha256":"85acf31f2bd8e49768aa36609e8c90d75c0dc030228a8428651f8e89d43fc92f","abstract_canon_sha256":"c957a16c81623fc27a9e5001323a615c7aa1978c7547b5368ca49a36431f37af"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:57:15.979823Z","signature_b64":"j7MmYqHju+rjioZytr1LnXn5HCpFwWAfOy+fCKyVimrNsICFlCR9Bv8sr5ayC8lVy1Yq+CUHlgnjqgmfPed/DQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"b674f10ca951c66f5fa8709fd46c2d5218e8ffa7e0e486797b0f5f3bd0841613","last_reissued_at":"2026-05-17T23:57:15.979434Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:57:15.979434Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Statistical Industry Classification","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["q-fin.ST"],"primary_cat":"q-fin.PM","authors_text":"Willie Yu, Zura Kakushadze","submitted_at":"2016-07-17T15:49:24Z","abstract_excerpt":"We give complete algorithms and source code for constructing (multilevel) statistical industry classifications, including methods for fixing the number of clusters at each level (and the number of levels). Under the hood there are clustering algorithms (e.g., k-means). However, what should we cluster? Correlations? Returns? The answer turns out to be neither and our backtests suggest that these details make a sizable difference. We also give an algorithm and source code for building \"hybrid\" industry classifications by improving off-the-shelf \"fundamental\" industry classifications by applying "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1607.04883","kind":"arxiv","version":4},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1607.04883","created_at":"2026-05-17T23:57:15.979493+00:00"},{"alias_kind":"arxiv_version","alias_value":"1607.04883v4","created_at":"2026-05-17T23:57:15.979493+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1607.04883","created_at":"2026-05-17T23:57:15.979493+00:00"},{"alias_kind":"pith_short_12","alias_value":"WZ2PCDFJKHDG","created_at":"2026-05-18T12:30:51.357362+00:00"},{"alias_kind":"pith_short_16","alias_value":"WZ2PCDFJKHDG6X5I","created_at":"2026-05-18T12:30:51.357362+00:00"},{"alias_kind":"pith_short_8","alias_value":"WZ2PCDFJ","created_at":"2026-05-18T12:30:51.357362+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/WZ2PCDFJKHDG6X5IOCP5I3BNKI","json":"https://pith.science/pith/WZ2PCDFJKHDG6X5IOCP5I3BNKI.json","graph_json":"https://pith.science/api/pith-number/WZ2PCDFJKHDG6X5IOCP5I3BNKI/graph.json","events_json":"https://pith.science/api/pith-number/WZ2PCDFJKHDG6X5IOCP5I3BNKI/events.json","paper":"https://pith.science/paper/WZ2PCDFJ"},"agent_actions":{"view_html":"https://pith.science/pith/WZ2PCDFJKHDG6X5IOCP5I3BNKI","download_json":"https://pith.science/pith/WZ2PCDFJKHDG6X5IOCP5I3BNKI.json","view_paper":"https://pith.science/paper/WZ2PCDFJ","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1607.04883&json=true","fetch_graph":"https://pith.science/api/pith-number/WZ2PCDFJKHDG6X5IOCP5I3BNKI/graph.json","fetch_events":"https://pith.science/api/pith-number/WZ2PCDFJKHDG6X5IOCP5I3BNKI/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/WZ2PCDFJKHDG6X5IOCP5I3BNKI/action/timestamp_anchor","attest_storage":"https://pith.science/pith/WZ2PCDFJKHDG6X5IOCP5I3BNKI/action/storage_attestation","attest_author":"https://pith.science/pith/WZ2PCDFJKHDG6X5IOCP5I3BNKI/action/author_attestation","sign_citation":"https://pith.science/pith/WZ2PCDFJKHDG6X5IOCP5I3BNKI/action/citation_signature","submit_replication":"https://pith.science/pith/WZ2PCDFJKHDG6X5IOCP5I3BNKI/action/replication_record"}},"created_at":"2026-05-17T23:57:15.979493+00:00","updated_at":"2026-05-17T23:57:15.979493+00:00"}