{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2017:533CLD6SXIAY53VSBGZFEHGLSB","short_pith_number":"pith:533CLD6S","schema_version":"1.0","canonical_sha256":"eef6258fd2ba018eeeb209b2521ccb905a9d82216cd471c33286b98fa4be834a","source":{"kind":"arxiv","id":"1704.00135","version":2},"attestation_state":"computed","paper":{"title":"Topic modeling of public repositories at scale using names in source code","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.CL"],"primary_cat":"cs.PL","authors_text":"Eiso Kant, Vadim Markovtsev","submitted_at":"2017-04-01T08:16:20Z","abstract_excerpt":"Programming languages themselves have a limited number of reserved keywords and character based tokens that define the language specification. However, programmers have a rich use of natural language within their code through comments, text literals and naming entities. The programmer defined names that can be found in source code are a rich source of information to build a high level understanding of the project. The goal of this paper is to apply topic modeling to names used in over 13.6 million repositories and perceive the inferred topics. One of the problems in such a study is the occurre"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1704.00135","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.PL","submitted_at":"2017-04-01T08:16:20Z","cross_cats_sorted":["cs.CL"],"title_canon_sha256":"b551c4ef9d51e49242979193db00e716fb1845c492d47c0377b4f43c9fc635e9","abstract_canon_sha256":"c5764b9c0a8db0da0c8b4e9ad4c5a3d258ec9c1f79b1a6af56d699b833de4eec"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:44:08.716210Z","signature_b64":"FX5ziKjVVQdkgxzdXZY4e4qsvkDB38p5cH8P0aP7VFExIxOXhPwkLxDRnemNPKogQZk21XK47zTxXiqGLwzYCQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"eef6258fd2ba018eeeb209b2521ccb905a9d82216cd471c33286b98fa4be834a","last_reissued_at":"2026-05-18T00:44:08.715782Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:44:08.715782Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Topic modeling of public repositories at scale using names in source code","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.CL"],"primary_cat":"cs.PL","authors_text":"Eiso Kant, Vadim Markovtsev","submitted_at":"2017-04-01T08:16:20Z","abstract_excerpt":"Programming languages themselves have a limited number of reserved keywords and character based tokens that define the language specification. However, programmers have a rich use of natural language within their code through comments, text literals and naming entities. The programmer defined names that can be found in source code are a rich source of information to build a high level understanding of the project. The goal of this paper is to apply topic modeling to names used in over 13.6 million repositories and perceive the inferred topics. One of the problems in such a study is the occurre"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1704.00135","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1704.00135","created_at":"2026-05-18T00:44:08.715849+00:00"},{"alias_kind":"arxiv_version","alias_value":"1704.00135v2","created_at":"2026-05-18T00:44:08.715849+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1704.00135","created_at":"2026-05-18T00:44:08.715849+00:00"},{"alias_kind":"pith_short_12","alias_value":"533CLD6SXIAY","created_at":"2026-05-18T12:31:00.734936+00:00"},{"alias_kind":"pith_short_16","alias_value":"533CLD6SXIAY53VS","created_at":"2026-05-18T12:31:00.734936+00:00"},{"alias_kind":"pith_short_8","alias_value":"533CLD6S","created_at":"2026-05-18T12:31:00.734936+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/533CLD6SXIAY53VSBGZFEHGLSB","json":"https://pith.science/pith/533CLD6SXIAY53VSBGZFEHGLSB.json","graph_json":"https://pith.science/api/pith-number/533CLD6SXIAY53VSBGZFEHGLSB/graph.json","events_json":"https://pith.science/api/pith-number/533CLD6SXIAY53VSBGZFEHGLSB/events.json","paper":"https://pith.science/paper/533CLD6S"},"agent_actions":{"view_html":"https://pith.science/pith/533CLD6SXIAY53VSBGZFEHGLSB","download_json":"https://pith.science/pith/533CLD6SXIAY53VSBGZFEHGLSB.json","view_paper":"https://pith.science/paper/533CLD6S","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1704.00135&json=true","fetch_graph":"https://pith.science/api/pith-number/533CLD6SXIAY53VSBGZFEHGLSB/graph.json","fetch_events":"https://pith.science/api/pith-number/533CLD6SXIAY53VSBGZFEHGLSB/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/533CLD6SXIAY53VSBGZFEHGLSB/action/timestamp_anchor","attest_storage":"https://pith.science/pith/533CLD6SXIAY53VSBGZFEHGLSB/action/storage_attestation","attest_author":"https://pith.science/pith/533CLD6SXIAY53VSBGZFEHGLSB/action/author_attestation","sign_citation":"https://pith.science/pith/533CLD6SXIAY53VSBGZFEHGLSB/action/citation_signature","submit_replication":"https://pith.science/pith/533CLD6SXIAY53VSBGZFEHGLSB/action/replication_record"}},"created_at":"2026-05-18T00:44:08.715849+00:00","updated_at":"2026-05-18T00:44:08.715849+00:00"}