{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2014:34GEZP24JTOETTFQDO65LINBK4","short_pith_number":"pith:34GEZP24","schema_version":"1.0","canonical_sha256":"df0c4cbf5c4cdc49ccb01bbdd5a1a1572274c6bd1f59b057e455b192f268ddf9","source":{"kind":"arxiv","id":"1406.5181","version":2},"attestation_state":"computed","paper":{"title":"Zipf's law holds for phrases, not words","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["physics.soc-ph"],"primary_cat":"cs.CL","authors_text":"Christopher M. Danforth, Eric Clark, Jake Ryland Williams, James P. Bagrow, Paul R. Lessard, Peter Sheridan Dodds, Suma Desu","submitted_at":"2014-06-19T20:00:05Z","abstract_excerpt":"With Zipf's law being originally and most famously observed for word frequency, it is surprisingly limited in its applicability to human language, holding over no more than three to four orders of magnitude before hitting a clear break in scaling. Here, building on the simple observation that phrases of one or more words comprise the most coherent units of meaning in language, we show empirically that Zipf's law for phrases extends over as many as nine orders of rank magnitude. In doing so, we develop a principled and scalable statistical mechanical method of random text partitioning, which op"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1406.5181","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2014-06-19T20:00:05Z","cross_cats_sorted":["physics.soc-ph"],"title_canon_sha256":"979a34abea1f323a89302b7be0132f741f8b6a02c1f10ab3f0607eac68f6b1d4","abstract_canon_sha256":"1c74e573f8e3b5156738dc1af665bc2bb6633067d36d4b24ffe423365d3df946"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T02:25:40.321052Z","signature_b64":"prA1h/6r4biNFKjgoZee91azZtD4Wv6vhfh2ITKETrkQOIdwxKAr13vtb19SrtDvuPC6XLwmSv2B6CfZD8NfAQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"df0c4cbf5c4cdc49ccb01bbdd5a1a1572274c6bd1f59b057e455b192f268ddf9","last_reissued_at":"2026-05-18T02:25:40.320605Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T02:25:40.320605Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Zipf's law holds for phrases, not words","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["physics.soc-ph"],"primary_cat":"cs.CL","authors_text":"Christopher M. Danforth, Eric Clark, Jake Ryland Williams, James P. Bagrow, Paul R. Lessard, Peter Sheridan Dodds, Suma Desu","submitted_at":"2014-06-19T20:00:05Z","abstract_excerpt":"With Zipf's law being originally and most famously observed for word frequency, it is surprisingly limited in its applicability to human language, holding over no more than three to four orders of magnitude before hitting a clear break in scaling. Here, building on the simple observation that phrases of one or more words comprise the most coherent units of meaning in language, we show empirically that Zipf's law for phrases extends over as many as nine orders of rank magnitude. In doing so, we develop a principled and scalable statistical mechanical method of random text partitioning, which op"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1406.5181","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1406.5181","created_at":"2026-05-18T02:25:40.320669+00:00"},{"alias_kind":"arxiv_version","alias_value":"1406.5181v2","created_at":"2026-05-18T02:25:40.320669+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1406.5181","created_at":"2026-05-18T02:25:40.320669+00:00"},{"alias_kind":"pith_short_12","alias_value":"34GEZP24JTOE","created_at":"2026-05-18T12:28:11.866339+00:00"},{"alias_kind":"pith_short_16","alias_value":"34GEZP24JTOETTFQ","created_at":"2026-05-18T12:28:11.866339+00:00"},{"alias_kind":"pith_short_8","alias_value":"34GEZP24","created_at":"2026-05-18T12:28:11.866339+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/34GEZP24JTOETTFQDO65LINBK4","json":"https://pith.science/pith/34GEZP24JTOETTFQDO65LINBK4.json","graph_json":"https://pith.science/api/pith-number/34GEZP24JTOETTFQDO65LINBK4/graph.json","events_json":"https://pith.science/api/pith-number/34GEZP24JTOETTFQDO65LINBK4/events.json","paper":"https://pith.science/paper/34GEZP24"},"agent_actions":{"view_html":"https://pith.science/pith/34GEZP24JTOETTFQDO65LINBK4","download_json":"https://pith.science/pith/34GEZP24JTOETTFQDO65LINBK4.json","view_paper":"https://pith.science/paper/34GEZP24","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1406.5181&json=true","fetch_graph":"https://pith.science/api/pith-number/34GEZP24JTOETTFQDO65LINBK4/graph.json","fetch_events":"https://pith.science/api/pith-number/34GEZP24JTOETTFQDO65LINBK4/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/34GEZP24JTOETTFQDO65LINBK4/action/timestamp_anchor","attest_storage":"https://pith.science/pith/34GEZP24JTOETTFQDO65LINBK4/action/storage_attestation","attest_author":"https://pith.science/pith/34GEZP24JTOETTFQDO65LINBK4/action/author_attestation","sign_citation":"https://pith.science/pith/34GEZP24JTOETTFQDO65LINBK4/action/citation_signature","submit_replication":"https://pith.science/pith/34GEZP24JTOETTFQDO65LINBK4/action/replication_record"}},"created_at":"2026-05-18T02:25:40.320669+00:00","updated_at":"2026-05-18T02:25:40.320669+00:00"}