{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2014:SNL7PSZ55FUD7IDX7IPY2RYUDY","short_pith_number":"pith:SNL7PSZ5","schema_version":"1.0","canonical_sha256":"9357f7cb3de9683fa077fa1f8d47141e24e399b44dd7541178f4a49c4a41fafa","source":{"kind":"arxiv","id":"1407.8322","version":2},"attestation_state":"computed","paper":{"title":"Zipf's law for word frequencies: word forms versus lemmas in long texts","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.CL","physics.data-an"],"primary_cat":"physics.soc-ph","authors_text":"Alvaro Corral, Gemma Boleda, Ramon Ferrer-i-Cancho","submitted_at":"2014-07-31T09:02:15Z","abstract_excerpt":"Zipf's law is a fundamental paradigm in the statistics of written and spoken natural language as well as in other communication systems. We raise the question of the elementary units for which Zipf's law should hold in the most natural way, studying its validity for plain word forms and for the corresponding lemma forms. In order to have as homogeneous sources as possible, we analyze some of the longest literary texts ever written, comprising four different languages, with different levels of morphological complexity. In all cases Zipf's law is fulfilled, in the sense that a power-law distribu"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1407.8322","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"physics.soc-ph","submitted_at":"2014-07-31T09:02:15Z","cross_cats_sorted":["cs.CL","physics.data-an"],"title_canon_sha256":"988e669f36c76b9456bfaa85d7695585eb329b444187031dfc396dbd210ef71a","abstract_canon_sha256":"157f47d67c0e0cc157419ab332ebe907edb9cde1ad961263404a62629f9a6cad"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T01:37:02.235709Z","signature_b64":"KbM31dDmAngtWf0XTAQ3dbvrJA8xk5cjWGxVxMlA4CjLdzPjifUnAiJyxsXVlypCMl94kCNGqWSTlzo32A5UCA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"9357f7cb3de9683fa077fa1f8d47141e24e399b44dd7541178f4a49c4a41fafa","last_reissued_at":"2026-05-18T01:37:02.235023Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T01:37:02.235023Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Zipf's law for word frequencies: word forms versus lemmas in long texts","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.CL","physics.data-an"],"primary_cat":"physics.soc-ph","authors_text":"Alvaro Corral, Gemma Boleda, Ramon Ferrer-i-Cancho","submitted_at":"2014-07-31T09:02:15Z","abstract_excerpt":"Zipf's law is a fundamental paradigm in the statistics of written and spoken natural language as well as in other communication systems. We raise the question of the elementary units for which Zipf's law should hold in the most natural way, studying its validity for plain word forms and for the corresponding lemma forms. In order to have as homogeneous sources as possible, we analyze some of the longest literary texts ever written, comprising four different languages, with different levels of morphological complexity. In all cases Zipf's law is fulfilled, in the sense that a power-law distribu"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1407.8322","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1407.8322","created_at":"2026-05-18T01:37:02.235135+00:00"},{"alias_kind":"arxiv_version","alias_value":"1407.8322v2","created_at":"2026-05-18T01:37:02.235135+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1407.8322","created_at":"2026-05-18T01:37:02.235135+00:00"},{"alias_kind":"pith_short_12","alias_value":"SNL7PSZ55FUD","created_at":"2026-05-18T12:28:49.207871+00:00"},{"alias_kind":"pith_short_16","alias_value":"SNL7PSZ55FUD7IDX","created_at":"2026-05-18T12:28:49.207871+00:00"},{"alias_kind":"pith_short_8","alias_value":"SNL7PSZ5","created_at":"2026-05-18T12:28:49.207871+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/SNL7PSZ55FUD7IDX7IPY2RYUDY","json":"https://pith.science/pith/SNL7PSZ55FUD7IDX7IPY2RYUDY.json","graph_json":"https://pith.science/api/pith-number/SNL7PSZ55FUD7IDX7IPY2RYUDY/graph.json","events_json":"https://pith.science/api/pith-number/SNL7PSZ55FUD7IDX7IPY2RYUDY/events.json","paper":"https://pith.science/paper/SNL7PSZ5"},"agent_actions":{"view_html":"https://pith.science/pith/SNL7PSZ55FUD7IDX7IPY2RYUDY","download_json":"https://pith.science/pith/SNL7PSZ55FUD7IDX7IPY2RYUDY.json","view_paper":"https://pith.science/paper/SNL7PSZ5","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1407.8322&json=true","fetch_graph":"https://pith.science/api/pith-number/SNL7PSZ55FUD7IDX7IPY2RYUDY/graph.json","fetch_events":"https://pith.science/api/pith-number/SNL7PSZ55FUD7IDX7IPY2RYUDY/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/SNL7PSZ55FUD7IDX7IPY2RYUDY/action/timestamp_anchor","attest_storage":"https://pith.science/pith/SNL7PSZ55FUD7IDX7IPY2RYUDY/action/storage_attestation","attest_author":"https://pith.science/pith/SNL7PSZ55FUD7IDX7IPY2RYUDY/action/author_attestation","sign_citation":"https://pith.science/pith/SNL7PSZ55FUD7IDX7IPY2RYUDY/action/citation_signature","submit_replication":"https://pith.science/pith/SNL7PSZ55FUD7IDX7IPY2RYUDY/action/replication_record"}},"created_at":"2026-05-18T01:37:02.235135+00:00","updated_at":"2026-05-18T01:37:02.235135+00:00"}