{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:2ZJ4HTA7GG74SYLRH6UQBGXPVW","short_pith_number":"pith:2ZJ4HTA7","schema_version":"1.0","canonical_sha256":"d653c3cc1f31bfc961713fa9009aefadba8ea5f7eb0498644bd9ab923d5d1dff","source":{"kind":"arxiv","id":"2605.15362","version":1},"attestation_state":"computed","paper":{"title":"Automatic Construction of a Legal Citation Graph from 100 Million Ukrainian Court Decisions: Large-Scale Extraction, Topological Analysis, and Ontology-Driven Clustering","license":"http://creativecommons.org/licenses/by/4.0/","headline":"A citation graph from 100 million Ukrainian court decisions encodes legal domain boundaries without supervision and predicts future legislative importance with near-perfect accuracy.","cross_cats":["cs.DL","cs.IR"],"primary_cat":"cs.CL","authors_text":"Volodymyr Ovcharov","submitted_at":"2026-05-14T19:42:20Z","abstract_excerpt":"Half a billion citation edges extracted from 100.7 million Ukrainian court decisions reveal that judicial citation structure encodes legal domain boundaries without supervision and predicts future legislative importance with near-perfect accuracy. We construct the first large-scale citation graph from the complete EDRSR registry (99.5 million full texts, 1.1 TB), extracting 502 million citation links across six types via regex on commodity hardware in approximately 5 hours, with precision of 1.00 on a 200-decision validation sample (95% Wilson CI: [0.982, 1.000]).\n  Three principal findings em"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":true,"formal_links_present":true},"canonical_record":{"source":{"id":"2605.15362","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-05-14T19:42:20Z","cross_cats_sorted":["cs.DL","cs.IR"],"title_canon_sha256":"14998f497ccc87b9050ed3bcebb619066285fb658ac20ebbd43714a39c0b0bfc","abstract_canon_sha256":"0ebcc47cbe9315bea6ae427e74a25f4447536571620d4195715310544d4c8bb6"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-20T00:00:54.460916Z","signature_b64":"GwUsFEZEQdEV/MCNxfYQiULHwUSZUwQgNrcFq65wwOP2b1h2IXk0RuPzDQbjU7W0RxtC6YNpuGaj7ggtbdt5BQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"d653c3cc1f31bfc961713fa9009aefadba8ea5f7eb0498644bd9ab923d5d1dff","last_reissued_at":"2026-05-20T00:00:54.460168Z","signature_status":"signed_v1","first_computed_at":"2026-05-20T00:00:54.460168Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Automatic Construction of a Legal Citation Graph from 100 Million Ukrainian Court Decisions: Large-Scale Extraction, Topological Analysis, and Ontology-Driven Clustering","license":"http://creativecommons.org/licenses/by/4.0/","headline":"A citation graph from 100 million Ukrainian court decisions encodes legal domain boundaries without supervision and predicts future legislative importance with near-perfect accuracy.","cross_cats":["cs.DL","cs.IR"],"primary_cat":"cs.CL","authors_text":"Volodymyr Ovcharov","submitted_at":"2026-05-14T19:42:20Z","abstract_excerpt":"Half a billion citation edges extracted from 100.7 million Ukrainian court decisions reveal that judicial citation structure encodes legal domain boundaries without supervision and predicts future legislative importance with near-perfect accuracy. We construct the first large-scale citation graph from the complete EDRSR registry (99.5 million full texts, 1.1 TB), extracting 502 million citation links across six types via regex on commodity hardware in approximately 5 hours, with precision of 1.00 on a 200-decision validation sample (95% Wilson CI: [0.982, 1.000]).\n  Three principal findings em"},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"Half a billion citation edges extracted from 100.7 million Ukrainian court decisions reveal that judicial citation structure encodes legal domain boundaries without supervision and predicts future legislative importance with near-perfect accuracy.","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"Regex patterns applied to full-text decisions accurately identify all six types of citation links at scale, with the 200-decision validation sample being representative of the full 99.5 million documents.","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"A citation graph built from the complete Ukrainian court registry recovers legal domain boundaries via community detection and predicts legislative importance with AUC 0.9984.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"A citation graph from 100 million Ukrainian court decisions encodes legal domain boundaries without supervision and predicts future legislative importance with near-perfect accuracy.","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"c4d1444cc0a336c450d9235d73f8151d18ac8bd5db2ce000b5bef28025de2d81"},"source":{"id":"2605.15362","kind":"arxiv","version":1},"verdict":{"id":"4217eaf6-0503-4512-99bb-a14ccb290d33","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-19T15:34:09.322183Z","strongest_claim":"Half a billion citation edges extracted from 100.7 million Ukrainian court decisions reveal that judicial citation structure encodes legal domain boundaries without supervision and predicts future legislative importance with near-perfect accuracy.","one_line_summary":"A citation graph built from the complete Ukrainian court registry recovers legal domain boundaries via community detection and predicts legislative importance with AUC 0.9984.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"Regex patterns applied to full-text decisions accurately identify all six types of citation links at scale, with the 200-decision validation sample being representative of the full 99.5 million documents.","pith_extraction_headline":"A citation graph from 100 million Ukrainian court decisions encodes legal domain boundaries without supervision and predicts future legislative importance with near-perfect accuracy."},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2605.15362/integrity.json","findings":[],"available":true,"detectors_run":[{"name":"doi_title_agreement","ran_at":"2026-05-19T16:01:18.075009Z","status":"completed","version":"1.0.0","findings_count":0},{"name":"doi_compliance","ran_at":"2026-05-19T15:40:53.184300Z","status":"completed","version":"1.0.0","findings_count":0},{"name":"claim_evidence","ran_at":"2026-05-19T14:21:54.193610Z","status":"completed","version":"1.0.0","findings_count":0},{"name":"ai_meta_artifact","ran_at":"2026-05-19T13:33:22.742031Z","status":"skipped","version":"1.0.0","findings_count":0}],"snapshot_sha256":"392e88db57049d8eba1542e06f094f0c75dd1ca3d14564a9bfef8dc9a724629f"},"references":{"count":22,"sample":[{"doi":"10.1088/1742-5468/2008/10/p10008","year":2008,"title":"Fast Unfolding of Communities in Large Networks","work_id":"16c547d5-bd24-4225-a6d1-57765592f2cf","ref_index":1,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2018,"title":"Bommarito, Daniel Martin Katz, and Eric M","work_id":"1c9c9c02-159f-46f5-a2ac-aa6cc1f0e5fa","ref_index":2,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"10.18653/v1/2020.findings-emnlp.261","year":2020,"title":"LEGAL-BERT: The muppets straight out of law school","work_id":"b2953ebf-56aa-45a2-b0b5-c31eaec47861","ref_index":3,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"10.1137/070710111","year":2009,"title":"Power-law distributions in empirical data","work_id":"68d3d1a0-955d-4d3b-bc85-568523f261ff","ref_index":4,"cited_arxiv_id":"0706.1062","is_internal_anchor":true},{"doi":"","year":null,"title":"Measuring law over time: A network analytical framework with an application to statutes and regulations in the united states and germany.Frontiers in Physics, 9:658463,","work_id":"34916eee-4642-4713-8102-ced254d20c9b","ref_index":5,"cited_arxiv_id":"","is_internal_anchor":false}],"resolved_work":22,"snapshot_sha256":"a947cec5307df0a85302015a976425835304e9bf4e808c8bd7ab1dc028222d2c","internal_anchors":1},"formal_canon":{"evidence_count":2,"snapshot_sha256":"346f8150c585c328217d456f12fb506fc7669cd76b21e3481e02908c32807c3a"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2605.15362","created_at":"2026-05-20T00:00:54.460266+00:00"},{"alias_kind":"arxiv_version","alias_value":"2605.15362v1","created_at":"2026-05-20T00:00:54.460266+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.15362","created_at":"2026-05-20T00:00:54.460266+00:00"},{"alias_kind":"pith_short_12","alias_value":"2ZJ4HTA7GG74","created_at":"2026-05-20T00:00:54.460266+00:00"},{"alias_kind":"pith_short_16","alias_value":"2ZJ4HTA7GG74SYLR","created_at":"2026-05-20T00:00:54.460266+00:00"},{"alias_kind":"pith_short_8","alias_value":"2ZJ4HTA7","created_at":"2026-05-20T00:00:54.460266+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":2,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/2ZJ4HTA7GG74SYLRH6UQBGXPVW","json":"https://pith.science/pith/2ZJ4HTA7GG74SYLRH6UQBGXPVW.json","graph_json":"https://pith.science/api/pith-number/2ZJ4HTA7GG74SYLRH6UQBGXPVW/graph.json","events_json":"https://pith.science/api/pith-number/2ZJ4HTA7GG74SYLRH6UQBGXPVW/events.json","paper":"https://pith.science/paper/2ZJ4HTA7"},"agent_actions":{"view_html":"https://pith.science/pith/2ZJ4HTA7GG74SYLRH6UQBGXPVW","download_json":"https://pith.science/pith/2ZJ4HTA7GG74SYLRH6UQBGXPVW.json","view_paper":"https://pith.science/paper/2ZJ4HTA7","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2605.15362&json=true","fetch_graph":"https://pith.science/api/pith-number/2ZJ4HTA7GG74SYLRH6UQBGXPVW/graph.json","fetch_events":"https://pith.science/api/pith-number/2ZJ4HTA7GG74SYLRH6UQBGXPVW/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/2ZJ4HTA7GG74SYLRH6UQBGXPVW/action/timestamp_anchor","attest_storage":"https://pith.science/pith/2ZJ4HTA7GG74SYLRH6UQBGXPVW/action/storage_attestation","attest_author":"https://pith.science/pith/2ZJ4HTA7GG74SYLRH6UQBGXPVW/action/author_attestation","sign_citation":"https://pith.science/pith/2ZJ4HTA7GG74SYLRH6UQBGXPVW/action/citation_signature","submit_replication":"https://pith.science/pith/2ZJ4HTA7GG74SYLRH6UQBGXPVW/action/replication_record"}},"created_at":"2026-05-20T00:00:54.460266+00:00","updated_at":"2026-05-20T00:00:54.460266+00:00"}