{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:OJDFMGCG2GN6W32VJ5KRBK23W4","short_pith_number":"pith:OJDFMGCG","schema_version":"1.0","canonical_sha256":"7246561846d19beb6f554f5510ab5bb7054cd859ce469eb33b3d8bec0ed3aaa2","source":{"kind":"arxiv","id":"2603.05207","version":2},"attestation_state":"computed","paper":{"title":"Core-based Hierarchies for Efficient GraphRAG","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.CL"],"primary_cat":"cs.IR","authors_text":"Ahmet Erdem Sar{\\i}y\\\"uce, Jakir Hossain","submitted_at":"2026-03-05T14:17:30Z","abstract_excerpt":"Retrieval-Augmented Generation (RAG) enhances large language models by incorporating external knowledge. However, existing vector-based methods often fail on global sensemaking tasks that require reasoning across many documents. GraphRAG addresses this by organizing documents into a knowledge graph with hierarchical communities that can be recursively summarized. Current GraphRAG approaches rely on Leiden clustering for community detection, but we prove that on sparse knowledge graphs, where average degree is constant and most nodes have low degree, modularity optimization admits exponentially"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2603.05207","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.IR","submitted_at":"2026-03-05T14:17:30Z","cross_cats_sorted":["cs.CL"],"title_canon_sha256":"85489dc90f293693e263518e31411278b0051f6712ce672ceac0aa9e27ffcde6","abstract_canon_sha256":"53873239577adc1a714a85b8fde02a9e41ef7427b051f293f82f3cabcb5b4c53"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-03T01:05:11.427799Z","signature_b64":"uq+hg21yJTmnvidVoudq7CEyRs+lGXRHgN40PrBh8m9HLaNhTuj4wJ/NOT6QTrHw3YOxwHil7VinxwkCi8wxCw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"7246561846d19beb6f554f5510ab5bb7054cd859ce469eb33b3d8bec0ed3aaa2","last_reissued_at":"2026-06-03T01:05:11.427293Z","signature_status":"signed_v1","first_computed_at":"2026-06-03T01:05:11.427293Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Core-based Hierarchies for Efficient GraphRAG","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.CL"],"primary_cat":"cs.IR","authors_text":"Ahmet Erdem Sar{\\i}y\\\"uce, Jakir Hossain","submitted_at":"2026-03-05T14:17:30Z","abstract_excerpt":"Retrieval-Augmented Generation (RAG) enhances large language models by incorporating external knowledge. However, existing vector-based methods often fail on global sensemaking tasks that require reasoning across many documents. GraphRAG addresses this by organizing documents into a knowledge graph with hierarchical communities that can be recursively summarized. Current GraphRAG approaches rely on Leiden clustering for community detection, but we prove that on sparse knowledge graphs, where average degree is constant and most nodes have low degree, modularity optimization admits exponentially"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2603.05207","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2603.05207/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2603.05207","created_at":"2026-06-03T01:05:11.427367+00:00"},{"alias_kind":"arxiv_version","alias_value":"2603.05207v2","created_at":"2026-06-03T01:05:11.427367+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2603.05207","created_at":"2026-06-03T01:05:11.427367+00:00"},{"alias_kind":"pith_short_12","alias_value":"OJDFMGCG2GN6","created_at":"2026-06-03T01:05:11.427367+00:00"},{"alias_kind":"pith_short_16","alias_value":"OJDFMGCG2GN6W32V","created_at":"2026-06-03T01:05:11.427367+00:00"},{"alias_kind":"pith_short_8","alias_value":"OJDFMGCG","created_at":"2026-06-03T01:05:11.427367+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":2,"internal_anchor_count":2,"sample":[{"citing_arxiv_id":"2604.04948","citing_title":"From PDF to RAG-Ready: Evaluating Document Conversion Frameworks for Domain-Specific Question Answering","ref_index":25,"is_internal_anchor":true},{"citing_arxiv_id":"2604.07863","citing_title":"Task-Adaptive Retrieval over Agentic Multi-Modal Web Histories via Learned Graph Memory","ref_index":11,"is_internal_anchor":true}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/OJDFMGCG2GN6W32VJ5KRBK23W4","json":"https://pith.science/pith/OJDFMGCG2GN6W32VJ5KRBK23W4.json","graph_json":"https://pith.science/api/pith-number/OJDFMGCG2GN6W32VJ5KRBK23W4/graph.json","events_json":"https://pith.science/api/pith-number/OJDFMGCG2GN6W32VJ5KRBK23W4/events.json","paper":"https://pith.science/paper/OJDFMGCG"},"agent_actions":{"view_html":"https://pith.science/pith/OJDFMGCG2GN6W32VJ5KRBK23W4","download_json":"https://pith.science/pith/OJDFMGCG2GN6W32VJ5KRBK23W4.json","view_paper":"https://pith.science/paper/OJDFMGCG","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2603.05207&json=true","fetch_graph":"https://pith.science/api/pith-number/OJDFMGCG2GN6W32VJ5KRBK23W4/graph.json","fetch_events":"https://pith.science/api/pith-number/OJDFMGCG2GN6W32VJ5KRBK23W4/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/OJDFMGCG2GN6W32VJ5KRBK23W4/action/timestamp_anchor","attest_storage":"https://pith.science/pith/OJDFMGCG2GN6W32VJ5KRBK23W4/action/storage_attestation","attest_author":"https://pith.science/pith/OJDFMGCG2GN6W32VJ5KRBK23W4/action/author_attestation","sign_citation":"https://pith.science/pith/OJDFMGCG2GN6W32VJ5KRBK23W4/action/citation_signature","submit_replication":"https://pith.science/pith/OJDFMGCG2GN6W32VJ5KRBK23W4/action/replication_record"}},"created_at":"2026-06-03T01:05:11.427367+00:00","updated_at":"2026-06-03T01:05:11.427367+00:00"}