{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2024:OQ274GJUXEBBYETLWJOFV4PK6J","short_pith_number":"pith:OQ274GJU","schema_version":"1.0","canonical_sha256":"7435fe1934b9021c126bb25c5af1eaf27c324cb03258621f27b4f5f30542ddcf","source":{"kind":"arxiv","id":"2407.13193","version":4},"attestation_state":"computed","paper":{"title":"Retrieval-Augmented Generation for Natural Language Processing: A Survey","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Can Chen, Chun Jason Xue, Haolun Wu, Lianming Huang, Nan Guan, Shangyu Wu, Tei-Wei Kuo, Xue Liu, Ye Yuan, Ying Xiong, Yufei Cui","submitted_at":"2024-07-18T06:06:53Z","abstract_excerpt":"Large language models (LLMs) have achieved strong empirical performance in various fields, benefiting from their huge amount of parameters that store knowledge. However, LLMs still suffer from several key issues, such as hallucination problems, knowledge update issues, and lacking domain-specific expertise. The appearance of retrieval-augmented generation (RAG), which leverages an external knowledge base to augment LLMs, mitigates these limitations. This paper presents a systematic review of RAG techniques for natural language processing (NLP), with a focus on retrievers and retrieval fusions."},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2407.13193","kind":"arxiv","version":4},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2024-07-18T06:06:53Z","cross_cats_sorted":[],"title_canon_sha256":"434e1dee56d5963ee02d2b43cf57f4083ac82bcba695d9938359e2e09bf0b343","abstract_canon_sha256":"885a80b12418b047accea43bb109a1dd49a2320b002206d23044bdf1ac17d2b8"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-20T02:05:31.877519Z","signature_b64":"+jjYTJB/nOT1Pml3zbOECG545aweYDqKgDdblg60YH0TsVjsVG//OWyQOqje4dfdZn5FbwM8YT+UkFpcBycIDg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"7435fe1934b9021c126bb25c5af1eaf27c324cb03258621f27b4f5f30542ddcf","last_reissued_at":"2026-05-20T02:05:31.876900Z","signature_status":"signed_v1","first_computed_at":"2026-05-20T02:05:31.876900Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Retrieval-Augmented Generation for Natural Language Processing: A Survey","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Can Chen, Chun Jason Xue, Haolun Wu, Lianming Huang, Nan Guan, Shangyu Wu, Tei-Wei Kuo, Xue Liu, Ye Yuan, Ying Xiong, Yufei Cui","submitted_at":"2024-07-18T06:06:53Z","abstract_excerpt":"Large language models (LLMs) have achieved strong empirical performance in various fields, benefiting from their huge amount of parameters that store knowledge. However, LLMs still suffer from several key issues, such as hallucination problems, knowledge update issues, and lacking domain-specific expertise. The appearance of retrieval-augmented generation (RAG), which leverages an external knowledge base to augment LLMs, mitigates these limitations. This paper presents a systematic review of RAG techniques for natural language processing (NLP), with a focus on retrievers and retrieval fusions."},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2407.13193","kind":"arxiv","version":4},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2407.13193/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2407.13193","created_at":"2026-05-20T02:05:31.876983+00:00"},{"alias_kind":"arxiv_version","alias_value":"2407.13193v4","created_at":"2026-05-20T02:05:31.876983+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2407.13193","created_at":"2026-05-20T02:05:31.876983+00:00"},{"alias_kind":"pith_short_12","alias_value":"OQ274GJUXEBB","created_at":"2026-05-20T02:05:31.876983+00:00"},{"alias_kind":"pith_short_16","alias_value":"OQ274GJUXEBBYETL","created_at":"2026-05-20T02:05:31.876983+00:00"},{"alias_kind":"pith_short_8","alias_value":"OQ274GJU","created_at":"2026-05-20T02:05:31.876983+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":10,"internal_anchor_count":10,"sample":[{"citing_arxiv_id":"2502.09891","citing_title":"ArchRAG: Attributed Community-based Hierarchical Retrieval-Augmented Generation","ref_index":63,"is_internal_anchor":true},{"citing_arxiv_id":"2503.04338","citing_title":"In-depth Analysis of Graph-based RAG in a Unified Framework","ref_index":91,"is_internal_anchor":true},{"citing_arxiv_id":"2511.09803","citing_title":"Retrieval as a Decision: Training-Free Adaptive Gating for Efficient RAG","ref_index":16,"is_internal_anchor":true},{"citing_arxiv_id":"2605.12335","citing_title":"EHR-RAGp: Retrieval-Augmented Prototype-Guided Foundation Model for Electronic Health Records","ref_index":18,"is_internal_anchor":true},{"citing_arxiv_id":"2604.26686","citing_title":"When Model Editing Meets Service Evolution: A Knowledge-Update Perspective for Service Recommendation","ref_index":14,"is_internal_anchor":true},{"citing_arxiv_id":"2604.25847","citing_title":"From Soliloquy to Agora: Memory-Enhanced LLM Agents with Decentralized Debate for Optimization Modeling","ref_index":20,"is_internal_anchor":true},{"citing_arxiv_id":"2605.01302","citing_title":"Beyond Semantic Relevance: Counterfactual Risk Minimization for Robust Retrieval-Augmented Generation","ref_index":75,"is_internal_anchor":true},{"citing_arxiv_id":"2604.06279","citing_title":"Plasma GraphRAG: Physics-Grounded Parameter Selection for Gyrokinetic Simulations","ref_index":27,"is_internal_anchor":true},{"citing_arxiv_id":"2604.14222","citing_title":"Adaptive Query Routing: A Tier-Based Framework for Hybrid Retrieval Across Financial, Legal, and Medical Documents","ref_index":14,"is_internal_anchor":true},{"citing_arxiv_id":"2604.17458","citing_title":"EHRAG: Bridging Semantic Gaps in Lightweight GraphRAG via Hybrid Hypergraph Construction and Retrieval","ref_index":293,"is_internal_anchor":true}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/OQ274GJUXEBBYETLWJOFV4PK6J","json":"https://pith.science/pith/OQ274GJUXEBBYETLWJOFV4PK6J.json","graph_json":"https://pith.science/api/pith-number/OQ274GJUXEBBYETLWJOFV4PK6J/graph.json","events_json":"https://pith.science/api/pith-number/OQ274GJUXEBBYETLWJOFV4PK6J/events.json","paper":"https://pith.science/paper/OQ274GJU"},"agent_actions":{"view_html":"https://pith.science/pith/OQ274GJUXEBBYETLWJOFV4PK6J","download_json":"https://pith.science/pith/OQ274GJUXEBBYETLWJOFV4PK6J.json","view_paper":"https://pith.science/paper/OQ274GJU","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2407.13193&json=true","fetch_graph":"https://pith.science/api/pith-number/OQ274GJUXEBBYETLWJOFV4PK6J/graph.json","fetch_events":"https://pith.science/api/pith-number/OQ274GJUXEBBYETLWJOFV4PK6J/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/OQ274GJUXEBBYETLWJOFV4PK6J/action/timestamp_anchor","attest_storage":"https://pith.science/pith/OQ274GJUXEBBYETLWJOFV4PK6J/action/storage_attestation","attest_author":"https://pith.science/pith/OQ274GJUXEBBYETLWJOFV4PK6J/action/author_attestation","sign_citation":"https://pith.science/pith/OQ274GJUXEBBYETLWJOFV4PK6J/action/citation_signature","submit_replication":"https://pith.science/pith/OQ274GJUXEBBYETLWJOFV4PK6J/action/replication_record"}},"created_at":"2026-05-20T02:05:31.876983+00:00","updated_at":"2026-05-20T02:05:31.876983+00:00"}