{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2024:OQ274GJUXEBBYETLWJOFV4PK6J","short_pith_number":"pith:OQ274GJU","canonical_record":{"source":{"id":"2407.13193","kind":"arxiv","version":4},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2024-07-18T06:06:53Z","cross_cats_sorted":[],"title_canon_sha256":"434e1dee56d5963ee02d2b43cf57f4083ac82bcba695d9938359e2e09bf0b343","abstract_canon_sha256":"885a80b12418b047accea43bb109a1dd49a2320b002206d23044bdf1ac17d2b8"},"schema_version":"1.0"},"canonical_sha256":"7435fe1934b9021c126bb25c5af1eaf27c324cb03258621f27b4f5f30542ddcf","source":{"kind":"arxiv","id":"2407.13193","version":4},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2407.13193","created_at":"2026-05-20T02:05:31Z"},{"alias_kind":"arxiv_version","alias_value":"2407.13193v4","created_at":"2026-05-20T02:05:31Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2407.13193","created_at":"2026-05-20T02:05:31Z"},{"alias_kind":"pith_short_12","alias_value":"OQ274GJUXEBB","created_at":"2026-05-20T02:05:31Z"},{"alias_kind":"pith_short_16","alias_value":"OQ274GJUXEBBYETL","created_at":"2026-05-20T02:05:31Z"},{"alias_kind":"pith_short_8","alias_value":"OQ274GJU","created_at":"2026-05-20T02:05:31Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2024:OQ274GJUXEBBYETLWJOFV4PK6J","target":"record","payload":{"canonical_record":{"source":{"id":"2407.13193","kind":"arxiv","version":4},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2024-07-18T06:06:53Z","cross_cats_sorted":[],"title_canon_sha256":"434e1dee56d5963ee02d2b43cf57f4083ac82bcba695d9938359e2e09bf0b343","abstract_canon_sha256":"885a80b12418b047accea43bb109a1dd49a2320b002206d23044bdf1ac17d2b8"},"schema_version":"1.0"},"canonical_sha256":"7435fe1934b9021c126bb25c5af1eaf27c324cb03258621f27b4f5f30542ddcf","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-20T02:05:31.877519Z","signature_b64":"+jjYTJB/nOT1Pml3zbOECG545aweYDqKgDdblg60YH0TsVjsVG//OWyQOqje4dfdZn5FbwM8YT+UkFpcBycIDg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"7435fe1934b9021c126bb25c5af1eaf27c324cb03258621f27b4f5f30542ddcf","last_reissued_at":"2026-05-20T02:05:31.876900Z","signature_status":"signed_v1","first_computed_at":"2026-05-20T02:05:31.876900Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2407.13193","source_version":4,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-20T02:05:31Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"cREKZS/Kjwlz7WCLECEsgHtaGhjeSPEyevs8vKRmx0xSGB3ct+cgPx7iJypjUs8A8CPTsO0jpLIdgQkBy+CsCA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-27T06:00:39.052833Z"},"content_sha256":"da573f2ee91690045c46d83f7728dc17c36320d29b255fa85d961da78e7157c6","schema_version":"1.0","event_id":"sha256:da573f2ee91690045c46d83f7728dc17c36320d29b255fa85d961da78e7157c6"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2024:OQ274GJUXEBBYETLWJOFV4PK6J","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Retrieval-Augmented Generation for Natural Language Processing: A Survey","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Can Chen, Chun Jason Xue, Haolun Wu, Lianming Huang, Nan Guan, Shangyu Wu, Tei-Wei Kuo, Xue Liu, Ye Yuan, Ying Xiong, Yufei Cui","submitted_at":"2024-07-18T06:06:53Z","abstract_excerpt":"Large language models (LLMs) have achieved strong empirical performance in various fields, benefiting from their huge amount of parameters that store knowledge. However, LLMs still suffer from several key issues, such as hallucination problems, knowledge update issues, and lacking domain-specific expertise. The appearance of retrieval-augmented generation (RAG), which leverages an external knowledge base to augment LLMs, mitigates these limitations. This paper presents a systematic review of RAG techniques for natural language processing (NLP), with a focus on retrievers and retrieval fusions."},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2407.13193","kind":"arxiv","version":4},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2407.13193/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-20T02:05:31Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"I1yzC8goWWHM/kdT4l95iNj5sR+INDOMK3Zw+Um0MCTv0aIoHUHOg5fdwA0G6Z/jKRCWC9veWarHp746BX1BBg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-27T06:00:39.053417Z"},"content_sha256":"d1c5f29a528eeeb1a0fea33df6066924860b17eb2ce71d9158c3706d4202414b","schema_version":"1.0","event_id":"sha256:d1c5f29a528eeeb1a0fea33df6066924860b17eb2ce71d9158c3706d4202414b"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/OQ274GJUXEBBYETLWJOFV4PK6J/bundle.json","state_url":"https://pith.science/pith/OQ274GJUXEBBYETLWJOFV4PK6J/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/OQ274GJUXEBBYETLWJOFV4PK6J/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-27T06:00:39Z","links":{"resolver":"https://pith.science/pith/OQ274GJUXEBBYETLWJOFV4PK6J","bundle":"https://pith.science/pith/OQ274GJUXEBBYETLWJOFV4PK6J/bundle.json","state":"https://pith.science/pith/OQ274GJUXEBBYETLWJOFV4PK6J/state.json","well_known_bundle":"https://pith.science/.well-known/pith/OQ274GJUXEBBYETLWJOFV4PK6J/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2024:OQ274GJUXEBBYETLWJOFV4PK6J","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"885a80b12418b047accea43bb109a1dd49a2320b002206d23044bdf1ac17d2b8","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2024-07-18T06:06:53Z","title_canon_sha256":"434e1dee56d5963ee02d2b43cf57f4083ac82bcba695d9938359e2e09bf0b343"},"schema_version":"1.0","source":{"id":"2407.13193","kind":"arxiv","version":4}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2407.13193","created_at":"2026-05-20T02:05:31Z"},{"alias_kind":"arxiv_version","alias_value":"2407.13193v4","created_at":"2026-05-20T02:05:31Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2407.13193","created_at":"2026-05-20T02:05:31Z"},{"alias_kind":"pith_short_12","alias_value":"OQ274GJUXEBB","created_at":"2026-05-20T02:05:31Z"},{"alias_kind":"pith_short_16","alias_value":"OQ274GJUXEBBYETL","created_at":"2026-05-20T02:05:31Z"},{"alias_kind":"pith_short_8","alias_value":"OQ274GJU","created_at":"2026-05-20T02:05:31Z"}],"graph_snapshots":[{"event_id":"sha256:d1c5f29a528eeeb1a0fea33df6066924860b17eb2ce71d9158c3706d4202414b","target":"graph","created_at":"2026-05-20T02:05:31Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2407.13193/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Large language models (LLMs) have achieved strong empirical performance in various fields, benefiting from their huge amount of parameters that store knowledge. However, LLMs still suffer from several key issues, such as hallucination problems, knowledge update issues, and lacking domain-specific expertise. The appearance of retrieval-augmented generation (RAG), which leverages an external knowledge base to augment LLMs, mitigates these limitations. This paper presents a systematic review of RAG techniques for natural language processing (NLP), with a focus on retrievers and retrieval fusions.","authors_text":"Can Chen, Chun Jason Xue, Haolun Wu, Lianming Huang, Nan Guan, Shangyu Wu, Tei-Wei Kuo, Xue Liu, Ye Yuan, Ying Xiong, Yufei Cui","cross_cats":[],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2024-07-18T06:06:53Z","title":"Retrieval-Augmented Generation for Natural Language Processing: A Survey"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2407.13193","kind":"arxiv","version":4},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:da573f2ee91690045c46d83f7728dc17c36320d29b255fa85d961da78e7157c6","target":"record","created_at":"2026-05-20T02:05:31Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"885a80b12418b047accea43bb109a1dd49a2320b002206d23044bdf1ac17d2b8","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2024-07-18T06:06:53Z","title_canon_sha256":"434e1dee56d5963ee02d2b43cf57f4083ac82bcba695d9938359e2e09bf0b343"},"schema_version":"1.0","source":{"id":"2407.13193","kind":"arxiv","version":4}},"canonical_sha256":"7435fe1934b9021c126bb25c5af1eaf27c324cb03258621f27b4f5f30542ddcf","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"7435fe1934b9021c126bb25c5af1eaf27c324cb03258621f27b4f5f30542ddcf","first_computed_at":"2026-05-20T02:05:31.876900Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-20T02:05:31.876900Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"+jjYTJB/nOT1Pml3zbOECG545aweYDqKgDdblg60YH0TsVjsVG//OWyQOqje4dfdZn5FbwM8YT+UkFpcBycIDg==","signature_status":"signed_v1","signed_at":"2026-05-20T02:05:31.877519Z","signed_message":"canonical_sha256_bytes"},"source_id":"2407.13193","source_kind":"arxiv","source_version":4}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:da573f2ee91690045c46d83f7728dc17c36320d29b255fa85d961da78e7157c6","sha256:d1c5f29a528eeeb1a0fea33df6066924860b17eb2ce71d9158c3706d4202414b"],"state_sha256":"b982cb31a0e26d64dfd2788db49636ff958113ce0c2cfd8a2c5dcd68f7d9de95"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"/IUw4m4P+x/lT9qaEq1OuJ6b4oxgQz4QSUJbUNywvEeE7NQjY/LwVWtm+/QaqFDo11p9YS6/TztE8MIwKjchAQ==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-27T06:00:39.056661Z","bundle_sha256":"1199eb37cbc8e27e572be0afa133cbd7798cfdc44bc29524be43c89afd4609cf"}}