{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:7KZP4YJ2IFUSQ4GB7ZHOEDQJ7W","short_pith_number":"pith:7KZP4YJ2","canonical_record":{"source":{"id":"2605.15184","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-05-14T17:58:41Z","cross_cats_sorted":[],"title_canon_sha256":"882523d375a459c9c109456a0bbf0fb20ee9b760d5292c9be0a2ac2043cbfcee","abstract_canon_sha256":"06b18eb3db10ed5898a0f5b5a6f0b616d238d315cff6419f208ce59c3287fabf"},"schema_version":"1.0"},"canonical_sha256":"fab2fe613a41692870c1fe4ee20e09fd92ce5abeb23353b0b3c87fc858865ec5","source":{"kind":"arxiv","id":"2605.15184","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.15184","created_at":"2026-05-17T21:18:32Z"},{"alias_kind":"arxiv_version","alias_value":"2605.15184v1","created_at":"2026-05-17T21:18:32Z"},{"alias_kind":"pith_short_12","alias_value":"7KZP4YJ2IFUS","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_16","alias_value":"7KZP4YJ2IFUSQ4GB","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_8","alias_value":"7KZP4YJ2","created_at":"2026-05-18T12:33:37Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:7KZP4YJ2IFUSQ4GB7ZHOEDQJ7W","target":"record","payload":{"canonical_record":{"source":{"id":"2605.15184","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-05-14T17:58:41Z","cross_cats_sorted":[],"title_canon_sha256":"882523d375a459c9c109456a0bbf0fb20ee9b760d5292c9be0a2ac2043cbfcee","abstract_canon_sha256":"06b18eb3db10ed5898a0f5b5a6f0b616d238d315cff6419f208ce59c3287fabf"},"schema_version":"1.0"},"canonical_sha256":"fab2fe613a41692870c1fe4ee20e09fd92ce5abeb23353b0b3c87fc858865ec5","receipt":{"kind":"pith_receipt","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.2","canonical_sha256":"fab2fe613a41692870c1fe4ee20e09fd92ce5abeb23353b0b3c87fc858865ec5","last_reissued_at":"2026-05-17T21:57:18.501018Z","signature_status":"unsigned_v0","first_computed_at":"2026-05-17T21:40:25.119335Z"},"source_kind":"arxiv","source_id":"2605.15184","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T21:18:32Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"OO2YkaofXpFZ/Wx0Ag2PFNlLjGV5lXF/BWh8Y8VoeIYmHyf+DgirGAE3BhHcarEhPpkP3114R1Lfd6h0KNcnBw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-31T21:54:07.451354Z"},"content_sha256":"4b6736f5c3bfc84eb8e64be0122f9f9d1339e108b2df58ea81b304fdc41f3edb","schema_version":"1.0","event_id":"sha256:4b6736f5c3bfc84eb8e64be0122f9f9d1339e108b2df58ea81b304fdc41f3edb"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:7KZP4YJ2IFUSQ4GB7ZHOEDQJ7W","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Is Grep All You Need? How Agent Harnesses Reshape Agentic Search","license":"http://creativecommons.org/licenses/by/4.0/","headline":"Grep retrieval often beats vector search for accuracy in LLM agent workflows, though harness and tool-calling style drive most of the performance difference.","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Akhil Kasturi, Anmol Gulati, Elias Lumer, Sahil Sen, Vamse Kumar Subbiah","submitted_at":"2026-05-14T17:58:41Z","abstract_excerpt":"Recent advances in Large Language Model (LLM) agents have enabled complex agentic workflows where models autonomously retrieve information, call tools, and reason over large corpora to complete tasks on behalf of users. Despite the growing adoption of retrieval-augmented generation (RAG) in agentic search systems, existing literature lacks a systematic comparison of how retrieval strategy choice interacts with agent architecture and tool-calling paradigm. Important practical dimensions, including how tool outputs are presented to the model and how performance changes when searches must cope wi"},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"Across Chronos and the provider CLIs, grep generally yields higher accuracy than vector retrieval in our comparisons in experiment 1; at the same time, overall scores still depend strongly on which harness and tool-calling style is used, even when the underlying conversation data are the same.","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"That the 116-question sample from LongMemEval and the chosen harness implementations (Chronos, Claude Code, Codex, Gemini CLI) are representative of broader agentic search performance.","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"Grep retrieval generally outperforms vector retrieval in agentic search tasks, with performance varying strongly by agent harness and tool-calling style.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"Grep retrieval often beats vector search for accuracy in LLM agent workflows, though harness and tool-calling style drive most of the performance difference.","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"093958831f88079a78eb14e12ca1e05605319da02b01120e85590baa43b760d6"},"source":{"id":"2605.15184","kind":"arxiv","version":1},"verdict":{"id":"8b3b3e13-cb9c-4274-9083-eb6e82313538","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-15T03:03:42.934860Z","strongest_claim":"Across Chronos and the provider CLIs, grep generally yields higher accuracy than vector retrieval in our comparisons in experiment 1; at the same time, overall scores still depend strongly on which harness and tool-calling style is used, even when the underlying conversation data are the same.","one_line_summary":"Grep retrieval generally outperforms vector retrieval in agentic search tasks, with performance varying strongly by agent harness and tool-calling style.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"That the 116-question sample from LongMemEval and the chosen harness implementations (Chronos, Claude Code, Codex, Gemini CLI) are representative of broader agentic search performance.","pith_extraction_headline":"Grep retrieval often beats vector search for accuracy in LLM agent workflows, though harness and tool-calling style drive most of the performance difference."},"references":{"count":32,"sample":[{"doi":"","year":2024,"title":"Akari Asai, Zeqiu Wu, Yizhong Wang, Avirup Sil, and Hannaneh Hajishirzi. 2024. Self-RAG: Learning to Retrieve, Generate, and Critique through Self-Reflection. InProceedings of ICLR","work_id":"51d93457-b93f-410a-abae-43e325a84dfd","ref_index":1,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2021,"title":"Evaluating Large Language Models Trained on Code","work_id":"042493e9-b26f-4b4e-bbde-382072ca9b08","ref_index":2,"cited_arxiv_id":"2107.03374","is_internal_anchor":true},{"doi":"","year":2009,"title":"Gordon V. Cormack, Charles L. A. Clarke, and Stefan Buettcher. 2009. Reciprocal Rank Fusion Outperforms Condorcet and Individual Rank Learning Methods. In Proceedings of SIGIR. 758–759","work_id":"0e5f0a7e-17be-4502-abcd-a29e76608974","ref_index":3,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":null,"title":"Thibault Formal, Carlos Lassance, Benjamin Piwowarski, and Stéphane Clinchant","work_id":"ccccb494-40a0-4b9b-b976-97459888526f","ref_index":4,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2021,"title":"doi:10.48550/ARXIV.2109.10086","work_id":"b92b66c0-5a02-4966-91fe-a2935c54d59b","ref_index":5,"cited_arxiv_id":"","is_internal_anchor":false}],"resolved_work":32,"snapshot_sha256":"828a1a9dacd3e1d67cf30e6d7ff430d6bc862a2a9965e132f102df791fca1fce","internal_anchors":8},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":"8b3b3e13-cb9c-4274-9083-eb6e82313538"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T21:57:18Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"mV1hfHWniAlMFiZIN23eKCZhEhlusvbbQY5ktFSsm/8qb4FDUwj1jakxVdOiv7N6XvddKFKB2dViLHqeh8CfCA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-31T21:54:07.452503Z"},"content_sha256":"fe6aa4dad5830aa8e62033cdc40cc175aaed8b7d06e380df026fb03fc995dbf8","schema_version":"1.0","event_id":"sha256:fe6aa4dad5830aa8e62033cdc40cc175aaed8b7d06e380df026fb03fc995dbf8"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/7KZP4YJ2IFUSQ4GB7ZHOEDQJ7W/bundle.json","state_url":"https://pith.science/pith/7KZP4YJ2IFUSQ4GB7ZHOEDQJ7W/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/7KZP4YJ2IFUSQ4GB7ZHOEDQJ7W/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-31T21:54:07Z","links":{"resolver":"https://pith.science/pith/7KZP4YJ2IFUSQ4GB7ZHOEDQJ7W","bundle":"https://pith.science/pith/7KZP4YJ2IFUSQ4GB7ZHOEDQJ7W/bundle.json","state":"https://pith.science/pith/7KZP4YJ2IFUSQ4GB7ZHOEDQJ7W/state.json","well_known_bundle":"https://pith.science/.well-known/pith/7KZP4YJ2IFUSQ4GB7ZHOEDQJ7W/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:7KZP4YJ2IFUSQ4GB7ZHOEDQJ7W","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"06b18eb3db10ed5898a0f5b5a6f0b616d238d315cff6419f208ce59c3287fabf","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-05-14T17:58:41Z","title_canon_sha256":"882523d375a459c9c109456a0bbf0fb20ee9b760d5292c9be0a2ac2043cbfcee"},"schema_version":"1.0","source":{"id":"2605.15184","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.15184","created_at":"2026-05-17T21:18:32Z"},{"alias_kind":"arxiv_version","alias_value":"2605.15184v1","created_at":"2026-05-17T21:18:32Z"},{"alias_kind":"pith_short_12","alias_value":"7KZP4YJ2IFUS","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_16","alias_value":"7KZP4YJ2IFUSQ4GB","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_8","alias_value":"7KZP4YJ2","created_at":"2026-05-18T12:33:37Z"}],"graph_snapshots":[{"event_id":"sha256:fe6aa4dad5830aa8e62033cdc40cc175aaed8b7d06e380df026fb03fc995dbf8","target":"graph","created_at":"2026-05-17T21:57:18Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":4,"items":[{"attestation":"unclaimed","claim_id":"C1","kind":"strongest_claim","source":"verdict.strongest_claim","status":"machine_extracted","text":"Across Chronos and the provider CLIs, grep generally yields higher accuracy than vector retrieval in our comparisons in experiment 1; at the same time, overall scores still depend strongly on which harness and tool-calling style is used, even when the underlying conversation data are the same."},{"attestation":"unclaimed","claim_id":"C2","kind":"weakest_assumption","source":"verdict.weakest_assumption","status":"machine_extracted","text":"That the 116-question sample from LongMemEval and the chosen harness implementations (Chronos, Claude Code, Codex, Gemini CLI) are representative of broader agentic search performance."},{"attestation":"unclaimed","claim_id":"C3","kind":"one_line_summary","source":"verdict.one_line_summary","status":"machine_extracted","text":"Grep retrieval generally outperforms vector retrieval in agentic search tasks, with performance varying strongly by agent harness and tool-calling style."},{"attestation":"unclaimed","claim_id":"C4","kind":"headline","source":"verdict.pith_extraction.headline","status":"machine_extracted","text":"Grep retrieval often beats vector search for accuracy in LLM agent workflows, though harness and tool-calling style drive most of the performance difference."}],"snapshot_sha256":"093958831f88079a78eb14e12ca1e05605319da02b01120e85590baa43b760d6"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Recent advances in Large Language Model (LLM) agents have enabled complex agentic workflows where models autonomously retrieve information, call tools, and reason over large corpora to complete tasks on behalf of users. Despite the growing adoption of retrieval-augmented generation (RAG) in agentic search systems, existing literature lacks a systematic comparison of how retrieval strategy choice interacts with agent architecture and tool-calling paradigm. Important practical dimensions, including how tool outputs are presented to the model and how performance changes when searches must cope wi","authors_text":"Akhil Kasturi, Anmol Gulati, Elias Lumer, Sahil Sen, Vamse Kumar Subbiah","cross_cats":[],"headline":"Grep retrieval often beats vector search for accuracy in LLM agent workflows, though harness and tool-calling style drive most of the performance difference.","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-05-14T17:58:41Z","title":"Is Grep All You Need? How Agent Harnesses Reshape Agentic Search"},"references":{"count":32,"internal_anchors":8,"resolved_work":32,"sample":[{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":1,"title":"Akari Asai, Zeqiu Wu, Yizhong Wang, Avirup Sil, and Hannaneh Hajishirzi. 2024. Self-RAG: Learning to Retrieve, Generate, and Critique through Self-Reflection. InProceedings of ICLR","work_id":"51d93457-b93f-410a-abae-43e325a84dfd","year":2024},{"cited_arxiv_id":"2107.03374","doi":"","is_internal_anchor":true,"ref_index":2,"title":"Evaluating Large Language Models Trained on Code","work_id":"042493e9-b26f-4b4e-bbde-382072ca9b08","year":2021},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":3,"title":"Gordon V. Cormack, Charles L. A. Clarke, and Stefan Buettcher. 2009. Reciprocal Rank Fusion Outperforms Condorcet and Individual Rank Learning Methods. In Proceedings of SIGIR. 758–759","work_id":"0e5f0a7e-17be-4502-abcd-a29e76608974","year":2009},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":4,"title":"Thibault Formal, Carlos Lassance, Benjamin Piwowarski, and Stéphane Clinchant","work_id":"ccccb494-40a0-4b9b-b976-97459888526f","year":null},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":5,"title":"doi:10.48550/ARXIV.2109.10086","work_id":"b92b66c0-5a02-4966-91fe-a2935c54d59b","year":2021}],"snapshot_sha256":"828a1a9dacd3e1d67cf30e6d7ff430d6bc862a2a9965e132f102df791fca1fce"},"source":{"id":"2605.15184","kind":"arxiv","version":1},"verdict":{"created_at":"2026-05-15T03:03:42.934860Z","id":"8b3b3e13-cb9c-4274-9083-eb6e82313538","model_set":{"reader":"grok-4.3"},"one_line_summary":"Grep retrieval generally outperforms vector retrieval in agentic search tasks, with performance varying strongly by agent harness and tool-calling style.","pipeline_version":"pith-pipeline@v0.9.0","pith_extraction_headline":"Grep retrieval often beats vector search for accuracy in LLM agent workflows, though harness and tool-calling style drive most of the performance difference.","strongest_claim":"Across Chronos and the provider CLIs, grep generally yields higher accuracy than vector retrieval in our comparisons in experiment 1; at the same time, overall scores still depend strongly on which harness and tool-calling style is used, even when the underlying conversation data are the same.","weakest_assumption":"That the 116-question sample from LongMemEval and the chosen harness implementations (Chronos, Claude Code, Codex, Gemini CLI) are representative of broader agentic search performance."}},"verdict_id":"8b3b3e13-cb9c-4274-9083-eb6e82313538"}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:4b6736f5c3bfc84eb8e64be0122f9f9d1339e108b2df58ea81b304fdc41f3edb","target":"record","created_at":"2026-05-17T21:18:32Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"06b18eb3db10ed5898a0f5b5a6f0b616d238d315cff6419f208ce59c3287fabf","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-05-14T17:58:41Z","title_canon_sha256":"882523d375a459c9c109456a0bbf0fb20ee9b760d5292c9be0a2ac2043cbfcee"},"schema_version":"1.0","source":{"id":"2605.15184","kind":"arxiv","version":1}},"canonical_sha256":"fab2fe613a41692870c1fe4ee20e09fd92ce5abeb23353b0b3c87fc858865ec5","receipt":{"builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"fab2fe613a41692870c1fe4ee20e09fd92ce5abeb23353b0b3c87fc858865ec5","first_computed_at":"2026-05-17T21:40:25.119335Z","kind":"pith_receipt","last_reissued_at":"2026-05-17T21:57:18.501018Z","receipt_version":"0.2","signature_status":"unsigned_v0"},"source_id":"2605.15184","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:4b6736f5c3bfc84eb8e64be0122f9f9d1339e108b2df58ea81b304fdc41f3edb","sha256:fe6aa4dad5830aa8e62033cdc40cc175aaed8b7d06e380df026fb03fc995dbf8"],"state_sha256":"baa5b57c2e907035a494ff24151d43a2b94e8f3baf296f70c1647fca13aada30"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"iysa1f3E6XexQ6R6vob4iWlyPzPOkB8Z7W+boESx44RSsqM2pPjh38a6qnm967aYw9pa2re/bPr/zE3iPHVHCA==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-31T21:54:07.457127Z","bundle_sha256":"bc90573d8d72cd8ad78277ce84b36312cdde85d6ead80e1a15a3b702a42322af"}}