{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:B52XMRC2IMI2K2OL32EPWQ3R65","short_pith_number":"pith:B52XMRC2","canonical_record":{"source":{"id":"2604.09609","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2026-03-11T16:24:32Z","cross_cats_sorted":["cs.RO"],"title_canon_sha256":"366532b760a0820e2186532680003c232966fcf141d0a2c6817cb3be39859285","abstract_canon_sha256":"ccf46d90c795c501d9a8663ea59fd8c9ac3ba209d19a51e72573bd446663ed5b"},"schema_version":"1.0"},"canonical_sha256":"0f7576445a4311a569cbde88fb4371f760594e559cb72f6cf56b75d8f72235f1","source":{"kind":"arxiv","id":"2604.09609","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2604.09609","created_at":"2026-05-20T00:04:31Z"},{"alias_kind":"arxiv_version","alias_value":"2604.09609v2","created_at":"2026-05-20T00:04:31Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2604.09609","created_at":"2026-05-20T00:04:31Z"},{"alias_kind":"pith_short_12","alias_value":"B52XMRC2IMI2","created_at":"2026-05-20T00:04:31Z"},{"alias_kind":"pith_short_16","alias_value":"B52XMRC2IMI2K2OL","created_at":"2026-05-20T00:04:31Z"},{"alias_kind":"pith_short_8","alias_value":"B52XMRC2","created_at":"2026-05-20T00:04:31Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:B52XMRC2IMI2K2OL32EPWQ3R65","target":"record","payload":{"canonical_record":{"source":{"id":"2604.09609","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2026-03-11T16:24:32Z","cross_cats_sorted":["cs.RO"],"title_canon_sha256":"366532b760a0820e2186532680003c232966fcf141d0a2c6817cb3be39859285","abstract_canon_sha256":"ccf46d90c795c501d9a8663ea59fd8c9ac3ba209d19a51e72573bd446663ed5b"},"schema_version":"1.0"},"canonical_sha256":"0f7576445a4311a569cbde88fb4371f760594e559cb72f6cf56b75d8f72235f1","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-20T00:04:31.252928Z","signature_b64":"1hiarB0WMC2hY20z6mETnmKwpY8naL4WBWfl43uBIayXXNrS+7MKIEGG6YtcSg7YSqLS6vvx2O4GpJzreQvtDA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"0f7576445a4311a569cbde88fb4371f760594e559cb72f6cf56b75d8f72235f1","last_reissued_at":"2026-05-20T00:04:31.252046Z","signature_status":"signed_v1","first_computed_at":"2026-05-20T00:04:31.252046Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2604.09609","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-20T00:04:31Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"e465A88mp9pDXSUSDLYVIn8vTCvgZaDfv7XtiX1It7Sgm3mUo+FbEHtMP+g/8lwRK8yWLY1U1uGhuTLHExhCBg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-28T14:10:31.766761Z"},"content_sha256":"a7d80c392c6c25fc505e0ce6f141edc74b0396e563ce8d9585eec1af60f0089f","schema_version":"1.0","event_id":"sha256:a7d80c392c6c25fc505e0ce6f141edc74b0396e563ce8d9585eec1af60f0089f"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:B52XMRC2IMI2K2OL32EPWQ3R65","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"General-purpose LLMs as Models of Human Driver Behavior: The Case of Simplified Merging","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"General-purpose LLMs can serve as standalone models of human merging behavior in simplified scenarios.","cross_cats":["cs.RO"],"primary_cat":"cs.AI","authors_text":"Arkady Zgonnikov, Samir H.A. Mohammad, Wouter Mooi","submitted_at":"2026-03-11T16:24:32Z","abstract_excerpt":"Human behavior models are essential as behavior references and for simulating human agents in virtual safety assessment of automated vehicles (AVs), yet current models face a trade-off between interpretability and flexibility. General-purpose large language models (LLMs) offer a promising alternative: a single model potentially deployable without parameter fitting across diverse scenarios. However, what LLMs can and cannot capture about human driving behavior remains poorly understood. We address this gap by embedding two general-purpose LLMs (OpenAI o3 and Google Gemini 2.5 Pro) as standalone"},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"general-purpose LLMs could potentially serve as standalone, ready-to-use human behavior models in AV evaluation pipelines","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"The simplified one-dimensional merging scenario is sufficient to evaluate whether LLMs capture core aspects of human driver behavior for valid comparison to real data.","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"General-purpose LLMs reproduce some human-like merging behaviors such as spatial cue responses but fail to consistently handle dynamic velocity cues and show divergent safety performance across models.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"General-purpose LLMs can serve as standalone models of human merging behavior in simplified scenarios.","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"d84c65b1eaccf8b2ed75044419ec6bedcf2426db110e7a656bd1c254656f5780"},"source":{"id":"2604.09609","kind":"arxiv","version":2},"verdict":{"id":"786b8729-e7da-4501-bbd2-b2d3de0dad15","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-15T13:22:35.670971Z","strongest_claim":"general-purpose LLMs could potentially serve as standalone, ready-to-use human behavior models in AV evaluation pipelines","one_line_summary":"General-purpose LLMs reproduce some human-like merging behaviors such as spatial cue responses but fail to consistently handle dynamic velocity cues and show divergent safety performance across models.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"The simplified one-dimensional merging scenario is sufficient to evaluate whether LLMs capture core aspects of human driver behavior for valid comparison to real data.","pith_extraction_headline":"General-purpose LLMs can serve as standalone models of human merging behavior in simplified scenarios."},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2604.09609/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":"786b8729-e7da-4501-bbd2-b2d3de0dad15"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-20T00:04:31Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"54hVtobnfGXix9IhQDSJCkD/oiR/IUZI9LPkdx6XqQf6W7uiM1sx81U5EwQB/frdJ0Yiiq1LPdFLICgvNqWeBw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-28T14:10:31.767234Z"},"content_sha256":"6ea616bb3a59c5cb84a8d6946cdd34336dac18dceb7cacd617c99d034f4c722e","schema_version":"1.0","event_id":"sha256:6ea616bb3a59c5cb84a8d6946cdd34336dac18dceb7cacd617c99d034f4c722e"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/B52XMRC2IMI2K2OL32EPWQ3R65/bundle.json","state_url":"https://pith.science/pith/B52XMRC2IMI2K2OL32EPWQ3R65/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/B52XMRC2IMI2K2OL32EPWQ3R65/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-28T14:10:31Z","links":{"resolver":"https://pith.science/pith/B52XMRC2IMI2K2OL32EPWQ3R65","bundle":"https://pith.science/pith/B52XMRC2IMI2K2OL32EPWQ3R65/bundle.json","state":"https://pith.science/pith/B52XMRC2IMI2K2OL32EPWQ3R65/state.json","well_known_bundle":"https://pith.science/.well-known/pith/B52XMRC2IMI2K2OL32EPWQ3R65/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:B52XMRC2IMI2K2OL32EPWQ3R65","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"ccf46d90c795c501d9a8663ea59fd8c9ac3ba209d19a51e72573bd446663ed5b","cross_cats_sorted":["cs.RO"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2026-03-11T16:24:32Z","title_canon_sha256":"366532b760a0820e2186532680003c232966fcf141d0a2c6817cb3be39859285"},"schema_version":"1.0","source":{"id":"2604.09609","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2604.09609","created_at":"2026-05-20T00:04:31Z"},{"alias_kind":"arxiv_version","alias_value":"2604.09609v2","created_at":"2026-05-20T00:04:31Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2604.09609","created_at":"2026-05-20T00:04:31Z"},{"alias_kind":"pith_short_12","alias_value":"B52XMRC2IMI2","created_at":"2026-05-20T00:04:31Z"},{"alias_kind":"pith_short_16","alias_value":"B52XMRC2IMI2K2OL","created_at":"2026-05-20T00:04:31Z"},{"alias_kind":"pith_short_8","alias_value":"B52XMRC2","created_at":"2026-05-20T00:04:31Z"}],"graph_snapshots":[{"event_id":"sha256:6ea616bb3a59c5cb84a8d6946cdd34336dac18dceb7cacd617c99d034f4c722e","target":"graph","created_at":"2026-05-20T00:04:31Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":4,"items":[{"attestation":"unclaimed","claim_id":"C1","kind":"strongest_claim","source":"verdict.strongest_claim","status":"machine_extracted","text":"general-purpose LLMs could potentially serve as standalone, ready-to-use human behavior models in AV evaluation pipelines"},{"attestation":"unclaimed","claim_id":"C2","kind":"weakest_assumption","source":"verdict.weakest_assumption","status":"machine_extracted","text":"The simplified one-dimensional merging scenario is sufficient to evaluate whether LLMs capture core aspects of human driver behavior for valid comparison to real data."},{"attestation":"unclaimed","claim_id":"C3","kind":"one_line_summary","source":"verdict.one_line_summary","status":"machine_extracted","text":"General-purpose LLMs reproduce some human-like merging behaviors such as spatial cue responses but fail to consistently handle dynamic velocity cues and show divergent safety performance across models."},{"attestation":"unclaimed","claim_id":"C4","kind":"headline","source":"verdict.pith_extraction.headline","status":"machine_extracted","text":"General-purpose LLMs can serve as standalone models of human merging behavior in simplified scenarios."}],"snapshot_sha256":"d84c65b1eaccf8b2ed75044419ec6bedcf2426db110e7a656bd1c254656f5780"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2604.09609/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Human behavior models are essential as behavior references and for simulating human agents in virtual safety assessment of automated vehicles (AVs), yet current models face a trade-off between interpretability and flexibility. General-purpose large language models (LLMs) offer a promising alternative: a single model potentially deployable without parameter fitting across diverse scenarios. However, what LLMs can and cannot capture about human driving behavior remains poorly understood. We address this gap by embedding two general-purpose LLMs (OpenAI o3 and Google Gemini 2.5 Pro) as standalone","authors_text":"Arkady Zgonnikov, Samir H.A. Mohammad, Wouter Mooi","cross_cats":["cs.RO"],"headline":"General-purpose LLMs can serve as standalone models of human merging behavior in simplified scenarios.","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2026-03-11T16:24:32Z","title":"General-purpose LLMs as Models of Human Driver Behavior: The Case of Simplified Merging"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2604.09609","kind":"arxiv","version":2},"verdict":{"created_at":"2026-05-15T13:22:35.670971Z","id":"786b8729-e7da-4501-bbd2-b2d3de0dad15","model_set":{"reader":"grok-4.3"},"one_line_summary":"General-purpose LLMs reproduce some human-like merging behaviors such as spatial cue responses but fail to consistently handle dynamic velocity cues and show divergent safety performance across models.","pipeline_version":"pith-pipeline@v0.9.0","pith_extraction_headline":"General-purpose LLMs can serve as standalone models of human merging behavior in simplified scenarios.","strongest_claim":"general-purpose LLMs could potentially serve as standalone, ready-to-use human behavior models in AV evaluation pipelines","weakest_assumption":"The simplified one-dimensional merging scenario is sufficient to evaluate whether LLMs capture core aspects of human driver behavior for valid comparison to real data."}},"verdict_id":"786b8729-e7da-4501-bbd2-b2d3de0dad15"}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:a7d80c392c6c25fc505e0ce6f141edc74b0396e563ce8d9585eec1af60f0089f","target":"record","created_at":"2026-05-20T00:04:31Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"ccf46d90c795c501d9a8663ea59fd8c9ac3ba209d19a51e72573bd446663ed5b","cross_cats_sorted":["cs.RO"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2026-03-11T16:24:32Z","title_canon_sha256":"366532b760a0820e2186532680003c232966fcf141d0a2c6817cb3be39859285"},"schema_version":"1.0","source":{"id":"2604.09609","kind":"arxiv","version":2}},"canonical_sha256":"0f7576445a4311a569cbde88fb4371f760594e559cb72f6cf56b75d8f72235f1","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"0f7576445a4311a569cbde88fb4371f760594e559cb72f6cf56b75d8f72235f1","first_computed_at":"2026-05-20T00:04:31.252046Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-20T00:04:31.252046Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"1hiarB0WMC2hY20z6mETnmKwpY8naL4WBWfl43uBIayXXNrS+7MKIEGG6YtcSg7YSqLS6vvx2O4GpJzreQvtDA==","signature_status":"signed_v1","signed_at":"2026-05-20T00:04:31.252928Z","signed_message":"canonical_sha256_bytes"},"source_id":"2604.09609","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:a7d80c392c6c25fc505e0ce6f141edc74b0396e563ce8d9585eec1af60f0089f","sha256:6ea616bb3a59c5cb84a8d6946cdd34336dac18dceb7cacd617c99d034f4c722e"],"state_sha256":"44ba25bda10345746803881ced10105aa59c52173d21935f2ec219df78c2ea76"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"0Ofs60ClJMPbVtFmpYQMgfTTl1g4zG+GKjkkYW49imY1WJFOmwiin4uwqHRJ4YUSS85NqJQmRyLKDJ852k0eDA==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-28T14:10:31.769718Z","bundle_sha256":"95972a0f045157cfec2a704dbd19d922d74909ab368972b3c38bcd17c37fd9f8"}}