{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:ZSEJRDVMJUB52CPSPUD235WNWQ","short_pith_number":"pith:ZSEJRDVM","canonical_record":{"source":{"id":"2604.08362","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-04-09T15:26:21Z","cross_cats_sorted":["cs.AI","cs.LG"],"title_canon_sha256":"5edb2bbf102b24c7b00ff202185ca811770bef5624cf1828b79dc41a75eaf75d","abstract_canon_sha256":"f50a47cceafdb543b2dfdf5fc8cb22b638d9960a14769a3dd8dbbbbb24a09ea9"},"schema_version":"1.0"},"canonical_sha256":"cc88988eac4d03dd09f27d07adf6cdb43b298a8527ac60eef2511bd5d4471ee3","source":{"kind":"arxiv","id":"2604.08362","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2604.08362","created_at":"2026-05-22T02:04:40Z"},{"alias_kind":"arxiv_version","alias_value":"2604.08362v2","created_at":"2026-05-22T02:04:40Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2604.08362","created_at":"2026-05-22T02:04:40Z"},{"alias_kind":"pith_short_12","alias_value":"ZSEJRDVMJUB5","created_at":"2026-05-22T02:04:40Z"},{"alias_kind":"pith_short_16","alias_value":"ZSEJRDVMJUB52CPS","created_at":"2026-05-22T02:04:40Z"},{"alias_kind":"pith_short_8","alias_value":"ZSEJRDVM","created_at":"2026-05-22T02:04:40Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:ZSEJRDVMJUB52CPSPUD235WNWQ","target":"record","payload":{"canonical_record":{"source":{"id":"2604.08362","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-04-09T15:26:21Z","cross_cats_sorted":["cs.AI","cs.LG"],"title_canon_sha256":"5edb2bbf102b24c7b00ff202185ca811770bef5624cf1828b79dc41a75eaf75d","abstract_canon_sha256":"f50a47cceafdb543b2dfdf5fc8cb22b638d9960a14769a3dd8dbbbbb24a09ea9"},"schema_version":"1.0"},"canonical_sha256":"cc88988eac4d03dd09f27d07adf6cdb43b298a8527ac60eef2511bd5d4471ee3","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-22T02:04:40.799204Z","signature_b64":"EkWht+eRu5VRfeilqjlO6Ptm588Ns31Y6BbE9IJoGcEho0DhixVN9KP3Uk8IPADiNI0tt9/OdwdlgUedJf5+BA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"cc88988eac4d03dd09f27d07adf6cdb43b298a8527ac60eef2511bd5d4471ee3","last_reissued_at":"2026-05-22T02:04:40.798230Z","signature_status":"signed_v1","first_computed_at":"2026-05-22T02:04:40.798230Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2604.08362","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-22T02:04:40Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"NHOYfRAiO7DnwJg5UnmWwxXXvFkDJ/54peQkrcokNWw5vuVTy6IhEgMGCxcG9KNv8HcuIAsbjKeFdj69sf4gDg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-27T01:33:37.238479Z"},"content_sha256":"dba7341007c589f2f0be8259080d710a778fec3282e89bbb7a9542d21edda04b","schema_version":"1.0","event_id":"sha256:dba7341007c589f2f0be8259080d710a778fec3282e89bbb7a9542d21edda04b"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:ZSEJRDVMJUB52CPSPUD235WNWQ","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Towards Real-world Human Behavior Simulation: Benchmarking Large Language Models on Long-horizon, Cross-scenario, Heterogeneous Behavior Traces","license":"http://creativecommons.org/licenses/by/4.0/","headline":"Large language models simulating human behavior converge to a positive average person and erase individual differences.","cross_cats":["cs.AI","cs.LG"],"primary_cat":"cs.CL","authors_text":"Boxi Cao, Hongyu Lin, Jiawei Chen, Le Sun, Ruotong Pan, Ruoxi Xu, Tingting Gao, Xiangyu Wu, Xianpei Han, Yaojie Lu, Yifei Hu, Yingfei Sun, Yong Du, Yunfei Zhang","submitted_at":"2026-04-09T15:26:21Z","abstract_excerpt":"The emergence of Large Language Models (LLMs) has illuminated the potential for a general-purpose user simulator. However, existing benchmarks remain constrained to isolated scenarios, narrow action spaces, or synthetic data, failing to capture the holistic nature of authentic human behavior. To bridge this gap, we introduce OmniBehavior, the first user simulation benchmark constructed entirely from real-world data, integrating long-horizon, cross-scenario, and heterogeneous behavioral patterns into a unified framework. Based on this benchmark, we first provide empirical evidence that previous"},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"a systematic comparison between simulated and authentic behaviors uncovers a fundamental structural bias: LLMs tend to converge toward a positive average person, exhibiting hyper-activity, persona homogenization, and a Utopian bias. This results in the loss of individual differences and long-tail behaviors.","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"That the collected real-world behavioral traces are representative of holistic human decision-making without collection or annotation biases, and that the metrics used to quantify hyper-activity, homogenization, and Utopian bias accurately reflect structural LLM limitations rather than benchmark-specific artifacts.","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"OmniBehavior benchmark demonstrates that LLMs simulating real human behavior converge on hyper-active positive average personas, losing long-tail individual differences.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"Large language models simulating human behavior converge to a positive average person and erase individual differences.","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"e37f021cd98b61f4a286908934d554ae8b7aa776309e76c36f6e2176195db166"},"source":{"id":"2604.08362","kind":"arxiv","version":2},"verdict":{"id":"bdc06fd4-b157-43b3-aae4-f323371009a6","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-10T18:16:31.449091Z","strongest_claim":"a systematic comparison between simulated and authentic behaviors uncovers a fundamental structural bias: LLMs tend to converge toward a positive average person, exhibiting hyper-activity, persona homogenization, and a Utopian bias. This results in the loss of individual differences and long-tail behaviors.","one_line_summary":"OmniBehavior benchmark demonstrates that LLMs simulating real human behavior converge on hyper-active positive average personas, losing long-tail individual differences.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"That the collected real-world behavioral traces are representative of holistic human decision-making without collection or annotation biases, and that the metrics used to quantify hyper-activity, homogenization, and Utopian bias accurately reflect structural LLM limitations rather than benchmark-specific artifacts.","pith_extraction_headline":"Large language models simulating human behavior converge to a positive average person and erase individual differences."},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2604.08362/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":2,"snapshot_sha256":"7a41588afc79cf8d2661c5970937739852e69834307c638b972b9e934819d814"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":"bdc06fd4-b157-43b3-aae4-f323371009a6"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-22T02:04:40Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"HvTPZI+LwlZ3RjmSPS8GoG/H/FrdeLw+mJ09tMpvHtW1zbR9wneGO0w3IFLkNglaVjOH0v6N6nk/qiJfvSJhCA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-27T01:33:37.239367Z"},"content_sha256":"f4bc7fd6f3e37b53f97a704beb7fc6ceb0d27bab539fe71961c35633e84623f4","schema_version":"1.0","event_id":"sha256:f4bc7fd6f3e37b53f97a704beb7fc6ceb0d27bab539fe71961c35633e84623f4"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/ZSEJRDVMJUB52CPSPUD235WNWQ/bundle.json","state_url":"https://pith.science/pith/ZSEJRDVMJUB52CPSPUD235WNWQ/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/ZSEJRDVMJUB52CPSPUD235WNWQ/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-27T01:33:37Z","links":{"resolver":"https://pith.science/pith/ZSEJRDVMJUB52CPSPUD235WNWQ","bundle":"https://pith.science/pith/ZSEJRDVMJUB52CPSPUD235WNWQ/bundle.json","state":"https://pith.science/pith/ZSEJRDVMJUB52CPSPUD235WNWQ/state.json","well_known_bundle":"https://pith.science/.well-known/pith/ZSEJRDVMJUB52CPSPUD235WNWQ/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:ZSEJRDVMJUB52CPSPUD235WNWQ","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"f50a47cceafdb543b2dfdf5fc8cb22b638d9960a14769a3dd8dbbbbb24a09ea9","cross_cats_sorted":["cs.AI","cs.LG"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-04-09T15:26:21Z","title_canon_sha256":"5edb2bbf102b24c7b00ff202185ca811770bef5624cf1828b79dc41a75eaf75d"},"schema_version":"1.0","source":{"id":"2604.08362","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2604.08362","created_at":"2026-05-22T02:04:40Z"},{"alias_kind":"arxiv_version","alias_value":"2604.08362v2","created_at":"2026-05-22T02:04:40Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2604.08362","created_at":"2026-05-22T02:04:40Z"},{"alias_kind":"pith_short_12","alias_value":"ZSEJRDVMJUB5","created_at":"2026-05-22T02:04:40Z"},{"alias_kind":"pith_short_16","alias_value":"ZSEJRDVMJUB52CPS","created_at":"2026-05-22T02:04:40Z"},{"alias_kind":"pith_short_8","alias_value":"ZSEJRDVM","created_at":"2026-05-22T02:04:40Z"}],"graph_snapshots":[{"event_id":"sha256:f4bc7fd6f3e37b53f97a704beb7fc6ceb0d27bab539fe71961c35633e84623f4","target":"graph","created_at":"2026-05-22T02:04:40Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":4,"items":[{"attestation":"unclaimed","claim_id":"C1","kind":"strongest_claim","source":"verdict.strongest_claim","status":"machine_extracted","text":"a systematic comparison between simulated and authentic behaviors uncovers a fundamental structural bias: LLMs tend to converge toward a positive average person, exhibiting hyper-activity, persona homogenization, and a Utopian bias. This results in the loss of individual differences and long-tail behaviors."},{"attestation":"unclaimed","claim_id":"C2","kind":"weakest_assumption","source":"verdict.weakest_assumption","status":"machine_extracted","text":"That the collected real-world behavioral traces are representative of holistic human decision-making without collection or annotation biases, and that the metrics used to quantify hyper-activity, homogenization, and Utopian bias accurately reflect structural LLM limitations rather than benchmark-specific artifacts."},{"attestation":"unclaimed","claim_id":"C3","kind":"one_line_summary","source":"verdict.one_line_summary","status":"machine_extracted","text":"OmniBehavior benchmark demonstrates that LLMs simulating real human behavior converge on hyper-active positive average personas, losing long-tail individual differences."},{"attestation":"unclaimed","claim_id":"C4","kind":"headline","source":"verdict.pith_extraction.headline","status":"machine_extracted","text":"Large language models simulating human behavior converge to a positive average person and erase individual differences."}],"snapshot_sha256":"e37f021cd98b61f4a286908934d554ae8b7aa776309e76c36f6e2176195db166"},"formal_canon":{"evidence_count":2,"snapshot_sha256":"7a41588afc79cf8d2661c5970937739852e69834307c638b972b9e934819d814"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2604.08362/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"The emergence of Large Language Models (LLMs) has illuminated the potential for a general-purpose user simulator. However, existing benchmarks remain constrained to isolated scenarios, narrow action spaces, or synthetic data, failing to capture the holistic nature of authentic human behavior. To bridge this gap, we introduce OmniBehavior, the first user simulation benchmark constructed entirely from real-world data, integrating long-horizon, cross-scenario, and heterogeneous behavioral patterns into a unified framework. Based on this benchmark, we first provide empirical evidence that previous","authors_text":"Boxi Cao, Hongyu Lin, Jiawei Chen, Le Sun, Ruotong Pan, Ruoxi Xu, Tingting Gao, Xiangyu Wu, Xianpei Han, Yaojie Lu, Yifei Hu, Yingfei Sun, Yong Du, Yunfei Zhang","cross_cats":["cs.AI","cs.LG"],"headline":"Large language models simulating human behavior converge to a positive average person and erase individual differences.","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-04-09T15:26:21Z","title":"Towards Real-world Human Behavior Simulation: Benchmarking Large Language Models on Long-horizon, Cross-scenario, Heterogeneous Behavior Traces"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2604.08362","kind":"arxiv","version":2},"verdict":{"created_at":"2026-05-10T18:16:31.449091Z","id":"bdc06fd4-b157-43b3-aae4-f323371009a6","model_set":{"reader":"grok-4.3"},"one_line_summary":"OmniBehavior benchmark demonstrates that LLMs simulating real human behavior converge on hyper-active positive average personas, losing long-tail individual differences.","pipeline_version":"pith-pipeline@v0.9.0","pith_extraction_headline":"Large language models simulating human behavior converge to a positive average person and erase individual differences.","strongest_claim":"a systematic comparison between simulated and authentic behaviors uncovers a fundamental structural bias: LLMs tend to converge toward a positive average person, exhibiting hyper-activity, persona homogenization, and a Utopian bias. This results in the loss of individual differences and long-tail behaviors.","weakest_assumption":"That the collected real-world behavioral traces are representative of holistic human decision-making without collection or annotation biases, and that the metrics used to quantify hyper-activity, homogenization, and Utopian bias accurately reflect structural LLM limitations rather than benchmark-specific artifacts."}},"verdict_id":"bdc06fd4-b157-43b3-aae4-f323371009a6"}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:dba7341007c589f2f0be8259080d710a778fec3282e89bbb7a9542d21edda04b","target":"record","created_at":"2026-05-22T02:04:40Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"f50a47cceafdb543b2dfdf5fc8cb22b638d9960a14769a3dd8dbbbbb24a09ea9","cross_cats_sorted":["cs.AI","cs.LG"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-04-09T15:26:21Z","title_canon_sha256":"5edb2bbf102b24c7b00ff202185ca811770bef5624cf1828b79dc41a75eaf75d"},"schema_version":"1.0","source":{"id":"2604.08362","kind":"arxiv","version":2}},"canonical_sha256":"cc88988eac4d03dd09f27d07adf6cdb43b298a8527ac60eef2511bd5d4471ee3","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"cc88988eac4d03dd09f27d07adf6cdb43b298a8527ac60eef2511bd5d4471ee3","first_computed_at":"2026-05-22T02:04:40.798230Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-22T02:04:40.798230Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"EkWht+eRu5VRfeilqjlO6Ptm588Ns31Y6BbE9IJoGcEho0DhixVN9KP3Uk8IPADiNI0tt9/OdwdlgUedJf5+BA==","signature_status":"signed_v1","signed_at":"2026-05-22T02:04:40.799204Z","signed_message":"canonical_sha256_bytes"},"source_id":"2604.08362","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:dba7341007c589f2f0be8259080d710a778fec3282e89bbb7a9542d21edda04b","sha256:f4bc7fd6f3e37b53f97a704beb7fc6ceb0d27bab539fe71961c35633e84623f4"],"state_sha256":"e9ffe933ba31ae42be5fdfddbaceafff145eaba905e2576524ec803981f56d62"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"kpIDY7KwkWPcexpBtZ4vMRe2EiIsBExGQhPsVjevsPh+rPdD9INZXG3dtRqMymHBrCj+sGmPfsjmn+71xST5AQ==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-27T01:33:37.246715Z","bundle_sha256":"e8b19d14fd2877ee33e592806d3444a7ef9bca57feebd5333c6799a005dbdece"}}