{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:ENNODJDANQEMD6VYPEWBU52CEI","short_pith_number":"pith:ENNODJDA","schema_version":"1.0","canonical_sha256":"235ae1a4606c08c1fab8792c1a77422201c82ac8da52a95a8867e3803074fa02","source":{"kind":"arxiv","id":"2606.25984","version":1},"attestation_state":"computed","paper":{"title":"InvestPhilBench: A Multi-Layer Dynamic Benchmark for Evaluating Large Language Model Procedural Reasoning in Expert Investment Philosophy","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.LG"],"primary_cat":"cs.AI","authors_text":"Bo Qu, Mingguang Chen","submitted_at":"2026-06-24T15:53:20Z","abstract_excerpt":"Large language models are increasingly deployed as investment research assistants, yet no benchmark tests whether they can accurately reconstruct and apply the specific procedural decision frameworks of expert investors. We introduce InvestPhilBench, a multi-layer dynamic benchmark spanning eight cognitive tiers, from principle identification (L1) to novel framework extrapolation (L8). The v0.6 release comprises 118 primary-source-verified investment principle cards, 25 decision framework cards with explicit topology metadata, and 243 QA questions (197 dev / 46 held-out test). For reproducible"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2606.25984","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.AI","submitted_at":"2026-06-24T15:53:20Z","cross_cats_sorted":["cs.LG"],"title_canon_sha256":"cfbff6ab9923b6f7db51d20097ec91c7fa3d893bc0f64d7bcab6aff2cf427d9c","abstract_canon_sha256":"f3f230eb84235463211a357634f60ffbd5cc288f4ab959afe936e03bec7389c4"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-25T01:18:45.007754Z","signature_b64":"O+J3JIb9oKOiXgqigdq8bTjQo4oBFG2sUscalbH+OoA3Thjp/2RW6990mvv8TKjtbRd2Yt2Zh8JYrDLnp5uZBw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"235ae1a4606c08c1fab8792c1a77422201c82ac8da52a95a8867e3803074fa02","last_reissued_at":"2026-06-25T01:18:45.007364Z","signature_status":"signed_v1","first_computed_at":"2026-06-25T01:18:45.007364Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"InvestPhilBench: A Multi-Layer Dynamic Benchmark for Evaluating Large Language Model Procedural Reasoning in Expert Investment Philosophy","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.LG"],"primary_cat":"cs.AI","authors_text":"Bo Qu, Mingguang Chen","submitted_at":"2026-06-24T15:53:20Z","abstract_excerpt":"Large language models are increasingly deployed as investment research assistants, yet no benchmark tests whether they can accurately reconstruct and apply the specific procedural decision frameworks of expert investors. We introduce InvestPhilBench, a multi-layer dynamic benchmark spanning eight cognitive tiers, from principle identification (L1) to novel framework extrapolation (L8). The v0.6 release comprises 118 primary-source-verified investment principle cards, 25 decision framework cards with explicit topology metadata, and 243 QA questions (197 dev / 46 held-out test). For reproducible"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.25984","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2606.25984/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2606.25984","created_at":"2026-06-25T01:18:45.007428+00:00"},{"alias_kind":"arxiv_version","alias_value":"2606.25984v1","created_at":"2026-06-25T01:18:45.007428+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.25984","created_at":"2026-06-25T01:18:45.007428+00:00"},{"alias_kind":"pith_short_12","alias_value":"ENNODJDANQEM","created_at":"2026-06-25T01:18:45.007428+00:00"},{"alias_kind":"pith_short_16","alias_value":"ENNODJDANQEMD6VY","created_at":"2026-06-25T01:18:45.007428+00:00"},{"alias_kind":"pith_short_8","alias_value":"ENNODJDA","created_at":"2026-06-25T01:18:45.007428+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/ENNODJDANQEMD6VYPEWBU52CEI","json":"https://pith.science/pith/ENNODJDANQEMD6VYPEWBU52CEI.json","graph_json":"https://pith.science/api/pith-number/ENNODJDANQEMD6VYPEWBU52CEI/graph.json","events_json":"https://pith.science/api/pith-number/ENNODJDANQEMD6VYPEWBU52CEI/events.json","paper":"https://pith.science/paper/ENNODJDA"},"agent_actions":{"view_html":"https://pith.science/pith/ENNODJDANQEMD6VYPEWBU52CEI","download_json":"https://pith.science/pith/ENNODJDANQEMD6VYPEWBU52CEI.json","view_paper":"https://pith.science/paper/ENNODJDA","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2606.25984&json=true","fetch_graph":"https://pith.science/api/pith-number/ENNODJDANQEMD6VYPEWBU52CEI/graph.json","fetch_events":"https://pith.science/api/pith-number/ENNODJDANQEMD6VYPEWBU52CEI/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/ENNODJDANQEMD6VYPEWBU52CEI/action/timestamp_anchor","attest_storage":"https://pith.science/pith/ENNODJDANQEMD6VYPEWBU52CEI/action/storage_attestation","attest_author":"https://pith.science/pith/ENNODJDANQEMD6VYPEWBU52CEI/action/author_attestation","sign_citation":"https://pith.science/pith/ENNODJDANQEMD6VYPEWBU52CEI/action/citation_signature","submit_replication":"https://pith.science/pith/ENNODJDANQEMD6VYPEWBU52CEI/action/replication_record"}},"created_at":"2026-06-25T01:18:45.007428+00:00","updated_at":"2026-06-25T01:18:45.007428+00:00"}