{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2025:J4HM2RBS4EHMY54WE73GKX44OB","short_pith_number":"pith:J4HM2RBS","schema_version":"1.0","canonical_sha256":"4f0ecd4432e10ecc779627f6655f9c7056a114901d54ebf704006565dde517c1","source":{"kind":"arxiv","id":"2508.05852","version":2},"attestation_state":"computed","paper":{"title":"Interpretable Modeling of Driver Attention Shifts with a Vision--Language Model","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.CV","authors_text":"Kaiser Hamid, Khandakar Ashrafi Akbar, Nade Liang, Peihang Li","submitted_at":"2025-08-07T21:01:43Z","abstract_excerpt":"Driver gaze is commonly modeled as a spatial heatmap, but heatmaps alone are difficult for humans to interpret because they do not explain which road object or region is being monitored or why an attention shift may matter. This study examines whether minimal human-grounded supervision can steer a vision--language model toward interpretable descriptions of driver attention shifts. Using selected high-change gaze moments from the Berkeley DeepDrive-Attention dataset, we compare zero-shot, one-shot, and LoRA fine-tuned VLM conditions against human-refined reference descriptions and expert rating"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2508.05852","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2025-08-07T21:01:43Z","cross_cats_sorted":[],"title_canon_sha256":"f847c2776ff7228a271a036726eab3072edec90e228fb775999df7a21884c6e3","abstract_canon_sha256":"721a8bba5a0da59db2dfc307280d82ce30aa6f5ccde9712b673d1decc466e557"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-02T01:03:33.105444Z","signature_b64":"OMoHd0Yse5UPte0hbJjD3PB5vF1W2eTggVCW60DUif58QBe3kk5oFnAreEan8yjdqOzTsezGMhNuwqFYF5ySCA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"4f0ecd4432e10ecc779627f6655f9c7056a114901d54ebf704006565dde517c1","last_reissued_at":"2026-06-02T01:03:33.104777Z","signature_status":"signed_v1","first_computed_at":"2026-06-02T01:03:33.104777Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Interpretable Modeling of Driver Attention Shifts with a Vision--Language Model","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.CV","authors_text":"Kaiser Hamid, Khandakar Ashrafi Akbar, Nade Liang, Peihang Li","submitted_at":"2025-08-07T21:01:43Z","abstract_excerpt":"Driver gaze is commonly modeled as a spatial heatmap, but heatmaps alone are difficult for humans to interpret because they do not explain which road object or region is being monitored or why an attention shift may matter. This study examines whether minimal human-grounded supervision can steer a vision--language model toward interpretable descriptions of driver attention shifts. Using selected high-change gaze moments from the Berkeley DeepDrive-Attention dataset, we compare zero-shot, one-shot, and LoRA fine-tuned VLM conditions against human-refined reference descriptions and expert rating"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2508.05852","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2508.05852/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2508.05852","created_at":"2026-06-02T01:03:33.104868+00:00"},{"alias_kind":"arxiv_version","alias_value":"2508.05852v2","created_at":"2026-06-02T01:03:33.104868+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2508.05852","created_at":"2026-06-02T01:03:33.104868+00:00"},{"alias_kind":"pith_short_12","alias_value":"J4HM2RBS4EHM","created_at":"2026-06-02T01:03:33.104868+00:00"},{"alias_kind":"pith_short_16","alias_value":"J4HM2RBS4EHMY54W","created_at":"2026-06-02T01:03:33.104868+00:00"},{"alias_kind":"pith_short_8","alias_value":"J4HM2RBS","created_at":"2026-06-02T01:03:33.104868+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/J4HM2RBS4EHMY54WE73GKX44OB","json":"https://pith.science/pith/J4HM2RBS4EHMY54WE73GKX44OB.json","graph_json":"https://pith.science/api/pith-number/J4HM2RBS4EHMY54WE73GKX44OB/graph.json","events_json":"https://pith.science/api/pith-number/J4HM2RBS4EHMY54WE73GKX44OB/events.json","paper":"https://pith.science/paper/J4HM2RBS"},"agent_actions":{"view_html":"https://pith.science/pith/J4HM2RBS4EHMY54WE73GKX44OB","download_json":"https://pith.science/pith/J4HM2RBS4EHMY54WE73GKX44OB.json","view_paper":"https://pith.science/paper/J4HM2RBS","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2508.05852&json=true","fetch_graph":"https://pith.science/api/pith-number/J4HM2RBS4EHMY54WE73GKX44OB/graph.json","fetch_events":"https://pith.science/api/pith-number/J4HM2RBS4EHMY54WE73GKX44OB/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/J4HM2RBS4EHMY54WE73GKX44OB/action/timestamp_anchor","attest_storage":"https://pith.science/pith/J4HM2RBS4EHMY54WE73GKX44OB/action/storage_attestation","attest_author":"https://pith.science/pith/J4HM2RBS4EHMY54WE73GKX44OB/action/author_attestation","sign_citation":"https://pith.science/pith/J4HM2RBS4EHMY54WE73GKX44OB/action/citation_signature","submit_replication":"https://pith.science/pith/J4HM2RBS4EHMY54WE73GKX44OB/action/replication_record"}},"created_at":"2026-06-02T01:03:33.104868+00:00","updated_at":"2026-06-02T01:03:33.104868+00:00"}