{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:AOM3T56NGTA637X3M7ZFR2O3AP","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"8e4d7c5a11787d72fdd927acc406011179fef4031b5bd3324e9646f244849e49","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2026-05-27T09:45:14Z","title_canon_sha256":"a40e3744ab1294081093b8203c3e2dda940b6bb1ef92cd55a71bd429cc5cc1f2"},"schema_version":"1.0","source":{"id":"2605.28232","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.28232","created_at":"2026-05-28T01:05:03Z"},{"alias_kind":"arxiv_version","alias_value":"2605.28232v1","created_at":"2026-05-28T01:05:03Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.28232","created_at":"2026-05-28T01:05:03Z"},{"alias_kind":"pith_short_12","alias_value":"AOM3T56NGTA6","created_at":"2026-05-28T01:05:03Z"},{"alias_kind":"pith_short_16","alias_value":"AOM3T56NGTA637X3","created_at":"2026-05-28T01:05:03Z"},{"alias_kind":"pith_short_8","alias_value":"AOM3T56N","created_at":"2026-05-28T01:05:03Z"}],"graph_snapshots":[{"event_id":"sha256:0d5cd9aad80f69df2e624331f2fc2fc723a59e945c37fd87d4cc1a726b871f50","target":"graph","created_at":"2026-05-28T01:05:03Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2605.28232/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Occupant comfort and grid-aware energy efficiency are competing objectives whose joint optimization depends critically on how reward functions are specified in deep reinforcement learning (DRL) controllers for buildings. Yet reward design remains largely ad hoc: comfort terms are either hand-tuned heuristics or simple temperature-deviation proxies without explicit grounding in thermal-comfort physics. We present PIRS (Physics-Informed Reward Shaping), which replaces these ad-hoc comfort proxies with the ISO 7730 Predicted Mean Vote (PMV) formulation inside a weighted multi-objective reward for","authors_text":"Khashayar Yavari, Shadmehr Zaregarizi","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2026-05-27T09:45:14Z","title":"PIRS: Physics-Informed Reward Shaping for SAC-Based Building Energy Management"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.28232","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:3f4a09fc77b1def28302cf221decfd3a890ae5a5c92c26aee18f568f340afe49","target":"record","created_at":"2026-05-28T01:05:03Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"8e4d7c5a11787d72fdd927acc406011179fef4031b5bd3324e9646f244849e49","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.AI","submitted_at":"2026-05-27T09:45:14Z","title_canon_sha256":"a40e3744ab1294081093b8203c3e2dda940b6bb1ef92cd55a71bd429cc5cc1f2"},"schema_version":"1.0","source":{"id":"2605.28232","kind":"arxiv","version":1}},"canonical_sha256":"0399b9f7cd34c1edfefb67f258e9db03c451846855ed31eeb4999cae2d8d2ec6","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"0399b9f7cd34c1edfefb67f258e9db03c451846855ed31eeb4999cae2d8d2ec6","first_computed_at":"2026-05-28T01:05:03.362605Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-28T01:05:03.362605Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"K6ULSbEqPCfHn/BBPxMNk1Ocufl2GLok35fPEbiwd4yY0IKJCTxYn2qY/B65/vkFcEAN+lTRTmgOODI4HTt/CA==","signature_status":"signed_v1","signed_at":"2026-05-28T01:05:03.363018Z","signed_message":"canonical_sha256_bytes"},"source_id":"2605.28232","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:3f4a09fc77b1def28302cf221decfd3a890ae5a5c92c26aee18f568f340afe49","sha256:0d5cd9aad80f69df2e624331f2fc2fc723a59e945c37fd87d4cc1a726b871f50"],"state_sha256":"a0e250a53d87d7dbc49aa957d5f29fd0ca7b7ffa305c284b2423752ffd2ca062"}