{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:ZW2FPXUQNE66PAB562ZJ7DPYFF","short_pith_number":"pith:ZW2FPXUQ","schema_version":"1.0","canonical_sha256":"cdb457de90693de7803df6b29f8df82972b908e656093b98611ab6aa6185c4f9","source":{"kind":"arxiv","id":"2601.11957","version":4},"attestation_state":"computed","paper":{"title":"PEARL: Self-Evolving Assistant for Time Management with Reinforcement Learning","license":"http://creativecommons.org/licenses/by/4.0/","headline":"PEARL uses reinforcement learning and an external memory of inferred preferences to cut errors in resolving calendar conflicts by 55 percent.","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Bingxuan Li, Cheng Qian, Eitan Anzenberg, Heng Ji, Jeonghwan Kim, Niran Kundapur, Xiusi Chen","submitted_at":"2026-01-17T08:19:18Z","abstract_excerpt":"Overlapping calendar invitations force busy professionals to repeatedly decide which meetings to attend, reschedule, or decline. We refer to this preference-driven decision process as calendar conflict resolution. Automating this decision process is crucial yet challenging. Scheduling logistics can drain hours, and human delegation often fails at scale, which motivates us to ask: Can we trust large language models (LLMs) or language agents to manage time? To enable a systematic study of this question, we introduce CalConflictBench, a benchmark for long-horizon calendar conflict resolution. In "},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2601.11957","kind":"arxiv","version":4},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-01-17T08:19:18Z","cross_cats_sorted":[],"title_canon_sha256":"9d10073df80020e3154d6a95be7615839d3995ff7c72de3608fe95b8726d6004","abstract_canon_sha256":"7cf75de1c3634e95e4bbead4addaadb1a4df04fc4df5cf4b21f089f50db037ff"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-24T01:14:24.929682Z","signature_b64":"7lBS/1g2xHJVM1WgwsRdd9UrJNZyYpDB5VbbdFMy1BG4N6USIuf8AucbH1dt+TjxrJnUf98JebaLMtvE7QGnBQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"cdb457de90693de7803df6b29f8df82972b908e656093b98611ab6aa6185c4f9","last_reissued_at":"2026-06-24T01:14:24.929283Z","signature_status":"signed_v1","first_computed_at":"2026-06-24T01:14:24.929283Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"PEARL: Self-Evolving Assistant for Time Management with Reinforcement Learning","license":"http://creativecommons.org/licenses/by/4.0/","headline":"PEARL uses reinforcement learning and an external memory of inferred preferences to cut errors in resolving calendar conflicts by 55 percent.","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Bingxuan Li, Cheng Qian, Eitan Anzenberg, Heng Ji, Jeonghwan Kim, Niran Kundapur, Xiusi Chen","submitted_at":"2026-01-17T08:19:18Z","abstract_excerpt":"Overlapping calendar invitations force busy professionals to repeatedly decide which meetings to attend, reschedule, or decline. We refer to this preference-driven decision process as calendar conflict resolution. Automating this decision process is crucial yet challenging. Scheduling logistics can drain hours, and human delegation often fails at scale, which motivates us to ask: Can we trust large language models (LLMs) or language agents to manage time? To enable a systematic study of this question, we introduce CalConflictBench, a benchmark for long-horizon calendar conflict resolution. In "},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"Experiments on CalConflictBench show that PEARL achieves an error reduction rate of 0.76 and a 55% improvement in average error rate compared to the strongest baseline.","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"That the synthetic conflicts and preference signals in CalConflictBench faithfully represent real user decision patterns and that round-wise rewards can be defined without introducing benchmark-specific biases that do not transfer outside the test set.","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"PEARL reduces calendar conflict resolution errors by 55% on a new long-horizon benchmark by adding a preference memory and round-wise RL supervision to LLM agents.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"PEARL uses reinforcement learning and an external memory of inferred preferences to cut errors in resolving calendar conflicts by 55 percent.","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"3a7b0572692d54f631d3411c3d2f4d46d6beba0d0b99bf766f906bc3ee4e46b9"},"source":{"id":"2601.11957","kind":"arxiv","version":4},"verdict":{"id":"d6d5bfbc-6b90-4cf6-8e0e-d94e569ed945","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-16T13:06:01.068261Z","strongest_claim":"Experiments on CalConflictBench show that PEARL achieves an error reduction rate of 0.76 and a 55% improvement in average error rate compared to the strongest baseline.","one_line_summary":"PEARL reduces calendar conflict resolution errors by 55% on a new long-horizon benchmark by adding a preference memory and round-wise RL supervision to LLM agents.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"That the synthetic conflicts and preference signals in CalConflictBench faithfully represent real user decision patterns and that round-wise rewards can be defined without introducing benchmark-specific biases that do not transfer outside the test set.","pith_extraction_headline":"PEARL uses reinforcement learning and an external memory of inferred preferences to cut errors in resolving calendar conflicts by 55 percent."},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2601.11957/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2601.11957","created_at":"2026-06-24T01:14:24.929338+00:00"},{"alias_kind":"arxiv_version","alias_value":"2601.11957v4","created_at":"2026-06-24T01:14:24.929338+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2601.11957","created_at":"2026-06-24T01:14:24.929338+00:00"},{"alias_kind":"pith_short_12","alias_value":"ZW2FPXUQNE66","created_at":"2026-06-24T01:14:24.929338+00:00"},{"alias_kind":"pith_short_16","alias_value":"ZW2FPXUQNE66PAB5","created_at":"2026-06-24T01:14:24.929338+00:00"},{"alias_kind":"pith_short_8","alias_value":"ZW2FPXUQ","created_at":"2026-06-24T01:14:24.929338+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/ZW2FPXUQNE66PAB562ZJ7DPYFF","json":"https://pith.science/pith/ZW2FPXUQNE66PAB562ZJ7DPYFF.json","graph_json":"https://pith.science/api/pith-number/ZW2FPXUQNE66PAB562ZJ7DPYFF/graph.json","events_json":"https://pith.science/api/pith-number/ZW2FPXUQNE66PAB562ZJ7DPYFF/events.json","paper":"https://pith.science/paper/ZW2FPXUQ"},"agent_actions":{"view_html":"https://pith.science/pith/ZW2FPXUQNE66PAB562ZJ7DPYFF","download_json":"https://pith.science/pith/ZW2FPXUQNE66PAB562ZJ7DPYFF.json","view_paper":"https://pith.science/paper/ZW2FPXUQ","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2601.11957&json=true","fetch_graph":"https://pith.science/api/pith-number/ZW2FPXUQNE66PAB562ZJ7DPYFF/graph.json","fetch_events":"https://pith.science/api/pith-number/ZW2FPXUQNE66PAB562ZJ7DPYFF/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/ZW2FPXUQNE66PAB562ZJ7DPYFF/action/timestamp_anchor","attest_storage":"https://pith.science/pith/ZW2FPXUQNE66PAB562ZJ7DPYFF/action/storage_attestation","attest_author":"https://pith.science/pith/ZW2FPXUQNE66PAB562ZJ7DPYFF/action/author_attestation","sign_citation":"https://pith.science/pith/ZW2FPXUQNE66PAB562ZJ7DPYFF/action/citation_signature","submit_replication":"https://pith.science/pith/ZW2FPXUQNE66PAB562ZJ7DPYFF/action/replication_record"}},"created_at":"2026-06-24T01:14:24.929338+00:00","updated_at":"2026-06-24T01:14:24.929338+00:00"}