{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2025:X77KXFK4BPWI4IARKRPZ3ZA4LF","short_pith_number":"pith:X77KXFK4","canonical_record":{"source":{"id":"2509.10303","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2025-09-12T14:45:39Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"00d39b53d4f44b2e65d180d0444a56bbcc445f4e423e910cc4e9167d13a97ecd","abstract_canon_sha256":"6c3b323a9053a2ba2bf31e27e2e610949b35abb757982220352983f8293719b9"},"schema_version":"1.0"},"canonical_sha256":"bffeab955c0bec8e2011545f9de41c59729149d2fc747dd3a7b6ffcee5b9999f","source":{"kind":"arxiv","id":"2509.10303","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2509.10303","created_at":"2026-06-11T01:09:16Z"},{"alias_kind":"arxiv_version","alias_value":"2509.10303v2","created_at":"2026-06-11T01:09:16Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2509.10303","created_at":"2026-06-11T01:09:16Z"},{"alias_kind":"pith_short_12","alias_value":"X77KXFK4BPWI","created_at":"2026-06-11T01:09:16Z"},{"alias_kind":"pith_short_16","alias_value":"X77KXFK4BPWI4IAR","created_at":"2026-06-11T01:09:16Z"},{"alias_kind":"pith_short_8","alias_value":"X77KXFK4","created_at":"2026-06-11T01:09:16Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2025:X77KXFK4BPWI4IARKRPZ3ZA4LF","target":"record","payload":{"canonical_record":{"source":{"id":"2509.10303","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2025-09-12T14:45:39Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"00d39b53d4f44b2e65d180d0444a56bbcc445f4e423e910cc4e9167d13a97ecd","abstract_canon_sha256":"6c3b323a9053a2ba2bf31e27e2e610949b35abb757982220352983f8293719b9"},"schema_version":"1.0"},"canonical_sha256":"bffeab955c0bec8e2011545f9de41c59729149d2fc747dd3a7b6ffcee5b9999f","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-11T01:09:16.866452Z","signature_b64":"COin0ASwjYwwLyx8b9yYq2ZKmoFoQSbGkb0oG1zQKNbQreDJ2tMF67eJg6We8A6YfnghqNZI9jgyfPXWHQM/AA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"bffeab955c0bec8e2011545f9de41c59729149d2fc747dd3a7b6ffcee5b9999f","last_reissued_at":"2026-06-11T01:09:16.865339Z","signature_status":"signed_v1","first_computed_at":"2026-06-11T01:09:16.865339Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2509.10303","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-11T01:09:16Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"v3F2jWG8QYhfbZgKl1vu9BfmwfujjtKRPB3qCJLr7pSZhRVwgeqbidfZXTGdHhTWGiqrU+KZdKdC+4zYzgMTCA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-27T21:34:35.868928Z"},"content_sha256":"46d85a1b9459f22a1a4798105a62bf762564a3ed7fe90c1030e718c61472e131","schema_version":"1.0","event_id":"sha256:46d85a1b9459f22a1a4798105a62bf762564a3ed7fe90c1030e718c61472e131"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2025:X77KXFK4BPWI4IARKRPZ3ZA4LF","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Generalizing Beyond Suboptimality: Offline Reinforcement Learning Learns Effective Scheduling through Random Solutions","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.LG","authors_text":"Jesse van Remmerden, Yingqian Zhang, Zaharah Bukhsh","submitted_at":"2025-09-12T14:45:39Z","abstract_excerpt":"Online reinforcement learning (RL) approaches have demonstrated strong performance on Job Shop Scheduling (JSP) and Flexible JSP (FJSP) problems by learning scheduling policies through direct interaction with simulated environments. However, these methods often require extensive training interactions, limiting their sample efficiency and practical applicability. Motivated by this challenge, we introduce Conservative Discrete Quantile Actor-Critic (CDQAC), an offline RL algorithm that learns effective scheduling policies directly from static, suboptimal datasets. CDQAC couples a quantile-based "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2509.10303","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2509.10303/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-06-11T01:09:16Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"1tUqz7GMHZJJFPhWauqiNTIPPxpi077PkpLl6/OyAxr93nXRvKHG1kL1DHzjdlVbX6O+DHUdcZE7Chz243qlAA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-27T21:34:35.869294Z"},"content_sha256":"827fbeee2ddbdc0319f31f086df81c9db8f8f866019e033381157ca1b20735b7","schema_version":"1.0","event_id":"sha256:827fbeee2ddbdc0319f31f086df81c9db8f8f866019e033381157ca1b20735b7"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/X77KXFK4BPWI4IARKRPZ3ZA4LF/bundle.json","state_url":"https://pith.science/pith/X77KXFK4BPWI4IARKRPZ3ZA4LF/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/X77KXFK4BPWI4IARKRPZ3ZA4LF/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-27T21:34:35Z","links":{"resolver":"https://pith.science/pith/X77KXFK4BPWI4IARKRPZ3ZA4LF","bundle":"https://pith.science/pith/X77KXFK4BPWI4IARKRPZ3ZA4LF/bundle.json","state":"https://pith.science/pith/X77KXFK4BPWI4IARKRPZ3ZA4LF/state.json","well_known_bundle":"https://pith.science/.well-known/pith/X77KXFK4BPWI4IARKRPZ3ZA4LF/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2025:X77KXFK4BPWI4IARKRPZ3ZA4LF","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"6c3b323a9053a2ba2bf31e27e2e610949b35abb757982220352983f8293719b9","cross_cats_sorted":["cs.AI"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2025-09-12T14:45:39Z","title_canon_sha256":"00d39b53d4f44b2e65d180d0444a56bbcc445f4e423e910cc4e9167d13a97ecd"},"schema_version":"1.0","source":{"id":"2509.10303","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2509.10303","created_at":"2026-06-11T01:09:16Z"},{"alias_kind":"arxiv_version","alias_value":"2509.10303v2","created_at":"2026-06-11T01:09:16Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2509.10303","created_at":"2026-06-11T01:09:16Z"},{"alias_kind":"pith_short_12","alias_value":"X77KXFK4BPWI","created_at":"2026-06-11T01:09:16Z"},{"alias_kind":"pith_short_16","alias_value":"X77KXFK4BPWI4IAR","created_at":"2026-06-11T01:09:16Z"},{"alias_kind":"pith_short_8","alias_value":"X77KXFK4","created_at":"2026-06-11T01:09:16Z"}],"graph_snapshots":[{"event_id":"sha256:827fbeee2ddbdc0319f31f086df81c9db8f8f866019e033381157ca1b20735b7","target":"graph","created_at":"2026-06-11T01:09:16Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2509.10303/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Online reinforcement learning (RL) approaches have demonstrated strong performance on Job Shop Scheduling (JSP) and Flexible JSP (FJSP) problems by learning scheduling policies through direct interaction with simulated environments. However, these methods often require extensive training interactions, limiting their sample efficiency and practical applicability. Motivated by this challenge, we introduce Conservative Discrete Quantile Actor-Critic (CDQAC), an offline RL algorithm that learns effective scheduling policies directly from static, suboptimal datasets. CDQAC couples a quantile-based ","authors_text":"Jesse van Remmerden, Yingqian Zhang, Zaharah Bukhsh","cross_cats":["cs.AI"],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2025-09-12T14:45:39Z","title":"Generalizing Beyond Suboptimality: Offline Reinforcement Learning Learns Effective Scheduling through Random Solutions"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2509.10303","kind":"arxiv","version":2},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:46d85a1b9459f22a1a4798105a62bf762564a3ed7fe90c1030e718c61472e131","target":"record","created_at":"2026-06-11T01:09:16Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"6c3b323a9053a2ba2bf31e27e2e610949b35abb757982220352983f8293719b9","cross_cats_sorted":["cs.AI"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2025-09-12T14:45:39Z","title_canon_sha256":"00d39b53d4f44b2e65d180d0444a56bbcc445f4e423e910cc4e9167d13a97ecd"},"schema_version":"1.0","source":{"id":"2509.10303","kind":"arxiv","version":2}},"canonical_sha256":"bffeab955c0bec8e2011545f9de41c59729149d2fc747dd3a7b6ffcee5b9999f","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"bffeab955c0bec8e2011545f9de41c59729149d2fc747dd3a7b6ffcee5b9999f","first_computed_at":"2026-06-11T01:09:16.865339Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-06-11T01:09:16.865339Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"COin0ASwjYwwLyx8b9yYq2ZKmoFoQSbGkb0oG1zQKNbQreDJ2tMF67eJg6We8A6YfnghqNZI9jgyfPXWHQM/AA==","signature_status":"signed_v1","signed_at":"2026-06-11T01:09:16.866452Z","signed_message":"canonical_sha256_bytes"},"source_id":"2509.10303","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:46d85a1b9459f22a1a4798105a62bf762564a3ed7fe90c1030e718c61472e131","sha256:827fbeee2ddbdc0319f31f086df81c9db8f8f866019e033381157ca1b20735b7"],"state_sha256":"92f5e22d7e399d54c0ae0b6cc16a3b693aae5e86d53ec8ae9fe83c7d99bf54a1"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"G2XD1gVsSrsxjQVZ4BpsL5/uJg0EcHWKjMSXiJfKOsBjvE5hv9MQmK6d4rERyCXP1MNrSgMJyCGhkQ61ASCNAQ==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-27T21:34:35.871283Z","bundle_sha256":"00713fa881006f78accdb901235ca45fe6d06b557a2698ce7983f9a7ad290379"}}