{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2019:4LGG7UJLYM4VCLADZ7TXEKOEBM","short_pith_number":"pith:4LGG7UJL","canonical_record":{"source":{"id":"1906.06639","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-06-16T03:28:24Z","cross_cats_sorted":["stat.ML"],"title_canon_sha256":"0469d0231f24c8cdb6c7660ad4e88ecdecb04a0845ca5f00380696e89517d30d","abstract_canon_sha256":"be4f0a7900a57f190230ec25d5319445c1de4cfa45ccb9abfd937869fcb63673"},"schema_version":"1.0"},"canonical_sha256":"e2cc6fd12bc339512c03cfe77229c40b3b7f1675393b5ff10254b30c2fb15929","source":{"kind":"arxiv","id":"1906.06639","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1906.06639","created_at":"2026-05-17T23:43:13Z"},{"alias_kind":"arxiv_version","alias_value":"1906.06639v1","created_at":"2026-05-17T23:43:13Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1906.06639","created_at":"2026-05-17T23:43:13Z"},{"alias_kind":"pith_short_12","alias_value":"4LGG7UJLYM4V","created_at":"2026-05-18T12:33:10Z"},{"alias_kind":"pith_short_16","alias_value":"4LGG7UJLYM4VCLAD","created_at":"2026-05-18T12:33:10Z"},{"alias_kind":"pith_short_8","alias_value":"4LGG7UJL","created_at":"2026-05-18T12:33:10Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2019:4LGG7UJLYM4VCLADZ7TXEKOEBM","target":"record","payload":{"canonical_record":{"source":{"id":"1906.06639","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-06-16T03:28:24Z","cross_cats_sorted":["stat.ML"],"title_canon_sha256":"0469d0231f24c8cdb6c7660ad4e88ecdecb04a0845ca5f00380696e89517d30d","abstract_canon_sha256":"be4f0a7900a57f190230ec25d5319445c1de4cfa45ccb9abfd937869fcb63673"},"schema_version":"1.0"},"canonical_sha256":"e2cc6fd12bc339512c03cfe77229c40b3b7f1675393b5ff10254b30c2fb15929","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-17T23:43:13.089899Z","signature_b64":"45gnwdbxzZcl2IwPKyZ0UYz8wymZx29m+dqrJ50EDtpjSYGg+yw7k4W1tG/IUc1b3pxqKpZMgS/W6/6I0GQrCg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"e2cc6fd12bc339512c03cfe77229c40b3b7f1675393b5ff10254b30c2fb15929","last_reissued_at":"2026-05-17T23:43:13.089382Z","signature_status":"signed_v1","first_computed_at":"2026-05-17T23:43:13.089382Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1906.06639","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:43:13Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"pDfe5Jy6IItdV6UjaBmlAmwrozmM0fXWCZlB6k0v8Rz3pCaq3CVKZOItx46CXWPZaFD0Aa7C/GJ1mnQR6iwsDw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-29T16:24:17.283903Z"},"content_sha256":"87c7cfcd7a52f7a16ee768b121a902be83bf582176b74bce9d8d94a609db1c9f","schema_version":"1.0","event_id":"sha256:87c7cfcd7a52f7a16ee768b121a902be83bf582176b74bce9d8d94a609db1c9f"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2019:4LGG7UJLYM4VCLADZ7TXEKOEBM","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Reinforcement Learning Driven Heuristic Optimization","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["stat.ML"],"primary_cat":"cs.LG","authors_text":"Azalia Mirhoseini, George Tucker, Jingtao Wang, Qingpeng Cai, Wei Wei, Will Hang","submitted_at":"2019-06-16T03:28:24Z","abstract_excerpt":"Heuristic algorithms such as simulated annealing, Concorde, and METIS are effective and widely used approaches to find solutions to combinatorial optimization problems. However, they are limited by the high sample complexity required to reach a reasonable solution from a cold-start. In this paper, we introduce a novel framework to generate better initial solutions for heuristic algorithms using reinforcement learning (RL), named RLHO. We augment the ability of heuristic algorithms to greedily improve upon an existing initial solution generated by RL, and demonstrate novel results where RL is a"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1906.06639","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-17T23:43:13Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"SVrWeltPUzQZI9xw+cQUS4SKVR+MErlf/idBL5UfQqvP9UxUWg5WQiDTDj0HZ8yWLl3r/HPpCymXJd5Vi+gFBQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-29T16:24:17.284634Z"},"content_sha256":"d466c1cee44334a061b749ab63a4036be61cff5eacca17d0026608bfa7d8272c","schema_version":"1.0","event_id":"sha256:d466c1cee44334a061b749ab63a4036be61cff5eacca17d0026608bfa7d8272c"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/4LGG7UJLYM4VCLADZ7TXEKOEBM/bundle.json","state_url":"https://pith.science/pith/4LGG7UJLYM4VCLADZ7TXEKOEBM/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/4LGG7UJLYM4VCLADZ7TXEKOEBM/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-29T16:24:17Z","links":{"resolver":"https://pith.science/pith/4LGG7UJLYM4VCLADZ7TXEKOEBM","bundle":"https://pith.science/pith/4LGG7UJLYM4VCLADZ7TXEKOEBM/bundle.json","state":"https://pith.science/pith/4LGG7UJLYM4VCLADZ7TXEKOEBM/state.json","well_known_bundle":"https://pith.science/.well-known/pith/4LGG7UJLYM4VCLADZ7TXEKOEBM/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2019:4LGG7UJLYM4VCLADZ7TXEKOEBM","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"be4f0a7900a57f190230ec25d5319445c1de4cfa45ccb9abfd937869fcb63673","cross_cats_sorted":["stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-06-16T03:28:24Z","title_canon_sha256":"0469d0231f24c8cdb6c7660ad4e88ecdecb04a0845ca5f00380696e89517d30d"},"schema_version":"1.0","source":{"id":"1906.06639","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1906.06639","created_at":"2026-05-17T23:43:13Z"},{"alias_kind":"arxiv_version","alias_value":"1906.06639v1","created_at":"2026-05-17T23:43:13Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1906.06639","created_at":"2026-05-17T23:43:13Z"},{"alias_kind":"pith_short_12","alias_value":"4LGG7UJLYM4V","created_at":"2026-05-18T12:33:10Z"},{"alias_kind":"pith_short_16","alias_value":"4LGG7UJLYM4VCLAD","created_at":"2026-05-18T12:33:10Z"},{"alias_kind":"pith_short_8","alias_value":"4LGG7UJL","created_at":"2026-05-18T12:33:10Z"}],"graph_snapshots":[{"event_id":"sha256:d466c1cee44334a061b749ab63a4036be61cff5eacca17d0026608bfa7d8272c","target":"graph","created_at":"2026-05-17T23:43:13Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Heuristic algorithms such as simulated annealing, Concorde, and METIS are effective and widely used approaches to find solutions to combinatorial optimization problems. However, they are limited by the high sample complexity required to reach a reasonable solution from a cold-start. In this paper, we introduce a novel framework to generate better initial solutions for heuristic algorithms using reinforcement learning (RL), named RLHO. We augment the ability of heuristic algorithms to greedily improve upon an existing initial solution generated by RL, and demonstrate novel results where RL is a","authors_text":"Azalia Mirhoseini, George Tucker, Jingtao Wang, Qingpeng Cai, Wei Wei, Will Hang","cross_cats":["stat.ML"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-06-16T03:28:24Z","title":"Reinforcement Learning Driven Heuristic Optimization"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1906.06639","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:87c7cfcd7a52f7a16ee768b121a902be83bf582176b74bce9d8d94a609db1c9f","target":"record","created_at":"2026-05-17T23:43:13Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"be4f0a7900a57f190230ec25d5319445c1de4cfa45ccb9abfd937869fcb63673","cross_cats_sorted":["stat.ML"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.LG","submitted_at":"2019-06-16T03:28:24Z","title_canon_sha256":"0469d0231f24c8cdb6c7660ad4e88ecdecb04a0845ca5f00380696e89517d30d"},"schema_version":"1.0","source":{"id":"1906.06639","kind":"arxiv","version":1}},"canonical_sha256":"e2cc6fd12bc339512c03cfe77229c40b3b7f1675393b5ff10254b30c2fb15929","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"e2cc6fd12bc339512c03cfe77229c40b3b7f1675393b5ff10254b30c2fb15929","first_computed_at":"2026-05-17T23:43:13.089382Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-17T23:43:13.089382Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"45gnwdbxzZcl2IwPKyZ0UYz8wymZx29m+dqrJ50EDtpjSYGg+yw7k4W1tG/IUc1b3pxqKpZMgS/W6/6I0GQrCg==","signature_status":"signed_v1","signed_at":"2026-05-17T23:43:13.089899Z","signed_message":"canonical_sha256_bytes"},"source_id":"1906.06639","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:87c7cfcd7a52f7a16ee768b121a902be83bf582176b74bce9d8d94a609db1c9f","sha256:d466c1cee44334a061b749ab63a4036be61cff5eacca17d0026608bfa7d8272c"],"state_sha256":"3f2cb0780e9ec9bb97259d424a81aaab54605e5d410f31fb4c3093dc0feb76e6"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"kRwTunGbL0GktT5kZdpj7zVXDaHZJG4OvCwjBmrr47VmI1TFY33/OzbS+xJ+YIg55jZ1rDRZXXC1QtN7lO04Dw==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-29T16:24:17.288358Z","bundle_sha256":"e26883a9f1b1d49dee0fde5dfd25eea76c29ed909ed4a955bf0d1cfbf5828a04"}}