{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:YCSRNBBUHZSA47XIK6CUGYGEKK","short_pith_number":"pith:YCSRNBBU","canonical_record":{"source":{"id":"2605.24743","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-05-23T21:34:59Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"7c65065ddd1c8e2ae50066176878ed6df00867c11963988cd1a9c32d969b6c49","abstract_canon_sha256":"5c6e1cd5d7b600febdab5763595561afa4fbfdf34ff9de42fe6f43a13a31903d"},"schema_version":"1.0"},"canonical_sha256":"c0a51684343e640e7ee857854360c452ae24003fce9682bb8e61fd9f60ab36a1","source":{"kind":"arxiv","id":"2605.24743","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.24743","created_at":"2026-05-26T01:03:55Z"},{"alias_kind":"arxiv_version","alias_value":"2605.24743v1","created_at":"2026-05-26T01:03:55Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.24743","created_at":"2026-05-26T01:03:55Z"},{"alias_kind":"pith_short_12","alias_value":"YCSRNBBUHZSA","created_at":"2026-05-26T01:03:55Z"},{"alias_kind":"pith_short_16","alias_value":"YCSRNBBUHZSA47XI","created_at":"2026-05-26T01:03:55Z"},{"alias_kind":"pith_short_8","alias_value":"YCSRNBBU","created_at":"2026-05-26T01:03:55Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:YCSRNBBUHZSA47XIK6CUGYGEKK","target":"record","payload":{"canonical_record":{"source":{"id":"2605.24743","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-05-23T21:34:59Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"7c65065ddd1c8e2ae50066176878ed6df00867c11963988cd1a9c32d969b6c49","abstract_canon_sha256":"5c6e1cd5d7b600febdab5763595561afa4fbfdf34ff9de42fe6f43a13a31903d"},"schema_version":"1.0"},"canonical_sha256":"c0a51684343e640e7ee857854360c452ae24003fce9682bb8e61fd9f60ab36a1","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-26T01:03:55.973608Z","signature_b64":"I5e1JZDwqJ/wR/ioXfoO2yv5WgE5QvbLy1OJUbDlOp8W/uCeuguQTrgZfiCDuusTBlrTrToFg8UX5UexWp74Dw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"c0a51684343e640e7ee857854360c452ae24003fce9682bb8e61fd9f60ab36a1","last_reissued_at":"2026-05-26T01:03:55.972749Z","signature_status":"signed_v1","first_computed_at":"2026-05-26T01:03:55.972749Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2605.24743","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-26T01:03:55Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"LAknDhBEODUp/L0aE3SHVtu+POTck/fx4METGN1V5j3s3Jot0zZ8ef84l+gABInm19X4mW5zk/shGwRES5fmCw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-02T03:55:13.141207Z"},"content_sha256":"1648a88e6da079682d933032bed77efad49a02b9c4eaf137b5cfa92f53071047","schema_version":"1.0","event_id":"sha256:1648a88e6da079682d933032bed77efad49a02b9c4eaf137b5cfa92f53071047"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:YCSRNBBUHZSA47XIK6CUGYGEKK","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Bilevel Optimization of Synthetic Trajectories for Multi-Turn LLM Fine-Tuning","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.LG","authors_text":"Cheol Woo Kim, Kai Wang, Mauricio Tec, Milind Tambe, Shresth Verma","submitted_at":"2026-05-23T21:34:59Z","abstract_excerpt":"While LLMs excel at single-turn generation, they struggle with long-horizon, multi-turn interactions. Offline reinforcement learning (RL) offers a scalable approach, yet its performance hinges on the availability and quality of multi-turn trajectory data. A common remedy is to augment training with synthetic trajectories generated by LLMs or simulators, but synthetic data is highly heterogeneous in quality, and naively treating all trajectories as equally informative can degrade performance. We propose BOOST, a bilevel optimization framework where the inner level trains the LLM on reweighted d"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.24743","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2605.24743/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-26T01:03:55Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"1aToeDLT9Zc22edhZO2rLoXiBEJ5noDMWnFFjCUhuWfw0Fe9+PkP40+ZLf5PYQSNYxkkSv26LM4S2u+j6sAkCw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-02T03:55:13.141596Z"},"content_sha256":"2ddded1ad004834e07201d15ed4034bf30a13f73bf4c02be2498ddf6208bbd35","schema_version":"1.0","event_id":"sha256:2ddded1ad004834e07201d15ed4034bf30a13f73bf4c02be2498ddf6208bbd35"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/YCSRNBBUHZSA47XIK6CUGYGEKK/bundle.json","state_url":"https://pith.science/pith/YCSRNBBUHZSA47XIK6CUGYGEKK/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/YCSRNBBUHZSA47XIK6CUGYGEKK/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-02T03:55:13Z","links":{"resolver":"https://pith.science/pith/YCSRNBBUHZSA47XIK6CUGYGEKK","bundle":"https://pith.science/pith/YCSRNBBUHZSA47XIK6CUGYGEKK/bundle.json","state":"https://pith.science/pith/YCSRNBBUHZSA47XIK6CUGYGEKK/state.json","well_known_bundle":"https://pith.science/.well-known/pith/YCSRNBBUHZSA47XIK6CUGYGEKK/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:YCSRNBBUHZSA47XIK6CUGYGEKK","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"5c6e1cd5d7b600febdab5763595561afa4fbfdf34ff9de42fe6f43a13a31903d","cross_cats_sorted":["cs.AI"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-05-23T21:34:59Z","title_canon_sha256":"7c65065ddd1c8e2ae50066176878ed6df00867c11963988cd1a9c32d969b6c49"},"schema_version":"1.0","source":{"id":"2605.24743","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.24743","created_at":"2026-05-26T01:03:55Z"},{"alias_kind":"arxiv_version","alias_value":"2605.24743v1","created_at":"2026-05-26T01:03:55Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.24743","created_at":"2026-05-26T01:03:55Z"},{"alias_kind":"pith_short_12","alias_value":"YCSRNBBUHZSA","created_at":"2026-05-26T01:03:55Z"},{"alias_kind":"pith_short_16","alias_value":"YCSRNBBUHZSA47XI","created_at":"2026-05-26T01:03:55Z"},{"alias_kind":"pith_short_8","alias_value":"YCSRNBBU","created_at":"2026-05-26T01:03:55Z"}],"graph_snapshots":[{"event_id":"sha256:2ddded1ad004834e07201d15ed4034bf30a13f73bf4c02be2498ddf6208bbd35","target":"graph","created_at":"2026-05-26T01:03:55Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2605.24743/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"While LLMs excel at single-turn generation, they struggle with long-horizon, multi-turn interactions. Offline reinforcement learning (RL) offers a scalable approach, yet its performance hinges on the availability and quality of multi-turn trajectory data. A common remedy is to augment training with synthetic trajectories generated by LLMs or simulators, but synthetic data is highly heterogeneous in quality, and naively treating all trajectories as equally informative can degrade performance. We propose BOOST, a bilevel optimization framework where the inner level trains the LLM on reweighted d","authors_text":"Cheol Woo Kim, Kai Wang, Mauricio Tec, Milind Tambe, Shresth Verma","cross_cats":["cs.AI"],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-05-23T21:34:59Z","title":"Bilevel Optimization of Synthetic Trajectories for Multi-Turn LLM Fine-Tuning"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.24743","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:1648a88e6da079682d933032bed77efad49a02b9c4eaf137b5cfa92f53071047","target":"record","created_at":"2026-05-26T01:03:55Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"5c6e1cd5d7b600febdab5763595561afa4fbfdf34ff9de42fe6f43a13a31903d","cross_cats_sorted":["cs.AI"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-05-23T21:34:59Z","title_canon_sha256":"7c65065ddd1c8e2ae50066176878ed6df00867c11963988cd1a9c32d969b6c49"},"schema_version":"1.0","source":{"id":"2605.24743","kind":"arxiv","version":1}},"canonical_sha256":"c0a51684343e640e7ee857854360c452ae24003fce9682bb8e61fd9f60ab36a1","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"c0a51684343e640e7ee857854360c452ae24003fce9682bb8e61fd9f60ab36a1","first_computed_at":"2026-05-26T01:03:55.972749Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-26T01:03:55.972749Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"I5e1JZDwqJ/wR/ioXfoO2yv5WgE5QvbLy1OJUbDlOp8W/uCeuguQTrgZfiCDuusTBlrTrToFg8UX5UexWp74Dw==","signature_status":"signed_v1","signed_at":"2026-05-26T01:03:55.973608Z","signed_message":"canonical_sha256_bytes"},"source_id":"2605.24743","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:1648a88e6da079682d933032bed77efad49a02b9c4eaf137b5cfa92f53071047","sha256:2ddded1ad004834e07201d15ed4034bf30a13f73bf4c02be2498ddf6208bbd35"],"state_sha256":"c2710c1ddc49005e669676c1c38cd6beb87e6b857ae69ec15e5310655ef59b56"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"EWC41MicsK7d9XcH1k4szfz5ajnB43bRdOu/k/zL8bgiwDrceFHYxZ+Ad27tQiL7tTpwu5ZrqbJpp3Bo3SFvAQ==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-02T03:55:13.143626Z","bundle_sha256":"a1a0dd00d5c5c6554f35ddfe9e65cc182be5ad6edcc8195176dae0634f8d2a21"}}