{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:TGDK2E7XCF77PL4VVZOEUITQSF","short_pith_number":"pith:TGDK2E7X","schema_version":"1.0","canonical_sha256":"9986ad13f7117ff7af95ae5c4a2270917fa629f3017a151f73243af8cf062cc3","source":{"kind":"arxiv","id":"2605.16345","version":1},"attestation_state":"computed","paper":{"title":"Goal-Conditioned Supervised Learning for LLM Fine-Tuning","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.LG","authors_text":"Joydeep Ghosh, Kaiwen Dong, Shijun Li, Xiang Gao","submitted_at":"2026-05-08T01:55:40Z","abstract_excerpt":"Large language models often require fine-tuning to better align their behavior with user intent at deployment. Existing approaches are commonly divided into online and offline paradigms. Online methods, such as RL-based alignment, can directly optimize outcome quality but typically rely on external reward models and iterative rollouts, making them costly and difficult to deploy in many cases. Offline methods are more efficient, but prevailing approaches such as supervised fine-tuning (SFT) and direct preference optimization (DPO) remain limited: SFT typically collapses graded feedback into bin"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2605.16345","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-05-08T01:55:40Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"fedd3dd4fc49638e79a9146c2beea11caa36b7df5fafb353f69fb128b36faaad","abstract_canon_sha256":"e1e8a16044508a65fbf190015b64c94a7dde539089c8c288c292bdf9c251de3a"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-20T00:02:17.710812Z","signature_b64":"wooEfm3bj1ZcplRGzRLqMb7bbEd1VXE4weAqq5Q/2j8f5OeODSzfj5gNCsGxIyJCNTQJnsGj1ipErkuMdwuVCQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"9986ad13f7117ff7af95ae5c4a2270917fa629f3017a151f73243af8cf062cc3","last_reissued_at":"2026-05-20T00:02:17.710247Z","signature_status":"signed_v1","first_computed_at":"2026-05-20T00:02:17.710247Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Goal-Conditioned Supervised Learning for LLM Fine-Tuning","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.LG","authors_text":"Joydeep Ghosh, Kaiwen Dong, Shijun Li, Xiang Gao","submitted_at":"2026-05-08T01:55:40Z","abstract_excerpt":"Large language models often require fine-tuning to better align their behavior with user intent at deployment. Existing approaches are commonly divided into online and offline paradigms. Online methods, such as RL-based alignment, can directly optimize outcome quality but typically rely on external reward models and iterative rollouts, making them costly and difficult to deploy in many cases. Offline methods are more efficient, but prevailing approaches such as supervised fine-tuning (SFT) and direct preference optimization (DPO) remain limited: SFT typically collapses graded feedback into bin"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.16345","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2605.16345/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2605.16345","created_at":"2026-05-20T00:02:17.710329+00:00"},{"alias_kind":"arxiv_version","alias_value":"2605.16345v1","created_at":"2026-05-20T00:02:17.710329+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.16345","created_at":"2026-05-20T00:02:17.710329+00:00"},{"alias_kind":"pith_short_12","alias_value":"TGDK2E7XCF77","created_at":"2026-05-20T00:02:17.710329+00:00"},{"alias_kind":"pith_short_16","alias_value":"TGDK2E7XCF77PL4V","created_at":"2026-05-20T00:02:17.710329+00:00"},{"alias_kind":"pith_short_8","alias_value":"TGDK2E7X","created_at":"2026-05-20T00:02:17.710329+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/TGDK2E7XCF77PL4VVZOEUITQSF","json":"https://pith.science/pith/TGDK2E7XCF77PL4VVZOEUITQSF.json","graph_json":"https://pith.science/api/pith-number/TGDK2E7XCF77PL4VVZOEUITQSF/graph.json","events_json":"https://pith.science/api/pith-number/TGDK2E7XCF77PL4VVZOEUITQSF/events.json","paper":"https://pith.science/paper/TGDK2E7X"},"agent_actions":{"view_html":"https://pith.science/pith/TGDK2E7XCF77PL4VVZOEUITQSF","download_json":"https://pith.science/pith/TGDK2E7XCF77PL4VVZOEUITQSF.json","view_paper":"https://pith.science/paper/TGDK2E7X","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2605.16345&json=true","fetch_graph":"https://pith.science/api/pith-number/TGDK2E7XCF77PL4VVZOEUITQSF/graph.json","fetch_events":"https://pith.science/api/pith-number/TGDK2E7XCF77PL4VVZOEUITQSF/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/TGDK2E7XCF77PL4VVZOEUITQSF/action/timestamp_anchor","attest_storage":"https://pith.science/pith/TGDK2E7XCF77PL4VVZOEUITQSF/action/storage_attestation","attest_author":"https://pith.science/pith/TGDK2E7XCF77PL4VVZOEUITQSF/action/author_attestation","sign_citation":"https://pith.science/pith/TGDK2E7XCF77PL4VVZOEUITQSF/action/citation_signature","submit_replication":"https://pith.science/pith/TGDK2E7XCF77PL4VVZOEUITQSF/action/replication_record"}},"created_at":"2026-05-20T00:02:17.710329+00:00","updated_at":"2026-05-20T00:02:17.710329+00:00"}