{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:DKBEQ3MRB7QY3T3QNRG3GWM6IQ","short_pith_number":"pith:DKBEQ3MR","schema_version":"1.0","canonical_sha256":"1a82486d910fe18dcf706c4db3599e441f510da070d71cb3b2d4b0bb38f98eb5","source":{"kind":"arxiv","id":"2606.27939","version":1},"attestation_state":"computed","paper":{"title":"Two-Stage Fine-Tuning for Protein Sequence Generation with Targeted Amino-Acid Composition","license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","headline":"","cross_cats":["cs.AI","q-bio.BM","q-bio.GN"],"primary_cat":"cs.LG","authors_text":"Anna Mar\\'ia D\\'iaz-Rovira, Bertran Miquel-Oliver, Isaac Filella-Merce, Rub\\'en Mu\\~noz-Tafalla, V\\'ictor Guallar, Violeta Basten-Romero","submitted_at":"2026-06-26T10:29:42Z","abstract_excerpt":"Protein language models are standard priors for biological sequence generation, but steering them toward explicit distributional design targets remains largely unexplored. We study a constrained protein generation problem in which sequences must match a desired amino-acid (AA) composition profile while preserving plausible sequence statistics and diversity. The motivating application is synthetic feed protein design, where the AA composition of dietary proteins directly determines their nutritional value. We propose a two-stage pipeline in which domain-adaptive fine-tuning (FT) on an in-domain"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2606.27939","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","primary_cat":"cs.LG","submitted_at":"2026-06-26T10:29:42Z","cross_cats_sorted":["cs.AI","q-bio.BM","q-bio.GN"],"title_canon_sha256":"5f53a2569197744ec986f7b6bf395525b39623363833c9bb8e0ef0ca406411d9","abstract_canon_sha256":"2426908df672268b29f0e18c765c375a3e1dee47b940021a7edb036dbbba32aa"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-29T01:14:53.288399Z","signature_b64":"WAbEqv16wDboFYXivo55Ny7RvqB3KgJtDE64ZuhfklrVz+391HFz0wlR7ciHsFP7JgmwZOCAkxNTfPVhCZtkCQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"1a82486d910fe18dcf706c4db3599e441f510da070d71cb3b2d4b0bb38f98eb5","last_reissued_at":"2026-06-29T01:14:53.287873Z","signature_status":"signed_v1","first_computed_at":"2026-06-29T01:14:53.287873Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Two-Stage Fine-Tuning for Protein Sequence Generation with Targeted Amino-Acid Composition","license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","headline":"","cross_cats":["cs.AI","q-bio.BM","q-bio.GN"],"primary_cat":"cs.LG","authors_text":"Anna Mar\\'ia D\\'iaz-Rovira, Bertran Miquel-Oliver, Isaac Filella-Merce, Rub\\'en Mu\\~noz-Tafalla, V\\'ictor Guallar, Violeta Basten-Romero","submitted_at":"2026-06-26T10:29:42Z","abstract_excerpt":"Protein language models are standard priors for biological sequence generation, but steering them toward explicit distributional design targets remains largely unexplored. We study a constrained protein generation problem in which sequences must match a desired amino-acid (AA) composition profile while preserving plausible sequence statistics and diversity. The motivating application is synthetic feed protein design, where the AA composition of dietary proteins directly determines their nutritional value. We propose a two-stage pipeline in which domain-adaptive fine-tuning (FT) on an in-domain"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.27939","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2606.27939/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2606.27939","created_at":"2026-06-29T01:14:53.287922+00:00"},{"alias_kind":"arxiv_version","alias_value":"2606.27939v1","created_at":"2026-06-29T01:14:53.287922+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.27939","created_at":"2026-06-29T01:14:53.287922+00:00"},{"alias_kind":"pith_short_12","alias_value":"DKBEQ3MRB7QY","created_at":"2026-06-29T01:14:53.287922+00:00"},{"alias_kind":"pith_short_16","alias_value":"DKBEQ3MRB7QY3T3Q","created_at":"2026-06-29T01:14:53.287922+00:00"},{"alias_kind":"pith_short_8","alias_value":"DKBEQ3MR","created_at":"2026-06-29T01:14:53.287922+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/DKBEQ3MRB7QY3T3QNRG3GWM6IQ","json":"https://pith.science/pith/DKBEQ3MRB7QY3T3QNRG3GWM6IQ.json","graph_json":"https://pith.science/api/pith-number/DKBEQ3MRB7QY3T3QNRG3GWM6IQ/graph.json","events_json":"https://pith.science/api/pith-number/DKBEQ3MRB7QY3T3QNRG3GWM6IQ/events.json","paper":"https://pith.science/paper/DKBEQ3MR"},"agent_actions":{"view_html":"https://pith.science/pith/DKBEQ3MRB7QY3T3QNRG3GWM6IQ","download_json":"https://pith.science/pith/DKBEQ3MRB7QY3T3QNRG3GWM6IQ.json","view_paper":"https://pith.science/paper/DKBEQ3MR","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2606.27939&json=true","fetch_graph":"https://pith.science/api/pith-number/DKBEQ3MRB7QY3T3QNRG3GWM6IQ/graph.json","fetch_events":"https://pith.science/api/pith-number/DKBEQ3MRB7QY3T3QNRG3GWM6IQ/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/DKBEQ3MRB7QY3T3QNRG3GWM6IQ/action/timestamp_anchor","attest_storage":"https://pith.science/pith/DKBEQ3MRB7QY3T3QNRG3GWM6IQ/action/storage_attestation","attest_author":"https://pith.science/pith/DKBEQ3MRB7QY3T3QNRG3GWM6IQ/action/author_attestation","sign_citation":"https://pith.science/pith/DKBEQ3MRB7QY3T3QNRG3GWM6IQ/action/citation_signature","submit_replication":"https://pith.science/pith/DKBEQ3MRB7QY3T3QNRG3GWM6IQ/action/replication_record"}},"created_at":"2026-06-29T01:14:53.287922+00:00","updated_at":"2026-06-29T01:14:53.287922+00:00"}