{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2025:VJ25TUZ43NOQDIOQ3JZSSBGGPP","short_pith_number":"pith:VJ25TUZ4","schema_version":"1.0","canonical_sha256":"aa75d9d33cdb5d01a1d0da732904c67bdd82f6604bca2ae27ac0b6928b59cc02","source":{"kind":"arxiv","id":"2509.23352","version":3},"attestation_state":"computed","paper":{"title":"Dynamic-TreeRPO: Breaking the Independent Trajectory Bottleneck with Structured Sampling","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.CV","authors_text":"Gaojing Zhou, Jason Li, Jingling Fu, Junshi Huang, Lan Yang, Lichen Ma, ShiPing Dong, Shizhe Zhou, Tan Lit Sin, Xiaolong Fu, Yu He, Zipeng Guo","submitted_at":"2025-09-27T14:59:31Z","abstract_excerpt":"The integration of Reinforcement Learning (RL) into flow matching models for text-to-image (T2I) generation has driven substantial advances in generation quality. However, these gains often come at the cost of exhaustive exploration and inefficient sampling strategies due to slight variation in the sampling group. Building on this insight, we propose Dynamic-TreeRPO, which implements the sliding-window sampling strategy as a tree-structured search with dynamic noise intensities along depth. We perform GRPO-guided optimization and constrained Stochastic Differential Equation (SDE) sampling with"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2509.23352","kind":"arxiv","version":3},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2025-09-27T14:59:31Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"41da28a7078b0940c7a472f31457b76e22686d3d4f85503cd0536e9ac0588cf5","abstract_canon_sha256":"fcb185573ad443949b6e748bcff22e45f8ba5c958cb04e6cc134f1a6c7b20199"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-20T00:00:24.788200Z","signature_b64":"qXy6ScFIR9lh6pK4QWR/6ZajbDxqNBABEVHRirOalS8lNxCsMU91vsaSszVKHoa3GfeovgyX5KlV+ieCFjtFCA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"aa75d9d33cdb5d01a1d0da732904c67bdd82f6604bca2ae27ac0b6928b59cc02","last_reissued_at":"2026-05-20T00:00:24.787307Z","signature_status":"signed_v1","first_computed_at":"2026-05-20T00:00:24.787307Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Dynamic-TreeRPO: Breaking the Independent Trajectory Bottleneck with Structured Sampling","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.CV","authors_text":"Gaojing Zhou, Jason Li, Jingling Fu, Junshi Huang, Lan Yang, Lichen Ma, ShiPing Dong, Shizhe Zhou, Tan Lit Sin, Xiaolong Fu, Yu He, Zipeng Guo","submitted_at":"2025-09-27T14:59:31Z","abstract_excerpt":"The integration of Reinforcement Learning (RL) into flow matching models for text-to-image (T2I) generation has driven substantial advances in generation quality. However, these gains often come at the cost of exhaustive exploration and inefficient sampling strategies due to slight variation in the sampling group. Building on this insight, we propose Dynamic-TreeRPO, which implements the sliding-window sampling strategy as a tree-structured search with dynamic noise intensities along depth. We perform GRPO-guided optimization and constrained Stochastic Differential Equation (SDE) sampling with"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2509.23352","kind":"arxiv","version":3},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2509.23352/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2509.23352","created_at":"2026-05-20T00:00:24.787474+00:00"},{"alias_kind":"arxiv_version","alias_value":"2509.23352v3","created_at":"2026-05-20T00:00:24.787474+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2509.23352","created_at":"2026-05-20T00:00:24.787474+00:00"},{"alias_kind":"pith_short_12","alias_value":"VJ25TUZ43NOQ","created_at":"2026-05-20T00:00:24.787474+00:00"},{"alias_kind":"pith_short_16","alias_value":"VJ25TUZ43NOQDIOQ","created_at":"2026-05-20T00:00:24.787474+00:00"},{"alias_kind":"pith_short_8","alias_value":"VJ25TUZ4","created_at":"2026-05-20T00:00:24.787474+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":6,"internal_anchor_count":6,"sample":[{"citing_arxiv_id":"2605.10983","citing_title":"TMPO: Trajectory Matching Policy Optimization for Diverse and Efficient Diffusion Alignment","ref_index":27,"is_internal_anchor":true},{"citing_arxiv_id":"2605.10983","citing_title":"TMPO: Trajectory Matching Policy Optimization for Diverse and Efficient Diffusion Alignment","ref_index":27,"is_internal_anchor":true},{"citing_arxiv_id":"2605.12112","citing_title":"When Policy Entropy Constraint Fails: Preserving Diversity in Flow-based RLHF via Perceptual Entropy","ref_index":21,"is_internal_anchor":true},{"citing_arxiv_id":"2605.10937","citing_title":"Power Reinforcement Post-Training of Text-to-Image Models with Super-Linear Advantage Shaping","ref_index":106,"is_internal_anchor":true},{"citing_arxiv_id":"2604.06916","citing_title":"FP4 Explore, BF16 Train: Diffusion Reinforcement Learning via Efficient Rollout Scaling","ref_index":40,"is_internal_anchor":true},{"citing_arxiv_id":"2605.02913","citing_title":"Generate, Filter, Control, Replay: A Comprehensive Survey of Rollout Strategies for LLM Reinforcement Learning","ref_index":25,"is_internal_anchor":true}]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/VJ25TUZ43NOQDIOQ3JZSSBGGPP","json":"https://pith.science/pith/VJ25TUZ43NOQDIOQ3JZSSBGGPP.json","graph_json":"https://pith.science/api/pith-number/VJ25TUZ43NOQDIOQ3JZSSBGGPP/graph.json","events_json":"https://pith.science/api/pith-number/VJ25TUZ43NOQDIOQ3JZSSBGGPP/events.json","paper":"https://pith.science/paper/VJ25TUZ4"},"agent_actions":{"view_html":"https://pith.science/pith/VJ25TUZ43NOQDIOQ3JZSSBGGPP","download_json":"https://pith.science/pith/VJ25TUZ43NOQDIOQ3JZSSBGGPP.json","view_paper":"https://pith.science/paper/VJ25TUZ4","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2509.23352&json=true","fetch_graph":"https://pith.science/api/pith-number/VJ25TUZ43NOQDIOQ3JZSSBGGPP/graph.json","fetch_events":"https://pith.science/api/pith-number/VJ25TUZ43NOQDIOQ3JZSSBGGPP/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/VJ25TUZ43NOQDIOQ3JZSSBGGPP/action/timestamp_anchor","attest_storage":"https://pith.science/pith/VJ25TUZ43NOQDIOQ3JZSSBGGPP/action/storage_attestation","attest_author":"https://pith.science/pith/VJ25TUZ43NOQDIOQ3JZSSBGGPP/action/author_attestation","sign_citation":"https://pith.science/pith/VJ25TUZ43NOQDIOQ3JZSSBGGPP/action/citation_signature","submit_replication":"https://pith.science/pith/VJ25TUZ43NOQDIOQ3JZSSBGGPP/action/replication_record"}},"created_at":"2026-05-20T00:00:24.787474+00:00","updated_at":"2026-05-20T00:00:24.787474+00:00"}