{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:MRGE5JZRLJGBWCIN6DQL6CANTS","short_pith_number":"pith:MRGE5JZR","schema_version":"1.0","canonical_sha256":"644c4ea7315a4c1b090df0e0bf080d9ca7d16029729c64dab76b134dc0789ddd","source":{"kind":"arxiv","id":"2607.01897","version":1},"attestation_state":"computed","paper":{"title":"Rank-Then-Act: Reward-Free Control from Frame-Order Progress","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.LG","authors_text":"Daniil Gavrilov, George Bredis, Ruslan Rakhimov, Yuriy Maksyuta","submitted_at":"2026-07-02T08:50:32Z","abstract_excerpt":"We introduce Rank-Then-Act (RTA), a framework for learning control policies from expert video demonstrations without environment rewards. RTA trains a Vision-Language Model (VLM) offline as a progress-based ordinal scorer, using a Group Relative Policy Optimization (GRPO) objective over shuffled frame sequences, which forces the model to recover temporal ordering from visual semantics rather than trivial time cues. Importantly, instead of using the scorer directly as a scalar reward model, we propose a correlation-based reward function for reinforcement learning: at each interaction window, we"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2607.01897","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.LG","submitted_at":"2026-07-02T08:50:32Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"bb872993e79483e4da9b38649a06f3b594772a51c5977416a8e1dd35ba1d263c","abstract_canon_sha256":"5efa47e652e41a2978ecdd3370397c58355de985fedd98584cb455a01f6a3f90"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-07-03T01:17:32.809664Z","signature_b64":"h0MxLuHP2T+bThYWFeGKcXCcQlCq0EMNCWAUE/H7ZVoty+gtMF8ArCO6y4uHNvN8RSY8lq4f0y6NJ0FLDVXMCw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"644c4ea7315a4c1b090df0e0bf080d9ca7d16029729c64dab76b134dc0789ddd","last_reissued_at":"2026-07-03T01:17:32.809281Z","signature_status":"signed_v1","first_computed_at":"2026-07-03T01:17:32.809281Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Rank-Then-Act: Reward-Free Control from Frame-Order Progress","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.LG","authors_text":"Daniil Gavrilov, George Bredis, Ruslan Rakhimov, Yuriy Maksyuta","submitted_at":"2026-07-02T08:50:32Z","abstract_excerpt":"We introduce Rank-Then-Act (RTA), a framework for learning control policies from expert video demonstrations without environment rewards. RTA trains a Vision-Language Model (VLM) offline as a progress-based ordinal scorer, using a Group Relative Policy Optimization (GRPO) objective over shuffled frame sequences, which forces the model to recover temporal ordering from visual semantics rather than trivial time cues. Importantly, instead of using the scorer directly as a scalar reward model, we propose a correlation-based reward function for reinforcement learning: at each interaction window, we"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2607.01897","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2607.01897/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2607.01897","created_at":"2026-07-03T01:17:32.809339+00:00"},{"alias_kind":"arxiv_version","alias_value":"2607.01897v1","created_at":"2026-07-03T01:17:32.809339+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2607.01897","created_at":"2026-07-03T01:17:32.809339+00:00"},{"alias_kind":"pith_short_12","alias_value":"MRGE5JZRLJGB","created_at":"2026-07-03T01:17:32.809339+00:00"},{"alias_kind":"pith_short_16","alias_value":"MRGE5JZRLJGBWCIN","created_at":"2026-07-03T01:17:32.809339+00:00"},{"alias_kind":"pith_short_8","alias_value":"MRGE5JZR","created_at":"2026-07-03T01:17:32.809339+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/MRGE5JZRLJGBWCIN6DQL6CANTS","json":"https://pith.science/pith/MRGE5JZRLJGBWCIN6DQL6CANTS.json","graph_json":"https://pith.science/api/pith-number/MRGE5JZRLJGBWCIN6DQL6CANTS/graph.json","events_json":"https://pith.science/api/pith-number/MRGE5JZRLJGBWCIN6DQL6CANTS/events.json","paper":"https://pith.science/paper/MRGE5JZR"},"agent_actions":{"view_html":"https://pith.science/pith/MRGE5JZRLJGBWCIN6DQL6CANTS","download_json":"https://pith.science/pith/MRGE5JZRLJGBWCIN6DQL6CANTS.json","view_paper":"https://pith.science/paper/MRGE5JZR","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2607.01897&json=true","fetch_graph":"https://pith.science/api/pith-number/MRGE5JZRLJGBWCIN6DQL6CANTS/graph.json","fetch_events":"https://pith.science/api/pith-number/MRGE5JZRLJGBWCIN6DQL6CANTS/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/MRGE5JZRLJGBWCIN6DQL6CANTS/action/timestamp_anchor","attest_storage":"https://pith.science/pith/MRGE5JZRLJGBWCIN6DQL6CANTS/action/storage_attestation","attest_author":"https://pith.science/pith/MRGE5JZRLJGBWCIN6DQL6CANTS/action/author_attestation","sign_citation":"https://pith.science/pith/MRGE5JZRLJGBWCIN6DQL6CANTS/action/citation_signature","submit_replication":"https://pith.science/pith/MRGE5JZRLJGBWCIN6DQL6CANTS/action/replication_record"}},"created_at":"2026-07-03T01:17:32.809339+00:00","updated_at":"2026-07-03T01:17:32.809339+00:00"}