{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2025:PRJTBIUV3327TF7JSOWHYKX5IO","short_pith_number":"pith:PRJTBIUV","schema_version":"1.0","canonical_sha256":"7c5330a295def5f997e993ac7c2afd43af2a9c85c0acafc68058d3718aa2491f","source":{"kind":"arxiv","id":"2511.12796","version":2},"attestation_state":"computed","paper":{"title":"Maximizing the efficiency of human feedback in AI alignment: a comparative analysis","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.HC","authors_text":"Andreas Chouliaras, Dimitris Chatzopoulos","submitted_at":"2025-11-16T21:55:59Z","abstract_excerpt":"Reinforcement Learning from Human Feedback (RLHF) relies on preference modeling to align machine learning systems with human values, yet the popular approach of random pair sampling with Bradley-Terry modeling is statistically limited and inefficient under constrained annotation budgets. In this work, we explore alternative sampling and evaluation strategies for preference inference in RLHF, drawing inspiration from areas such as game theory, statistics, and social choice theory. Our best-performing method, Swiss InfoGain, employs a Swiss tournament system with a proxy mutual-information-gain "},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2511.12796","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.HC","submitted_at":"2025-11-16T21:55:59Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"d76681166c3f1369424d641f875aa27c43a21fd1429c766a7e8d43a414087bcf","abstract_canon_sha256":"d6c6b234bf71feedca04ee177823cfead8fa851e05201be8b34675c694c0098a"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-24T01:14:59.426119Z","signature_b64":"bZgHQdYKhH200BJk9vdRqjYAOVIQCr0vg1yp1Fbs+vLXVul9FFumyAPHXTjtWV8Qmh34nfjMHP/5pZUL8Wj8BA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"7c5330a295def5f997e993ac7c2afd43af2a9c85c0acafc68058d3718aa2491f","last_reissued_at":"2026-06-24T01:14:59.425620Z","signature_status":"signed_v1","first_computed_at":"2026-06-24T01:14:59.425620Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Maximizing the efficiency of human feedback in AI alignment: a comparative analysis","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.HC","authors_text":"Andreas Chouliaras, Dimitris Chatzopoulos","submitted_at":"2025-11-16T21:55:59Z","abstract_excerpt":"Reinforcement Learning from Human Feedback (RLHF) relies on preference modeling to align machine learning systems with human values, yet the popular approach of random pair sampling with Bradley-Terry modeling is statistically limited and inefficient under constrained annotation budgets. In this work, we explore alternative sampling and evaluation strategies for preference inference in RLHF, drawing inspiration from areas such as game theory, statistics, and social choice theory. Our best-performing method, Swiss InfoGain, employs a Swiss tournament system with a proxy mutual-information-gain "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2511.12796","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2511.12796/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2511.12796","created_at":"2026-06-24T01:14:59.425691+00:00"},{"alias_kind":"arxiv_version","alias_value":"2511.12796v2","created_at":"2026-06-24T01:14:59.425691+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2511.12796","created_at":"2026-06-24T01:14:59.425691+00:00"},{"alias_kind":"pith_short_12","alias_value":"PRJTBIUV3327","created_at":"2026-06-24T01:14:59.425691+00:00"},{"alias_kind":"pith_short_16","alias_value":"PRJTBIUV3327TF7J","created_at":"2026-06-24T01:14:59.425691+00:00"},{"alias_kind":"pith_short_8","alias_value":"PRJTBIUV","created_at":"2026-06-24T01:14:59.425691+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/PRJTBIUV3327TF7JSOWHYKX5IO","json":"https://pith.science/pith/PRJTBIUV3327TF7JSOWHYKX5IO.json","graph_json":"https://pith.science/api/pith-number/PRJTBIUV3327TF7JSOWHYKX5IO/graph.json","events_json":"https://pith.science/api/pith-number/PRJTBIUV3327TF7JSOWHYKX5IO/events.json","paper":"https://pith.science/paper/PRJTBIUV"},"agent_actions":{"view_html":"https://pith.science/pith/PRJTBIUV3327TF7JSOWHYKX5IO","download_json":"https://pith.science/pith/PRJTBIUV3327TF7JSOWHYKX5IO.json","view_paper":"https://pith.science/paper/PRJTBIUV","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2511.12796&json=true","fetch_graph":"https://pith.science/api/pith-number/PRJTBIUV3327TF7JSOWHYKX5IO/graph.json","fetch_events":"https://pith.science/api/pith-number/PRJTBIUV3327TF7JSOWHYKX5IO/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/PRJTBIUV3327TF7JSOWHYKX5IO/action/timestamp_anchor","attest_storage":"https://pith.science/pith/PRJTBIUV3327TF7JSOWHYKX5IO/action/storage_attestation","attest_author":"https://pith.science/pith/PRJTBIUV3327TF7JSOWHYKX5IO/action/author_attestation","sign_citation":"https://pith.science/pith/PRJTBIUV3327TF7JSOWHYKX5IO/action/citation_signature","submit_replication":"https://pith.science/pith/PRJTBIUV3327TF7JSOWHYKX5IO/action/replication_record"}},"created_at":"2026-06-24T01:14:59.425691+00:00","updated_at":"2026-06-24T01:14:59.425691+00:00"}