{"total":31,"items":[{"citing_arxiv_id":"2606.22748","ref_index":182,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"AI Fiction in the Wild","primary_cat":"cs.CL","submitted_at":"2026-06-22T01:29:16+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"Analysis of 500k ChatGPT logs shows over one-third of conversations generate fiction, dominated by power users with repetitive and niche patterns.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2606.00869","ref_index":91,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Enhancing LLM Metacognition via Cognitive Pairwise Training","primary_cat":"cs.LG","submitted_at":"2026-05-30T19:53:19+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"CPT is introduced as a pairwise reasoning-trace comparison stage that improves the reasoning-metacognition trade-off over standard SFT+RL pipelines across model scales.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.20654","ref_index":18,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"REFLECTOR: Internalizing Step-wise Reflection against Indirect Jailbreak","primary_cat":"cs.LG","submitted_at":"2026-05-20T03:16:15+00:00","verdict":null,"verdict_confidence":null,"novelty_score":null,"formal_verification":null,"one_line_summary":null,"context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.14175","ref_index":41,"ref_count":1,"confidence":0.9,"is_internal_anchor":true,"paper_title":"Grounded Continuation: A Linear-Time Runtime Verifier for LLM Conversations","primary_cat":"cs.AI","submitted_at":"2026-05-13T22:54:16+00:00","verdict":"CONDITIONAL","verdict_confidence":"MODERATE","novelty_score":6.0,"formal_verification":"none","one_line_summary":"A hybrid LLM-symbolic verifier maintains a dependency graph over conversation turns classified into eight formal update operations, enabling linear-time groundedness checks and precise retraction propagation with a conflict-free guarantee.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.13339","ref_index":35,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Probing Persona-Dependent Preferences in Language Models","primary_cat":"cs.CL","submitted_at":"2026-05-13T10:57:37+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"Linear probes on residual-stream activations identify a shared preference vector in LLMs that tracks choices across prompts and causally steers decisions even for anti-correlated personas.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.11093","ref_index":50,"ref_count":1,"confidence":0.9,"is_internal_anchor":true,"paper_title":"Enabling Performant and Flexible Model-Internal Observability for LLM Inference","primary_cat":"cs.LG","submitted_at":"2026-05-11T18:01:36+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"DMI-Lib delivers 0.4-6.8% overhead for offline batch LLM inference and ~6% for moderate online serving while exposing rich internal signals across backends, cutting latency overhead 2-15x versus prior observability baselines.","context_count":1,"top_context_role":"dataset","top_context_polarity":"use_dataset","context_text":"7B for our microbenchmark. Our testbed consists of a compute node from an HPC center, equipped with an NVIDIA H100 80GB GPU and an Intel Xeon Platinum 8468 CPU, and a local GPU workstation for microbenchmarking, equipped with multiple NVIDIA GeForce RTX 4090 GPUs (24GB) and an AMD Ryzen Threadripper PRO 5955WX CPU. Datasets.We evaluate on ShareGPT [ 39] and WildChat-1M [50]. Their average prompt/output lengths are 211/549 and 246.9/400.2 tokens, respectively. Baselines.We compare DMI-Lib against several baselines that support internal-tensor extraction. • Python Callback with Flexible Hook Points.This class provides flexible obser- vation sites and can access intermediate tensors: (1)Torch Hooksin PyTorch pro- vide register_forward_pre_hook and register_forward_hook, which return in-"},{"citing_arxiv_id":"2605.09995","ref_index":59,"ref_count":1,"confidence":0.9,"is_internal_anchor":true,"paper_title":"Annotations Mitigate Post-Training Mode Collapse","primary_cat":"cs.CL","submitted_at":"2026-05-11T05:11:04+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"Annotation-anchored training reduces semantic diversity collapse in post-trained language models by a factor of six compared to standard supervised fine-tuning while preserving instruction-following and improving with scale.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.09808","ref_index":35,"ref_count":1,"confidence":0.9,"is_internal_anchor":true,"paper_title":"Quantifying the Utility of User Simulators for Building Collaborative LLM Assistants","primary_cat":"cs.CL","submitted_at":"2026-05-10T23:06:24+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"Fine-tuned simulators grounded in real human data produce LLM assistants that win more often against real users than those trained against role-playing simulators.","context_count":1,"top_context_role":"dataset","top_context_polarity":"use_dataset","context_text":"user utterances from WildChat [25, 34], a corpus of human-LLM conversations. For each simulator, we train one assistant via multi-turn RL, with rewards from LLM judges that score its interactions with the simulator. We evaluate the trained assistants in a user study, where human participants interact with assistants and provide pairwise preferences, and on WildBench [35], a held-out set of difficult real-world tasks from WildChat users. We find that the assistant trained with the role-playing LLM suffers from a severe train-test mismatch (Section 5). Although the assistant achieves higher rewards against its train-time partner (RPUSER), these gains fail to transfer to human users, yielding only a 50.6 ±4.2% win rate against the initial"},{"citing_arxiv_id":"2605.07284","ref_index":25,"ref_count":1,"confidence":0.9,"is_internal_anchor":true,"paper_title":"Instruction Tuning Changes How Upstream State Conditions Late Readout: A Cross-Patching Diagnostic","primary_cat":"cs.LG","submitted_at":"2026-05-08T05:47:10+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"Instruction tuning makes late-layer computation depend more on the model's own post-trained upstream state than on base-model upstream state, producing a consistent +1.68 logit interaction effect across five model families.","context_count":1,"top_context_role":"background","top_context_polarity":"background","context_text":"The checkpoints share architecture; Tulu adds special tokens, so the preflight validates identical raw prompt token IDs and rejects target tokens outside the shared base vocabulary. Table F.4: Tulu fixed-support stage sweep. 26 Tulu stage on fixed Base->Final support Interaction score % of Final score Native top-1 picks t_Final Base 0 by definition 0% 0.2% SFT +0.419 [+0.349, +0.491] 28.8% [25.5%, 31.8%] 56.9% DPO +1.216 [+1.090, +1.341] 83.6% [81.5%, 85.7%] 90.4% Final/RLVR +1.455 [+1.316, +1.606] 100% 99.1% Base interaction is zero by definition because Base is the reference checkpoint; the nonzero native top-1 rate reflects rare cases where the Base native readout still selects the final-token label under the fixed token contrast. The fixed-support label-swap null passes the same orientation test as the main factorial: the observed"},{"citing_arxiv_id":"2605.05777","ref_index":34,"ref_count":1,"confidence":0.9,"is_internal_anchor":true,"paper_title":"Estimating the Black-box LLM Uncertainty with Distribution-Aligned Adversarial Distillation","primary_cat":"cs.CL","submitted_at":"2026-05-07T07:09:29+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"DisAAD trains a 1%-sized proxy model via adversarial distillation to quantify uncertainty in black-box LLMs by aligning with their output distributions.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.05678","ref_index":35,"ref_count":1,"confidence":0.9,"is_internal_anchor":true,"paper_title":"Chain of Risk: Safety Failures in Large Reasoning Models and Mitigation via Adaptive Multi-Principle Steering","primary_cat":"cs.AI","submitted_at":"2026-05-07T05:12:56+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"Reasoning traces in large reasoning models expose safety failures missed by final-answer checks, and adaptive multi-principle steering reduces unsafe content in both traces and answers while preserving task performance.","context_count":1,"top_context_role":"dataset","top_context_polarity":"use_dataset","context_text":"This setup lets us compare safety across stages, principles, models, and prompt sources, and also provides the scoring signal used by our mitigation method. 3.2 Data and Models Safety prompts.Our safety prompt benchmark aggregates prompts from multiple public harmful- ness and jailbreak datasets. The in-distribution prompt pool combines seven sources:WildChat[ 35], PKU-SafeRLHF[ 36],JailbreakV[ 37],HarmBench[ 14],BeaverTails[ 38],StrongREJECT[ 15], andJailbreakBench[ 16]. Together, these sources cover direct harmful requests, jailbreaks, malicious role-play, adversarial framing, and naturally occurring unsafe user queries. We map dataset-specific fields to a unified prompt column and source label, filter non-English prompts and length outliers,"},{"citing_arxiv_id":"2605.04029","ref_index":42,"ref_count":1,"confidence":0.9,"is_internal_anchor":true,"paper_title":"Stayin' Aligned Over Time: Towards Longitudinal Human-LLM Alignment via Contextual Reflection and Privacy-Preserving Behavioral Data","primary_cat":"cs.HC","submitted_at":"2026-05-05T17:51:41+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"A methodological framework and browser system BITE for collecting evolving user preferences on LLM outputs through context-triggered reflections and privacy-preserving data over time.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.01311","ref_index":37,"ref_count":1,"confidence":0.9,"is_internal_anchor":true,"paper_title":"The Partial Testimony of Logs: Evaluation of Language Model Generation under Confounded Model Choice","primary_cat":"cs.LG","submitted_at":"2026-05-02T07:55:55+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"An identification theorem shows that a randomized experiment and simulator together recover causal model values from confounded logs, with logs used only afterward to reduce estimation error.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.00254","ref_index":71,"ref_count":1,"confidence":0.9,"is_internal_anchor":true,"paper_title":"Rethinking Network Topologies for Cost-Effective Mixture-of-Experts LLM Serving","primary_cat":"cs.NI","submitted_at":"2026-04-30T21:35:22+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"Switchless topologies such as 3D full-mesh are 20.6-56.2% more cost-effective than scale-up networks for MoE LLM serving, with current link bandwidths over-provisioned by up to 27%.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.27039","ref_index":33,"ref_count":1,"confidence":0.9,"is_internal_anchor":true,"paper_title":"Length Value Model: Scalable Value Pretraining for Token-Level Length Modeling","primary_cat":"cs.CL","submitted_at":"2026-04-29T17:09:21+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"LenVM models token-level remaining generation length as a bounded discounted value function derived from constant negative per-token rewards, providing a scalable proxy for generation horizon.","context_count":1,"top_context_role":"background","top_context_polarity":"background","context_text":"In this setting, the central numerical question is what relative length resolution the representation can maintain under finite precision. Since longer horizons naturally tolerate larger absolute deviations, the more relevant quantity is the relative perturbation∣δl∣/l. Recall that LenVM predictsˆv=−σ(z)and that the length estimate is recovered from 1+ ˆv=γ ˆl.(33) Since1+ ˆv=1−σ(z)=σ(−z), we have ˆl= ln(σ(−z)) lnγ ,(34) or equivalently, σ(−z)=γ l.(35) Thus, the logit corresponding to horizonlis z(l)=ln 1−γ l γl .(36) A first-order perturbation analysis with respect tozgives »»»»»»» δl l »»»»»»» ≈m(z) ∣δz∣,(37) where m(z)=− σ(z) ln(σ(−z)).(38) 23 Length Value Model: Scalable Value Pretraining for Token-Level Length Modeling"},{"citing_arxiv_id":"2604.25905","ref_index":40,"ref_count":1,"confidence":0.9,"is_internal_anchor":true,"paper_title":"A paradox of AI fluency","primary_cat":"cs.CL","submitted_at":"2026-04-28T17:51:13+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"Fluent AI users adopt an active, iterative collaboration mode that produces more visible failures but better recovery and success on hard tasks, whereas novices experience more invisible failures from passive use.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.25080","ref_index":8,"ref_count":1,"confidence":0.9,"is_internal_anchor":true,"paper_title":"CacheFlow: Efficient LLM Serving with 3D-Parallel KV Cache Restoration","primary_cat":"cs.DC","submitted_at":"2026-04-28T00:24:29+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"CacheFlow cuts TTFT by 10-62% in batched LLM serving via 3D-parallel KV cache restoration and a two-pointer scheduler that overlaps recompute and I/O.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.10258","ref_index":18,"ref_count":1,"confidence":0.9,"is_internal_anchor":true,"paper_title":"From Searchable to Non-Searchable: Generative AI and Information Diversity in Online Information Seeking","primary_cat":"cs.HC","submitted_at":"2026-04-11T15:51:06+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"ChatGPT expands the diversity of user questions (80% non-searchable) but delivers less diverse responses than Google for comparable queries, creating a feedback loop that may constrain information exposure.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2604.02686","ref_index":20,"ref_count":1,"confidence":0.9,"is_internal_anchor":true,"paper_title":"Beyond Semantic Manipulation: Token-Space Attacks on Reward Models","primary_cat":"cs.LG","submitted_at":"2026-04-03T03:30:34+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"TOMPA performs black-box adversarial optimization in token space to discover non-linguistic patterns that nearly double the reward scores of GPT-5 answers on Skywork-Reward-V2 while producing gibberish text.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2603.23231","ref_index":86,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"PERMA: Benchmarking Personalized Memory Agents via Event-Driven Preference and Realistic Task Environments","primary_cat":"cs.AI","submitted_at":"2026-03-24T14:04:11+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"PERMA is a new benchmark using temporally ordered events, text variability, and linguistic alignment to evaluate LLM memory agents on persona consistency beyond simple retrieval.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2605.02897","ref_index":6,"ref_count":1,"confidence":0.9,"is_internal_anchor":true,"paper_title":"Same Voice, Different Lab: On the Homogenization of Frontier LLM Personalities","primary_cat":"cs.HC","submitted_at":"2026-03-20T19:37:41+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"Frontier LLMs homogenize toward systematic and analytical personalities, suppressing emotional traits like remorseful or sycophantic, indicating an implicit consensus on optimal assistant behavior.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2603.03295","ref_index":22,"ref_count":1,"confidence":0.9,"is_internal_anchor":true,"paper_title":"Language Model Goal Selection Differs from Humans' in a Self-Directed Learning Task","primary_cat":"cs.CL","submitted_at":"2026-02-06T15:39:54+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"LLMs diverge from human goal selection in self-directed learning by exploiting single solutions with low variability across instances.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2601.21351","ref_index":12,"ref_count":1,"confidence":0.9,"is_internal_anchor":true,"paper_title":"Analytical Provisioning for Attention-FFN Disaggregated LLM Serving under Stochastic Workloads","primary_cat":"cs.LG","submitted_at":"2026-01-29T07:22:27+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":7.0,"formal_verification":"none","one_line_summary":"A renewal-reward analysis yields a closed-form mean-field rule for the optimal Attention/FFN provisioning ratio in disaggregated LLM serving that accounts for stochastic KV-cache growth and matches simulation optima within 10%.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2601.14348","ref_index":43,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"Legal Retrieval for Public Defenders","primary_cat":"cs.IR","submitted_at":"2026-01-20T17:08:34+00:00","verdict":"CONDITIONAL","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"NJ BriefBank is a domain-adapted legal retrieval tool for public defenders that improves on standard benchmarks by incorporating legal reasoning, domain data, and synthetic examples, with a new released taxonomy and annotated evaluation dataset.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2512.20856","ref_index":20,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"NVIDIA Nemotron 3: Efficient and Open Intelligence","primary_cat":"cs.CL","submitted_at":"2025-12-24T00:24:05+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"NVIDIA releases the Nemotron 3 model family with hybrid Mamba-Transformer architecture, LatentMoE, NVFP4 training, MTP layers, and multi-environment RL post-training for reasoning and agentic tasks.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2510.24235","ref_index":24,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"PaTaRM: Bridging Pairwise and Pointwise Signals via Preference-Aware Task-Adaptive Reward Modeling","primary_cat":"cs.LG","submitted_at":"2025-10-28T09:43:47+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"PaTaRM converts pairwise preference data into pointwise reward signals via a novel PAR mechanism and task-adaptive rubrics, reporting 8.7% gains on RewardBench/RMBench and 13.6% relative RLHF improvement.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2506.18841","ref_index":49,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"LongWriter-Zero: Mastering Ultra-Long Text Generation via Reinforcement Learning","primary_cat":"cs.CL","submitted_at":"2025-06-23T16:59:02+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"LongWriter-Zero applies RL from a base model with specialized rewards for length, quality, and structure to outperform SFT baselines and larger models on long-writing benchmarks.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2506.01937","ref_index":15,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"RewardBench 2: Advancing Reward Model Evaluation","primary_cat":"cs.CL","submitted_at":"2025-06-02T17:54:04+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":6.0,"formal_verification":"none","one_line_summary":"RewardBench 2 is a new benchmark that supplies challenging fresh human prompts for reward model evaluation, yielding lower average scores but higher correlation with downstream best-of-N sampling and RLHF training performance.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2503.02574","ref_index":61,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"LLM-Safety Evaluations Lack Robustness","primary_cat":"cs.CR","submitted_at":"2025-03-04T12:55:07+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":4.0,"formal_verification":"none","one_line_summary":"LLM safety evaluations are hindered by noise in dataset curation, automated red-teaming, response generation, and LLM-judge evaluation, making fair comparisons difficult and slowing progress.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2412.16720","ref_index":13,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"OpenAI o1 System Card","primary_cat":"cs.AI","submitted_at":"2024-12-21T18:04:31+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":4.0,"formal_verification":"none","one_line_summary":"OpenAI reports that chain-of-thought reasoning in o1 models enables deliberative alignment, yielding state-of-the-art results on selected safety benchmarks for illicit advice, stereotypes, and jailbreaks.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null},{"citing_arxiv_id":"2408.15549","ref_index":41,"ref_count":1,"confidence":0.98,"is_internal_anchor":true,"paper_title":"WildFeedback: Aligning LLMs With In-situ User Interactions And Feedback","primary_cat":"cs.CL","submitted_at":"2024-08-28T05:53:46+00:00","verdict":"UNVERDICTED","verdict_confidence":"LOW","novelty_score":5.0,"formal_verification":"none","one_line_summary":"WildFeedback extracts preference pairs from in-situ user feedback in LLM conversations to fine-tune models for better alignment with real user preferences.","context_count":0,"top_context_role":null,"top_context_polarity":null,"context_text":null}],"limit":50,"offset":0}