{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:BSHHHTD377QA3M7LKDDRL3JU7M","short_pith_number":"pith:BSHHHTD3","schema_version":"1.0","canonical_sha256":"0c8e73cc7bffe00db3eb50c715ed34fb09b697706e56ba3b03a4548dda284f8f","source":{"kind":"arxiv","id":"2606.24667","version":1},"attestation_state":"computed","paper":{"title":"DREAM: Dense Retrieval Embeddings via Autoregressive Modeling","license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Yixuan Tang, Yi Yang","submitted_at":"2026-06-23T15:00:30Z","abstract_excerpt":"Dense retrieval embedding models are a fundamental component of modern retrieval-based AI systems. Most dense retrievers are trained with contrastive objectives, which require labeled positive and negative document pairs that are often costly and difficult to obtain. In this work, we investigate whether the autoregressive next-token prediction objective of a large language model (LLM) can provide supervision for dense retrieval. The intuition is simple: if a document contains information relevant to a query, conditioning on that document should make the target output easier for the LLM to pred"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2606.24667","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","primary_cat":"cs.CL","submitted_at":"2026-06-23T15:00:30Z","cross_cats_sorted":[],"title_canon_sha256":"d28ab6c4cb88403dafe09d64dac1d79676ac8d458f58a041155ff1b158187810","abstract_canon_sha256":"4e2ad92c7692677bdb4650fbb7f227dcfaea2e8e37eee48a11b74b5e70c1e70a"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-24T01:15:38.695843Z","signature_b64":"6vJT/tyj+U/BiwtrSMW6waySz2P054vy9FLAsl47GQAuyZwNu85Zc2JrgQ3HZ7GSy1FIzvyi6Q3sHB0IJ/9xDw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"0c8e73cc7bffe00db3eb50c715ed34fb09b697706e56ba3b03a4548dda284f8f","last_reissued_at":"2026-06-24T01:15:38.695446Z","signature_status":"signed_v1","first_computed_at":"2026-06-24T01:15:38.695446Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"DREAM: Dense Retrieval Embeddings via Autoregressive Modeling","license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Yixuan Tang, Yi Yang","submitted_at":"2026-06-23T15:00:30Z","abstract_excerpt":"Dense retrieval embedding models are a fundamental component of modern retrieval-based AI systems. Most dense retrievers are trained with contrastive objectives, which require labeled positive and negative document pairs that are often costly and difficult to obtain. In this work, we investigate whether the autoregressive next-token prediction objective of a large language model (LLM) can provide supervision for dense retrieval. The intuition is simple: if a document contains information relevant to a query, conditioning on that document should make the target output easier for the LLM to pred"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2606.24667","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2606.24667/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2606.24667","created_at":"2026-06-24T01:15:38.695507+00:00"},{"alias_kind":"arxiv_version","alias_value":"2606.24667v1","created_at":"2026-06-24T01:15:38.695507+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2606.24667","created_at":"2026-06-24T01:15:38.695507+00:00"},{"alias_kind":"pith_short_12","alias_value":"BSHHHTD377QA","created_at":"2026-06-24T01:15:38.695507+00:00"},{"alias_kind":"pith_short_16","alias_value":"BSHHHTD377QA3M7L","created_at":"2026-06-24T01:15:38.695507+00:00"},{"alias_kind":"pith_short_8","alias_value":"BSHHHTD3","created_at":"2026-06-24T01:15:38.695507+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/BSHHHTD377QA3M7LKDDRL3JU7M","json":"https://pith.science/pith/BSHHHTD377QA3M7LKDDRL3JU7M.json","graph_json":"https://pith.science/api/pith-number/BSHHHTD377QA3M7LKDDRL3JU7M/graph.json","events_json":"https://pith.science/api/pith-number/BSHHHTD377QA3M7LKDDRL3JU7M/events.json","paper":"https://pith.science/paper/BSHHHTD3"},"agent_actions":{"view_html":"https://pith.science/pith/BSHHHTD377QA3M7LKDDRL3JU7M","download_json":"https://pith.science/pith/BSHHHTD377QA3M7LKDDRL3JU7M.json","view_paper":"https://pith.science/paper/BSHHHTD3","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2606.24667&json=true","fetch_graph":"https://pith.science/api/pith-number/BSHHHTD377QA3M7LKDDRL3JU7M/graph.json","fetch_events":"https://pith.science/api/pith-number/BSHHHTD377QA3M7LKDDRL3JU7M/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/BSHHHTD377QA3M7LKDDRL3JU7M/action/timestamp_anchor","attest_storage":"https://pith.science/pith/BSHHHTD377QA3M7LKDDRL3JU7M/action/storage_attestation","attest_author":"https://pith.science/pith/BSHHHTD377QA3M7LKDDRL3JU7M/action/author_attestation","sign_citation":"https://pith.science/pith/BSHHHTD377QA3M7LKDDRL3JU7M/action/citation_signature","submit_replication":"https://pith.science/pith/BSHHHTD377QA3M7LKDDRL3JU7M/action/replication_record"}},"created_at":"2026-06-24T01:15:38.695507+00:00","updated_at":"2026-06-24T01:15:38.695507+00:00"}