{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2020:GWX4O7GZM5GQDZRYPXCR3NQDUB","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"9db3ea5180cfcc576c48b762eadd2e7dfcc5a9d76632853bba34a963082882f2","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.IR","submitted_at":"2020-10-02T03:54:56Z","title_canon_sha256":"40a2fab3a6dd35c8a99f8614429c51f4718113912ecc19bbb08412d049d41ecb"},"schema_version":"1.0","source":{"id":"2010.00768","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2010.00768","created_at":"2026-07-05T01:39:47Z"},{"alias_kind":"arxiv_version","alias_value":"2010.00768v1","created_at":"2026-07-05T01:39:47Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2010.00768","created_at":"2026-07-05T01:39:47Z"},{"alias_kind":"pith_short_12","alias_value":"GWX4O7GZM5GQ","created_at":"2026-07-05T01:39:47Z"},{"alias_kind":"pith_short_16","alias_value":"GWX4O7GZM5GQDZRY","created_at":"2026-07-05T01:39:47Z"},{"alias_kind":"pith_short_8","alias_value":"GWX4O7GZ","created_at":"2026-07-05T01:39:47Z"}],"graph_snapshots":[{"event_id":"sha256:65090a4ea4c1ae64a18aeed9e4c767b76392b2439ad73d35bec79a8a128ba826","target":"graph","created_at":"2026-07-05T01:39:47Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2010.00768/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Term-based sparse representations dominate the first-stage text retrieval in industrial applications, due to its advantage in efficiency, interpretability, and exact term matching. In this paper, we study the problem of transferring the deep knowledge of the pre-trained language model (PLM) to Term-based Sparse representations, aiming to improve the representation capacity of bag-of-words(BoW) method for semantic-level matching, while still keeping its advantages. Specifically, we propose a novel framework SparTerm to directly learn sparse text representations in the full vocabulary space. The","authors_text":"Chaoliang Zhang, Fangshan Wang, Gang Wang, Jun Xu, Lifeng Shang, Qun Liu, Xiaoguang Li, Yang Bai, Zhaowei Wang","cross_cats":[],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.IR","submitted_at":"2020-10-02T03:54:56Z","title":"SparTerm: Learning Term-based Sparse Representation for Fast Text Retrieval"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2010.00768","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:c0988161d66744ff096129107c8b2276a7da093429b801ad4479ef343e846004","target":"record","created_at":"2026-07-05T01:39:47Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"9db3ea5180cfcc576c48b762eadd2e7dfcc5a9d76632853bba34a963082882f2","cross_cats_sorted":[],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.IR","submitted_at":"2020-10-02T03:54:56Z","title_canon_sha256":"40a2fab3a6dd35c8a99f8614429c51f4718113912ecc19bbb08412d049d41ecb"},"schema_version":"1.0","source":{"id":"2010.00768","kind":"arxiv","version":1}},"canonical_sha256":"35afc77cd9674d01e6387dc51db603a071464349110818ef06a8211fe6016781","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"35afc77cd9674d01e6387dc51db603a071464349110818ef06a8211fe6016781","first_computed_at":"2026-07-05T01:39:47.249069Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-07-05T01:39:47.249069Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"gy7SEg0nthYsOzveu/h7T7iBL4hXWYcFU8kZav1Fmk50hcLZUi2vXtaZwVcoUGIb1fNBrzIjeSHomvEetO4RBA==","signature_status":"signed_v1","signed_at":"2026-07-05T01:39:47.249608Z","signed_message":"canonical_sha256_bytes"},"source_id":"2010.00768","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:c0988161d66744ff096129107c8b2276a7da093429b801ad4479ef343e846004","sha256:65090a4ea4c1ae64a18aeed9e4c767b76392b2439ad73d35bec79a8a128ba826"],"state_sha256":"cd53c521239a55140d5da3e410def6ce50f8e6215a8c713b31b3f73477af28e0"}