{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2021:VIILBFQLIWEDLJUEYZX4AXP6J7","short_pith_number":"pith:VIILBFQL","canonical_record":{"source":{"id":"2106.03373","kind":"arxiv","version":4},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.IR","submitted_at":"2021-06-07T06:55:45Z","cross_cats_sorted":[],"title_canon_sha256":"cc97a7fed2bc8b917e0bd5646a8229d72f8beb60edb4f9505ed9e3e861a1d710","abstract_canon_sha256":"bfe41f76dee5d83066caa3152590bf67cbb491ac4e8f8f351fc2884d97bb51c0"},"schema_version":"1.0"},"canonical_sha256":"aa10b0960b458835a684c66fc05dfe4ffce5e9f42ff1ac98685ace26bc16f6d4","source":{"kind":"arxiv","id":"2106.03373","version":4},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2106.03373","created_at":"2026-07-05T03:23:11Z"},{"alias_kind":"arxiv_version","alias_value":"2106.03373v4","created_at":"2026-07-05T03:23:11Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2106.03373","created_at":"2026-07-05T03:23:11Z"},{"alias_kind":"pith_short_12","alias_value":"VIILBFQLIWED","created_at":"2026-07-05T03:23:11Z"},{"alias_kind":"pith_short_16","alias_value":"VIILBFQLIWEDLJUE","created_at":"2026-07-05T03:23:11Z"},{"alias_kind":"pith_short_8","alias_value":"VIILBFQL","created_at":"2026-07-05T03:23:11Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2021:VIILBFQLIWEDLJUEYZX4AXP6J7","target":"record","payload":{"canonical_record":{"source":{"id":"2106.03373","kind":"arxiv","version":4},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.IR","submitted_at":"2021-06-07T06:55:45Z","cross_cats_sorted":[],"title_canon_sha256":"cc97a7fed2bc8b917e0bd5646a8229d72f8beb60edb4f9505ed9e3e861a1d710","abstract_canon_sha256":"bfe41f76dee5d83066caa3152590bf67cbb491ac4e8f8f351fc2884d97bb51c0"},"schema_version":"1.0"},"canonical_sha256":"aa10b0960b458835a684c66fc05dfe4ffce5e9f42ff1ac98685ace26bc16f6d4","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-07-05T03:23:11.318298Z","signature_b64":"xMV9sD9J5ssGJn9lSSBEjsl9DSzO+ujp99TjSmbqgydAU/8LfKe3/s8ETcWnqylScjqnYcP24Qr2jkChD6TIAg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"aa10b0960b458835a684c66fc05dfe4ffce5e9f42ff1ac98685ace26bc16f6d4","last_reissued_at":"2026-07-05T03:23:11.317811Z","signature_status":"signed_v1","first_computed_at":"2026-07-05T03:23:11.317811Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2106.03373","source_version":4,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-07-05T03:23:11Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"4OJR7Hsx3Le1yfC2pKn3Eh6Db6xDCrHYT1zZW70izSLgGx7yx+lou3TSATO3B4QYpsc6rPsvo8x6oruDxPojAQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-07-05T14:59:42.440186Z"},"content_sha256":"94ab5e3dfdfed60b41a67ff1c9b31fb615cfac3730395bd50f70ebe0d629dfc0","schema_version":"1.0","event_id":"sha256:94ab5e3dfdfed60b41a67ff1c9b31fb615cfac3730395bd50f70ebe0d629dfc0"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2021:VIILBFQLIWEDLJUEYZX4AXP6J7","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Pre-trained Language Model for Web-scale Retrieval in Baidu Search","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.IR","authors_text":"Daiting Shi, Dawei Yin, Guan Huang, Jiaxiang Liu, Shuaiqiang Wang, Suqi Cheng, Weixue Lu, Yiding Liu, Yukun Li, Zhicong Cheng","submitted_at":"2021-06-07T06:55:45Z","abstract_excerpt":"Retrieval is a crucial stage in web search that identifies a small set of query-relevant candidates from a billion-scale corpus. Discovering more semantically-related candidates in the retrieval stage is very promising to expose more high-quality results to the end users. However, it still remains non-trivial challenges of building and deploying effective retrieval models for semantic matching in real search engine. In this paper, we describe the retrieval system that we developed and deployed in Baidu Search. The system exploits the recent state-of-the-art Chinese pretrained language model, n"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2106.03373","kind":"arxiv","version":4},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2106.03373/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-07-05T03:23:11Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"pFpXJp1fUpj3VV7NqTCUAD+8LJZGSw+mugzUUeRsJfrpsyi3ZLiu00zcraB+zgvmgjE0kQ7++ORN16lFmqRcDA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-07-05T14:59:42.440598Z"},"content_sha256":"efcc3fc7a7a7227b8089b6de867152e713ccc9a438d5e9ad0884fbfb80adeb39","schema_version":"1.0","event_id":"sha256:efcc3fc7a7a7227b8089b6de867152e713ccc9a438d5e9ad0884fbfb80adeb39"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/VIILBFQLIWEDLJUEYZX4AXP6J7/bundle.json","state_url":"https://pith.science/pith/VIILBFQLIWEDLJUEYZX4AXP6J7/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/VIILBFQLIWEDLJUEYZX4AXP6J7/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-07-05T14:59:42Z","links":{"resolver":"https://pith.science/pith/VIILBFQLIWEDLJUEYZX4AXP6J7","bundle":"https://pith.science/pith/VIILBFQLIWEDLJUEYZX4AXP6J7/bundle.json","state":"https://pith.science/pith/VIILBFQLIWEDLJUEYZX4AXP6J7/state.json","well_known_bundle":"https://pith.science/.well-known/pith/VIILBFQLIWEDLJUEYZX4AXP6J7/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2021:VIILBFQLIWEDLJUEYZX4AXP6J7","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"bfe41f76dee5d83066caa3152590bf67cbb491ac4e8f8f351fc2884d97bb51c0","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.IR","submitted_at":"2021-06-07T06:55:45Z","title_canon_sha256":"cc97a7fed2bc8b917e0bd5646a8229d72f8beb60edb4f9505ed9e3e861a1d710"},"schema_version":"1.0","source":{"id":"2106.03373","kind":"arxiv","version":4}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2106.03373","created_at":"2026-07-05T03:23:11Z"},{"alias_kind":"arxiv_version","alias_value":"2106.03373v4","created_at":"2026-07-05T03:23:11Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2106.03373","created_at":"2026-07-05T03:23:11Z"},{"alias_kind":"pith_short_12","alias_value":"VIILBFQLIWED","created_at":"2026-07-05T03:23:11Z"},{"alias_kind":"pith_short_16","alias_value":"VIILBFQLIWEDLJUE","created_at":"2026-07-05T03:23:11Z"},{"alias_kind":"pith_short_8","alias_value":"VIILBFQL","created_at":"2026-07-05T03:23:11Z"}],"graph_snapshots":[{"event_id":"sha256:efcc3fc7a7a7227b8089b6de867152e713ccc9a438d5e9ad0884fbfb80adeb39","target":"graph","created_at":"2026-07-05T03:23:11Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2106.03373/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Retrieval is a crucial stage in web search that identifies a small set of query-relevant candidates from a billion-scale corpus. Discovering more semantically-related candidates in the retrieval stage is very promising to expose more high-quality results to the end users. However, it still remains non-trivial challenges of building and deploying effective retrieval models for semantic matching in real search engine. In this paper, we describe the retrieval system that we developed and deployed in Baidu Search. The system exploits the recent state-of-the-art Chinese pretrained language model, n","authors_text":"Daiting Shi, Dawei Yin, Guan Huang, Jiaxiang Liu, Shuaiqiang Wang, Suqi Cheng, Weixue Lu, Yiding Liu, Yukun Li, Zhicong Cheng","cross_cats":[],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.IR","submitted_at":"2021-06-07T06:55:45Z","title":"Pre-trained Language Model for Web-scale Retrieval in Baidu Search"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2106.03373","kind":"arxiv","version":4},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:94ab5e3dfdfed60b41a67ff1c9b31fb615cfac3730395bd50f70ebe0d629dfc0","target":"record","created_at":"2026-07-05T03:23:11Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"bfe41f76dee5d83066caa3152590bf67cbb491ac4e8f8f351fc2884d97bb51c0","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.IR","submitted_at":"2021-06-07T06:55:45Z","title_canon_sha256":"cc97a7fed2bc8b917e0bd5646a8229d72f8beb60edb4f9505ed9e3e861a1d710"},"schema_version":"1.0","source":{"id":"2106.03373","kind":"arxiv","version":4}},"canonical_sha256":"aa10b0960b458835a684c66fc05dfe4ffce5e9f42ff1ac98685ace26bc16f6d4","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"aa10b0960b458835a684c66fc05dfe4ffce5e9f42ff1ac98685ace26bc16f6d4","first_computed_at":"2026-07-05T03:23:11.317811Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-07-05T03:23:11.317811Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"xMV9sD9J5ssGJn9lSSBEjsl9DSzO+ujp99TjSmbqgydAU/8LfKe3/s8ETcWnqylScjqnYcP24Qr2jkChD6TIAg==","signature_status":"signed_v1","signed_at":"2026-07-05T03:23:11.318298Z","signed_message":"canonical_sha256_bytes"},"source_id":"2106.03373","source_kind":"arxiv","source_version":4}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:94ab5e3dfdfed60b41a67ff1c9b31fb615cfac3730395bd50f70ebe0d629dfc0","sha256:efcc3fc7a7a7227b8089b6de867152e713ccc9a438d5e9ad0884fbfb80adeb39"],"state_sha256":"67d6433dec369e3db0bf6943da0cf2eeb7e383a62bf7fccbc50e36bbf56cf6ce"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"WwH8YlsGn81pCCdDjFa4uxSDS+qOHx+/RXcaroyL9AfU8thx7XQGx4fUCyCKdUZB2KDUalNoQ8crxjmCatv3Cw==","signed_message":"bundle_sha256_bytes","signed_at":"2026-07-05T14:59:42.443842Z","bundle_sha256":"fd176af19f27f2cca07a2a9c5e09131cb7e91053545406e652b7725413809dc4"}}