{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2013:LQVCQDYE6VUCASYX3MB26VYUJY","short_pith_number":"pith:LQVCQDYE","canonical_record":{"source":{"id":"1303.1932","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by-nc-sa/3.0/","primary_cat":"cs.CL","submitted_at":"2013-03-08T10:15:57Z","cross_cats_sorted":[],"title_canon_sha256":"0ba51365de23c7cf001d38c6d8147a681726d05783784c8f3aa4d06409dd102c","abstract_canon_sha256":"fc728f0508d36e366ae89fb6f65e67b298d75924621ebc54ba1dd5bf672cd712"},"schema_version":"1.0"},"canonical_sha256":"5c2a280f04f568204b17db03af57144e1d532c7ae92b81e93de0926391ae28aa","source":{"kind":"arxiv","id":"1303.1932","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1303.1932","created_at":"2026-05-18T03:31:31Z"},{"alias_kind":"arxiv_version","alias_value":"1303.1932v1","created_at":"2026-05-18T03:31:31Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1303.1932","created_at":"2026-05-18T03:31:31Z"},{"alias_kind":"pith_short_12","alias_value":"LQVCQDYE6VUC","created_at":"2026-05-18T12:27:51Z"},{"alias_kind":"pith_short_16","alias_value":"LQVCQDYE6VUCASYX","created_at":"2026-05-18T12:27:51Z"},{"alias_kind":"pith_short_8","alias_value":"LQVCQDYE","created_at":"2026-05-18T12:27:51Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2013:LQVCQDYE6VUCASYX3MB26VYUJY","target":"record","payload":{"canonical_record":{"source":{"id":"1303.1932","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by-nc-sa/3.0/","primary_cat":"cs.CL","submitted_at":"2013-03-08T10:15:57Z","cross_cats_sorted":[],"title_canon_sha256":"0ba51365de23c7cf001d38c6d8147a681726d05783784c8f3aa4d06409dd102c","abstract_canon_sha256":"fc728f0508d36e366ae89fb6f65e67b298d75924621ebc54ba1dd5bf672cd712"},"schema_version":"1.0"},"canonical_sha256":"5c2a280f04f568204b17db03af57144e1d532c7ae92b81e93de0926391ae28aa","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T03:31:31.932499Z","signature_b64":"Tc2lkycsWFMfEDYxCkS9aabOCWumzBP1U1N65LifiP+q5H6U2zPwOyg8kxRFdvDDUt+5LgXkdbHxqHtcHQkqBw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"5c2a280f04f568204b17db03af57144e1d532c7ae92b81e93de0926391ae28aa","last_reissued_at":"2026-05-18T03:31:31.931575Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T03:31:31.931575Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1303.1932","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T03:31:31Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"mhUgFaFLIYskXHTS8hvYLjy8DTYxsq5OUgDgzXvvexGJ5E++eUcgt5TzqLoM4gQBLSEEC43JulEWjKolXIyiBg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-26T09:24:11.288013Z"},"content_sha256":"a807b9a1118ba263ad08388be5a6ea718c1face12633e573871de29b1aa598c6","schema_version":"1.0","event_id":"sha256:a807b9a1118ba263ad08388be5a6ea718c1face12633e573871de29b1aa598c6"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2013:LQVCQDYE6VUCASYX3MB26VYUJY","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Mining and Exploiting Domain-Specific Corpora in the PANACEA Platform","license":"http://creativecommons.org/licenses/by-nc-sa/3.0/","headline":"","cross_cats":[],"primary_cat":"cs.CL","authors_text":"Antonio Toral, N\\'uria Bel, Prokopis Prokopidis, Vassilis Papavasiliou, Victoria Arranz","submitted_at":"2013-03-08T10:15:57Z","abstract_excerpt":"The objective of the PANACEA ICT-2007.2.2 EU project is to build a platform that automates the stages involved in the acquisition, production, updating and maintenance of the large language resources required by, among others, MT systems. The development of a Corpus Acquisition Component (CAC) for extracting monolingual and bilingual data from the web is one of the most innovative building blocks of PANACEA. The CAC, which is the first stage in the PANACEA pipeline for building Language Resources, adopts an efficient and distributed methodology to crawl for web documents with rich textual cont"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1303.1932","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T03:31:31Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"SSpYh3mbtisVaOliGThXVn1ez7XQSrBDdn8bV8tgfq5CpaPa4qhHzyGHzLsP8tFMDdWVvuvYLHoGT4RRTqy4CQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-26T09:24:11.288693Z"},"content_sha256":"aa0964892855938386cb66027280766b004eac54dcaa00f0ac527e9ce4ad65a7","schema_version":"1.0","event_id":"sha256:aa0964892855938386cb66027280766b004eac54dcaa00f0ac527e9ce4ad65a7"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/LQVCQDYE6VUCASYX3MB26VYUJY/bundle.json","state_url":"https://pith.science/pith/LQVCQDYE6VUCASYX3MB26VYUJY/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/LQVCQDYE6VUCASYX3MB26VYUJY/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-26T09:24:11Z","links":{"resolver":"https://pith.science/pith/LQVCQDYE6VUCASYX3MB26VYUJY","bundle":"https://pith.science/pith/LQVCQDYE6VUCASYX3MB26VYUJY/bundle.json","state":"https://pith.science/pith/LQVCQDYE6VUCASYX3MB26VYUJY/state.json","well_known_bundle":"https://pith.science/.well-known/pith/LQVCQDYE6VUCASYX3MB26VYUJY/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2013:LQVCQDYE6VUCASYX3MB26VYUJY","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"fc728f0508d36e366ae89fb6f65e67b298d75924621ebc54ba1dd5bf672cd712","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by-nc-sa/3.0/","primary_cat":"cs.CL","submitted_at":"2013-03-08T10:15:57Z","title_canon_sha256":"0ba51365de23c7cf001d38c6d8147a681726d05783784c8f3aa4d06409dd102c"},"schema_version":"1.0","source":{"id":"1303.1932","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1303.1932","created_at":"2026-05-18T03:31:31Z"},{"alias_kind":"arxiv_version","alias_value":"1303.1932v1","created_at":"2026-05-18T03:31:31Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1303.1932","created_at":"2026-05-18T03:31:31Z"},{"alias_kind":"pith_short_12","alias_value":"LQVCQDYE6VUC","created_at":"2026-05-18T12:27:51Z"},{"alias_kind":"pith_short_16","alias_value":"LQVCQDYE6VUCASYX","created_at":"2026-05-18T12:27:51Z"},{"alias_kind":"pith_short_8","alias_value":"LQVCQDYE","created_at":"2026-05-18T12:27:51Z"}],"graph_snapshots":[{"event_id":"sha256:aa0964892855938386cb66027280766b004eac54dcaa00f0ac527e9ce4ad65a7","target":"graph","created_at":"2026-05-18T03:31:31Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"The objective of the PANACEA ICT-2007.2.2 EU project is to build a platform that automates the stages involved in the acquisition, production, updating and maintenance of the large language resources required by, among others, MT systems. The development of a Corpus Acquisition Component (CAC) for extracting monolingual and bilingual data from the web is one of the most innovative building blocks of PANACEA. The CAC, which is the first stage in the PANACEA pipeline for building Language Resources, adopts an efficient and distributed methodology to crawl for web documents with rich textual cont","authors_text":"Antonio Toral, N\\'uria Bel, Prokopis Prokopidis, Vassilis Papavasiliou, Victoria Arranz","cross_cats":[],"headline":"","license":"http://creativecommons.org/licenses/by-nc-sa/3.0/","primary_cat":"cs.CL","submitted_at":"2013-03-08T10:15:57Z","title":"Mining and Exploiting Domain-Specific Corpora in the PANACEA Platform"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1303.1932","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:a807b9a1118ba263ad08388be5a6ea718c1face12633e573871de29b1aa598c6","target":"record","created_at":"2026-05-18T03:31:31Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"fc728f0508d36e366ae89fb6f65e67b298d75924621ebc54ba1dd5bf672cd712","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by-nc-sa/3.0/","primary_cat":"cs.CL","submitted_at":"2013-03-08T10:15:57Z","title_canon_sha256":"0ba51365de23c7cf001d38c6d8147a681726d05783784c8f3aa4d06409dd102c"},"schema_version":"1.0","source":{"id":"1303.1932","kind":"arxiv","version":1}},"canonical_sha256":"5c2a280f04f568204b17db03af57144e1d532c7ae92b81e93de0926391ae28aa","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"5c2a280f04f568204b17db03af57144e1d532c7ae92b81e93de0926391ae28aa","first_computed_at":"2026-05-18T03:31:31.931575Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T03:31:31.931575Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"Tc2lkycsWFMfEDYxCkS9aabOCWumzBP1U1N65LifiP+q5H6U2zPwOyg8kxRFdvDDUt+5LgXkdbHxqHtcHQkqBw==","signature_status":"signed_v1","signed_at":"2026-05-18T03:31:31.932499Z","signed_message":"canonical_sha256_bytes"},"source_id":"1303.1932","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:a807b9a1118ba263ad08388be5a6ea718c1face12633e573871de29b1aa598c6","sha256:aa0964892855938386cb66027280766b004eac54dcaa00f0ac527e9ce4ad65a7"],"state_sha256":"238325ab53a78a3fdc59eeab49e34d51d5308f7f548eee42d8129b4be6092163"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"6Bfc1c24u3S5h+OackDkHu3cJ0DOCdVa48FGtOFhL6hdu7LXM8fQc/OhZdXBXcgEkvPmGLnq52O9l5zqhL1fCQ==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-26T09:24:11.292428Z","bundle_sha256":"58ad2e47275413778e1890b9047cd241e0dc0224cdc1644c14a81809af27939f"}}