{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2015:NB5VDHRISOPZXWLMRM4GDUE5DG","short_pith_number":"pith:NB5VDHRI","schema_version":"1.0","canonical_sha256":"687b519e28939f9bd96c8b3861d09d19804702d6b5f064251d78402a3f90f73f","source":{"kind":"arxiv","id":"1512.07454","version":1},"attestation_state":"computed","paper":{"title":"Evaluation-as-a-Service: Overview and Outlook","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.IR"],"primary_cat":"cs.CY","authors_text":"Allan Hanbury, Anastasia Krithara, Frank Hopfgartner, Gordon V. Cormack, Henning M\\\"uller, Ivan Eggel, Jayashree Kalpathy-Cramer, Jimmy Lin, Krisztian Balog, Martin Potthast, Noriko Kando, Simon Mercer, Tim Gollub, Torben Brodt","submitted_at":"2015-12-23T12:44:09Z","abstract_excerpt":"Evaluation in empirical computer science is essential to show progress and assess technologies developed. Several research domains such as information retrieval have long relied on systematic evaluation to measure progress: here, the Cranfield paradigm of creating shared test collections, defining search tasks, and collecting ground truth for these tasks has persisted up until now. In recent years, however, several new challenges have emerged that do not fit this paradigm very well: extremely large data sets, confidential data sets as found in the medical domain, and rapidly changing data sets"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1512.07454","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CY","submitted_at":"2015-12-23T12:44:09Z","cross_cats_sorted":["cs.IR"],"title_canon_sha256":"8b75fd734f787ac0778180da11221d643a05f1c43f6f2dfc8f3f3afc423171e0","abstract_canon_sha256":"ccfd026c3f85bf935eed91e3082d72c2be754e4bcfabe244b908b43676c37ddc"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T01:23:49.638044Z","signature_b64":"v74LlG2Mu6OOZlQiySzd0yisvOE7I8rglS1hE2yDdo5AlfpGRLLP7bQt9Zev4ZJFqHLFrK2MNWU/z9+bJKBHBw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"687b519e28939f9bd96c8b3861d09d19804702d6b5f064251d78402a3f90f73f","last_reissued_at":"2026-05-18T01:23:49.637503Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T01:23:49.637503Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Evaluation-as-a-Service: Overview and Outlook","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.IR"],"primary_cat":"cs.CY","authors_text":"Allan Hanbury, Anastasia Krithara, Frank Hopfgartner, Gordon V. Cormack, Henning M\\\"uller, Ivan Eggel, Jayashree Kalpathy-Cramer, Jimmy Lin, Krisztian Balog, Martin Potthast, Noriko Kando, Simon Mercer, Tim Gollub, Torben Brodt","submitted_at":"2015-12-23T12:44:09Z","abstract_excerpt":"Evaluation in empirical computer science is essential to show progress and assess technologies developed. Several research domains such as information retrieval have long relied on systematic evaluation to measure progress: here, the Cranfield paradigm of creating shared test collections, defining search tasks, and collecting ground truth for these tasks has persisted up until now. In recent years, however, several new challenges have emerged that do not fit this paradigm very well: extremely large data sets, confidential data sets as found in the medical domain, and rapidly changing data sets"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1512.07454","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1512.07454","created_at":"2026-05-18T01:23:49.637580+00:00"},{"alias_kind":"arxiv_version","alias_value":"1512.07454v1","created_at":"2026-05-18T01:23:49.637580+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1512.07454","created_at":"2026-05-18T01:23:49.637580+00:00"},{"alias_kind":"pith_short_12","alias_value":"NB5VDHRISOPZ","created_at":"2026-05-18T12:29:32.376354+00:00"},{"alias_kind":"pith_short_16","alias_value":"NB5VDHRISOPZXWLM","created_at":"2026-05-18T12:29:32.376354+00:00"},{"alias_kind":"pith_short_8","alias_value":"NB5VDHRI","created_at":"2026-05-18T12:29:32.376354+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/NB5VDHRISOPZXWLMRM4GDUE5DG","json":"https://pith.science/pith/NB5VDHRISOPZXWLMRM4GDUE5DG.json","graph_json":"https://pith.science/api/pith-number/NB5VDHRISOPZXWLMRM4GDUE5DG/graph.json","events_json":"https://pith.science/api/pith-number/NB5VDHRISOPZXWLMRM4GDUE5DG/events.json","paper":"https://pith.science/paper/NB5VDHRI"},"agent_actions":{"view_html":"https://pith.science/pith/NB5VDHRISOPZXWLMRM4GDUE5DG","download_json":"https://pith.science/pith/NB5VDHRISOPZXWLMRM4GDUE5DG.json","view_paper":"https://pith.science/paper/NB5VDHRI","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1512.07454&json=true","fetch_graph":"https://pith.science/api/pith-number/NB5VDHRISOPZXWLMRM4GDUE5DG/graph.json","fetch_events":"https://pith.science/api/pith-number/NB5VDHRISOPZXWLMRM4GDUE5DG/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/NB5VDHRISOPZXWLMRM4GDUE5DG/action/timestamp_anchor","attest_storage":"https://pith.science/pith/NB5VDHRISOPZXWLMRM4GDUE5DG/action/storage_attestation","attest_author":"https://pith.science/pith/NB5VDHRISOPZXWLMRM4GDUE5DG/action/author_attestation","sign_citation":"https://pith.science/pith/NB5VDHRISOPZXWLMRM4GDUE5DG/action/citation_signature","submit_replication":"https://pith.science/pith/NB5VDHRISOPZXWLMRM4GDUE5DG/action/replication_record"}},"created_at":"2026-05-18T01:23:49.637580+00:00","updated_at":"2026-05-18T01:23:49.637580+00:00"}