{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2025:QW27MOB2SAYBFDUT3OEUB2WTIR","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"6d57439be8692a8d01afa446428cf79987e57721a239217a27f54c7820533e81","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2025-04-30T13:10:59Z","title_canon_sha256":"3262e2b74fb8b26a3cd53ea7f6e2e6025f23ba77b6da3f761f7a64f4c7a64327"},"schema_version":"1.0","source":{"id":"2504.21614","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2504.21614","created_at":"2026-06-08T01:03:44Z"},{"alias_kind":"arxiv_version","alias_value":"2504.21614v2","created_at":"2026-06-08T01:03:44Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2504.21614","created_at":"2026-06-08T01:03:44Z"},{"alias_kind":"pith_short_12","alias_value":"QW27MOB2SAYB","created_at":"2026-06-08T01:03:44Z"},{"alias_kind":"pith_short_16","alias_value":"QW27MOB2SAYBFDUT","created_at":"2026-06-08T01:03:44Z"},{"alias_kind":"pith_short_8","alias_value":"QW27MOB2","created_at":"2026-06-08T01:03:44Z"}],"graph_snapshots":[{"event_id":"sha256:3a88c4512c875b00438891ca3b31d998adc49037cf57df41f8a2e24cd5c63951","target":"graph","created_at":"2026-06-08T01:03:44Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2504.21614/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"With an ever-increasing availability of data, it has become more and more challenging to select and label appropriate samples for the training of machine learning models. It is especially difficult to detect long-tail classes of interest in large amounts of unlabeled data. This holds especially true for Intelligent Transportation Systems (ITS), where vehicle fleets and roadside perception systems generate an abundance of raw data. While industrial, proprietary data engines for such iterative data selection and model training processes exist, researchers and the open-source community suffer fro","authors_text":"Abeyankar Giridharan, Daniel Bogdoll, Gregory Stevens, Henry X. Liu, Isabel Moore, Rajanikant Patnaik Ananta","cross_cats":[],"headline":"","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2025-04-30T13:10:59Z","title":"Mcity Data Engine: Iterative Model Improvement Through Open-Vocabulary Data Selection"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2504.21614","kind":"arxiv","version":2},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:509aa621b951b6c1957f46618236c7fc6eca89a4d069a226639fb1c097144136","target":"record","created_at":"2026-06-08T01:03:44Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"6d57439be8692a8d01afa446428cf79987e57721a239217a27f54c7820533e81","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2025-04-30T13:10:59Z","title_canon_sha256":"3262e2b74fb8b26a3cd53ea7f6e2e6025f23ba77b6da3f761f7a64f4c7a64327"},"schema_version":"1.0","source":{"id":"2504.21614","kind":"arxiv","version":2}},"canonical_sha256":"85b5f6383a9030128e93db8940ead34454c289603a4403ebe3e55080742cf056","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"85b5f6383a9030128e93db8940ead34454c289603a4403ebe3e55080742cf056","first_computed_at":"2026-06-08T01:03:44.198195Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-06-08T01:03:44.198195Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"XMrlRv2LTo7i+ky2isDaFS9sOuDHvtZTXFDMXgnHcRvUg8JCiBeiSKPDg2hp9AvsY+lIGjixaIyoFJbksNN3Bw==","signature_status":"signed_v1","signed_at":"2026-06-08T01:03:44.199258Z","signed_message":"canonical_sha256_bytes"},"source_id":"2504.21614","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:509aa621b951b6c1957f46618236c7fc6eca89a4d069a226639fb1c097144136","sha256:3a88c4512c875b00438891ca3b31d998adc49037cf57df41f8a2e24cd5c63951"],"state_sha256":"159da049e840c7f327d0ed11faa3418e4d7bc5a9a3cfcc2011c693c3e030f811"}