{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:P42PJQY5UIPZBBJD3Z4I23AF46","short_pith_number":"pith:P42PJQY5","canonical_record":{"source":{"id":"2604.14684","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2026-04-16T06:40:44Z","cross_cats_sorted":[],"title_canon_sha256":"ca57599f8b58952bcf14bfdcedf05386e058fca75928a55bcc760fc54762ae7f","abstract_canon_sha256":"bf30b53c95e807b063e261caf6d5fe221d4be713fd8980092763a1cab6be123d"},"schema_version":"1.0"},"canonical_sha256":"7f34f4c31da21f908523de788d6c05e783103fe907fae0d666fd3dadfe8f22c3","source":{"kind":"arxiv","id":"2604.14684","version":2},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2604.14684","created_at":"2026-05-27T01:05:54Z"},{"alias_kind":"arxiv_version","alias_value":"2604.14684v2","created_at":"2026-05-27T01:05:54Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2604.14684","created_at":"2026-05-27T01:05:54Z"},{"alias_kind":"pith_short_12","alias_value":"P42PJQY5UIPZ","created_at":"2026-05-27T01:05:54Z"},{"alias_kind":"pith_short_16","alias_value":"P42PJQY5UIPZBBJD","created_at":"2026-05-27T01:05:54Z"},{"alias_kind":"pith_short_8","alias_value":"P42PJQY5","created_at":"2026-05-27T01:05:54Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:P42PJQY5UIPZBBJD3Z4I23AF46","target":"record","payload":{"canonical_record":{"source":{"id":"2604.14684","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2026-04-16T06:40:44Z","cross_cats_sorted":[],"title_canon_sha256":"ca57599f8b58952bcf14bfdcedf05386e058fca75928a55bcc760fc54762ae7f","abstract_canon_sha256":"bf30b53c95e807b063e261caf6d5fe221d4be713fd8980092763a1cab6be123d"},"schema_version":"1.0"},"canonical_sha256":"7f34f4c31da21f908523de788d6c05e783103fe907fae0d666fd3dadfe8f22c3","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-27T01:05:54.681808Z","signature_b64":"Li6F7xkeaK0Cuv2tq6GRgY5ITTCFk4iuGyVUrUUeNHkj5Gts5dxAsnOYpcaKEplyZMRVbaKAQiKUY78M+/LeCg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"7f34f4c31da21f908523de788d6c05e783103fe907fae0d666fd3dadfe8f22c3","last_reissued_at":"2026-05-27T01:05:54.680921Z","signature_status":"signed_v1","first_computed_at":"2026-05-27T01:05:54.680921Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2604.14684","source_version":2,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-27T01:05:54Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"kjAahV1JK4pe4zIPh/ObWimGSkXgTN6eERgYaOm3YnlCpFZ6S8pEUEMQxZhdpzXObnWblnOA5XO4/eut752WAQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-29T14:48:26.389047Z"},"content_sha256":"0b3185b4ecc29b36bfbd19570ae1c298a6db6bc5fc000f95e7c2385b99c4b97f","schema_version":"1.0","event_id":"sha256:0b3185b4ecc29b36bfbd19570ae1c298a6db6bc5fc000f95e7c2385b99c4b97f"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:P42PJQY5UIPZBBJD3Z4I23AF46","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"DETR-ViP: Detection Transformer with Robust Discriminative Visual Prompts","license":"http://creativecommons.org/licenses/by/4.0/","headline":"DETR-ViP adds global integration and distillation to visual prompts so they become class-distinguishable and raise open-vocabulary detection accuracy.","cross_cats":[],"primary_cat":"cs.CV","authors_text":"Bo Qian, Dahu Shi, Xing Wei","submitted_at":"2026-04-16T06:40:44Z","abstract_excerpt":"Visual prompted object detection enables interactive and flexible definition of target categories, thereby facilitating open-vocabulary detection. Since visual prompts are derived directly from image features, they often outperform text prompts in recognizing rare categories. Nevertheless, research on visual prompted detection has been largely overlooked, and it is typically treated as a byproduct of training text prompted detectors, which hinders its development. To fully unlock the potential of visual-prompted detection, we investigate the reasons why its performance is suboptimal and reveal"},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"DETR-ViP achieves substantially higher performance in visual prompt detection compared to other state-of-the-art counterparts.","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"The central performance gap in visual-prompted detection is caused by the absence of global discriminability in visual prompts, and the proposed global integration plus distillation steps will reliably close that gap without introducing instability or overfitting.","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"DETR-ViP boosts visual-prompted detection performance by learning globally discriminative prompts through integration and distillation on top of image-text contrastive learning, with a selective fusion step for stability.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"DETR-ViP adds global integration and distillation to visual prompts so they become class-distinguishable and raise open-vocabulary detection accuracy.","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"8d123d0c96ebd986d1ff2135e41aff3c4f248d1364bb29c66c7bda468a533962"},"source":{"id":"2604.14684","kind":"arxiv","version":2},"verdict":{"id":"a13d4684-7f69-44d8-aed6-93383e41f2d4","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-10T12:04:04.330900Z","strongest_claim":"DETR-ViP achieves substantially higher performance in visual prompt detection compared to other state-of-the-art counterparts.","one_line_summary":"DETR-ViP boosts visual-prompted detection performance by learning globally discriminative prompts through integration and distillation on top of image-text contrastive learning, with a selective fusion step for stability.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"The central performance gap in visual-prompted detection is caused by the absence of global discriminability in visual prompts, and the proposed global integration plus distillation steps will reliably close that gap without introducing instability or overfitting.","pith_extraction_headline":"DETR-ViP adds global integration and distillation to visual prompts so they become class-distinguishable and raise open-vocabulary detection accuracy."},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2604.14684/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":"a13d4684-7f69-44d8-aed6-93383e41f2d4"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-27T01:05:54Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"312N9iiE5b/kHBUDaUhMDWpPPMDbPEYpyGJ50Qp3+2tVm1YCgkhQlHaY5sGduQ+rejCIxE32ueSjsuzuX+6mDA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-29T14:48:26.389915Z"},"content_sha256":"e0dda4cfad13433a44343155c11110aad3136ae58e478e940b76dcecbd9d46d1","schema_version":"1.0","event_id":"sha256:e0dda4cfad13433a44343155c11110aad3136ae58e478e940b76dcecbd9d46d1"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/P42PJQY5UIPZBBJD3Z4I23AF46/bundle.json","state_url":"https://pith.science/pith/P42PJQY5UIPZBBJD3Z4I23AF46/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/P42PJQY5UIPZBBJD3Z4I23AF46/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-29T14:48:26Z","links":{"resolver":"https://pith.science/pith/P42PJQY5UIPZBBJD3Z4I23AF46","bundle":"https://pith.science/pith/P42PJQY5UIPZBBJD3Z4I23AF46/bundle.json","state":"https://pith.science/pith/P42PJQY5UIPZBBJD3Z4I23AF46/state.json","well_known_bundle":"https://pith.science/.well-known/pith/P42PJQY5UIPZBBJD3Z4I23AF46/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:P42PJQY5UIPZBBJD3Z4I23AF46","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"bf30b53c95e807b063e261caf6d5fe221d4be713fd8980092763a1cab6be123d","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2026-04-16T06:40:44Z","title_canon_sha256":"ca57599f8b58952bcf14bfdcedf05386e058fca75928a55bcc760fc54762ae7f"},"schema_version":"1.0","source":{"id":"2604.14684","kind":"arxiv","version":2}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2604.14684","created_at":"2026-05-27T01:05:54Z"},{"alias_kind":"arxiv_version","alias_value":"2604.14684v2","created_at":"2026-05-27T01:05:54Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2604.14684","created_at":"2026-05-27T01:05:54Z"},{"alias_kind":"pith_short_12","alias_value":"P42PJQY5UIPZ","created_at":"2026-05-27T01:05:54Z"},{"alias_kind":"pith_short_16","alias_value":"P42PJQY5UIPZBBJD","created_at":"2026-05-27T01:05:54Z"},{"alias_kind":"pith_short_8","alias_value":"P42PJQY5","created_at":"2026-05-27T01:05:54Z"}],"graph_snapshots":[{"event_id":"sha256:e0dda4cfad13433a44343155c11110aad3136ae58e478e940b76dcecbd9d46d1","target":"graph","created_at":"2026-05-27T01:05:54Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":4,"items":[{"attestation":"unclaimed","claim_id":"C1","kind":"strongest_claim","source":"verdict.strongest_claim","status":"machine_extracted","text":"DETR-ViP achieves substantially higher performance in visual prompt detection compared to other state-of-the-art counterparts."},{"attestation":"unclaimed","claim_id":"C2","kind":"weakest_assumption","source":"verdict.weakest_assumption","status":"machine_extracted","text":"The central performance gap in visual-prompted detection is caused by the absence of global discriminability in visual prompts, and the proposed global integration plus distillation steps will reliably close that gap without introducing instability or overfitting."},{"attestation":"unclaimed","claim_id":"C3","kind":"one_line_summary","source":"verdict.one_line_summary","status":"machine_extracted","text":"DETR-ViP boosts visual-prompted detection performance by learning globally discriminative prompts through integration and distillation on top of image-text contrastive learning, with a selective fusion step for stability."},{"attestation":"unclaimed","claim_id":"C4","kind":"headline","source":"verdict.pith_extraction.headline","status":"machine_extracted","text":"DETR-ViP adds global integration and distillation to visual prompts so they become class-distinguishable and raise open-vocabulary detection accuracy."}],"snapshot_sha256":"8d123d0c96ebd986d1ff2135e41aff3c4f248d1364bb29c66c7bda468a533962"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2604.14684/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Visual prompted object detection enables interactive and flexible definition of target categories, thereby facilitating open-vocabulary detection. Since visual prompts are derived directly from image features, they often outperform text prompts in recognizing rare categories. Nevertheless, research on visual prompted detection has been largely overlooked, and it is typically treated as a byproduct of training text prompted detectors, which hinders its development. To fully unlock the potential of visual-prompted detection, we investigate the reasons why its performance is suboptimal and reveal","authors_text":"Bo Qian, Dahu Shi, Xing Wei","cross_cats":[],"headline":"DETR-ViP adds global integration and distillation to visual prompts so they become class-distinguishable and raise open-vocabulary detection accuracy.","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2026-04-16T06:40:44Z","title":"DETR-ViP: Detection Transformer with Robust Discriminative Visual Prompts"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2604.14684","kind":"arxiv","version":2},"verdict":{"created_at":"2026-05-10T12:04:04.330900Z","id":"a13d4684-7f69-44d8-aed6-93383e41f2d4","model_set":{"reader":"grok-4.3"},"one_line_summary":"DETR-ViP boosts visual-prompted detection performance by learning globally discriminative prompts through integration and distillation on top of image-text contrastive learning, with a selective fusion step for stability.","pipeline_version":"pith-pipeline@v0.9.0","pith_extraction_headline":"DETR-ViP adds global integration and distillation to visual prompts so they become class-distinguishable and raise open-vocabulary detection accuracy.","strongest_claim":"DETR-ViP achieves substantially higher performance in visual prompt detection compared to other state-of-the-art counterparts.","weakest_assumption":"The central performance gap in visual-prompted detection is caused by the absence of global discriminability in visual prompts, and the proposed global integration plus distillation steps will reliably close that gap without introducing instability or overfitting."}},"verdict_id":"a13d4684-7f69-44d8-aed6-93383e41f2d4"}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:0b3185b4ecc29b36bfbd19570ae1c298a6db6bc5fc000f95e7c2385b99c4b97f","target":"record","created_at":"2026-05-27T01:05:54Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"bf30b53c95e807b063e261caf6d5fe221d4be713fd8980092763a1cab6be123d","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2026-04-16T06:40:44Z","title_canon_sha256":"ca57599f8b58952bcf14bfdcedf05386e058fca75928a55bcc760fc54762ae7f"},"schema_version":"1.0","source":{"id":"2604.14684","kind":"arxiv","version":2}},"canonical_sha256":"7f34f4c31da21f908523de788d6c05e783103fe907fae0d666fd3dadfe8f22c3","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"7f34f4c31da21f908523de788d6c05e783103fe907fae0d666fd3dadfe8f22c3","first_computed_at":"2026-05-27T01:05:54.680921Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-27T01:05:54.680921Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"Li6F7xkeaK0Cuv2tq6GRgY5ITTCFk4iuGyVUrUUeNHkj5Gts5dxAsnOYpcaKEplyZMRVbaKAQiKUY78M+/LeCg==","signature_status":"signed_v1","signed_at":"2026-05-27T01:05:54.681808Z","signed_message":"canonical_sha256_bytes"},"source_id":"2604.14684","source_kind":"arxiv","source_version":2}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:0b3185b4ecc29b36bfbd19570ae1c298a6db6bc5fc000f95e7c2385b99c4b97f","sha256:e0dda4cfad13433a44343155c11110aad3136ae58e478e940b76dcecbd9d46d1"],"state_sha256":"1ee49e8449a9d57ec38132b967be03e3458958da80d9ed26627d99df1bb7591f"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"tobroFL9hkgXjhWCuAW6CYNkyzltgtp/tSqU+h2CJYQjJHRbxRocjb4rOPR1zCDRMJu/EnU2UQWJQsIlD4hXDw==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-29T14:48:26.394136Z","bundle_sha256":"e0edb901bfa59c1765fcc56f0d4daa04cd437c8730613d2d92d5dec1b893a9fc"}}