{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:K7XGSAC3XXIABZCX2YSQ57T2HX","short_pith_number":"pith:K7XGSAC3","schema_version":"1.0","canonical_sha256":"57ee69005bbdd000e457d6250efe7a3def8124791fd085941272ceb1d2abec77","source":{"kind":"arxiv","id":"2605.22201","version":1},"attestation_state":"computed","paper":{"title":"Zero-Shot Temporal Action Localization Through Textual Guidance","license":"http://creativecommons.org/licenses/by-nc-nd/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.CV","authors_text":"Alessandro Conti, Benedetta Liberatori, Elisa Ricci, Lorenzo Vaquero, Paolo Rota, Yiming Wang","submitted_at":"2026-05-21T09:05:27Z","abstract_excerpt":"Zero-shot temporal action localization (ZS-TAL) consists of classifying and localizing actions in untrimmed videos, where action classes are unseen at training time. Existing work uses Vision and Language Models (VLMs), taking advantage of their strong zero-shot transfer capabilities. Yet, these models face evident challenges with fine-grained action classification, making it difficult to directly use them to distinguish between the presence and absence of an action. Most current methods for ZS-TAL address these challenges by training models on large-scale video datasets, which require annotat"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2605.22201","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by-nc-nd/4.0/","primary_cat":"cs.CV","submitted_at":"2026-05-21T09:05:27Z","cross_cats_sorted":[],"title_canon_sha256":"59797498ad41bacc179ff03467cb5bc022d8febbb619b5389eafbe86a76cc736","abstract_canon_sha256":"fe894a78440bbe77962066af61fd710d3b786c61298b4c7372c85a3b7d53d3fe"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-22T01:04:31.932845Z","signature_b64":"3YIUubJgxBfZfLn3Geld3xe7IluQ9PN2eDkuPg66Dr/FiIs6cf7KFqgjCSKJoWT136ci4lQGFZwhfBOP8uDYBQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"57ee69005bbdd000e457d6250efe7a3def8124791fd085941272ceb1d2abec77","last_reissued_at":"2026-05-22T01:04:31.932168Z","signature_status":"signed_v1","first_computed_at":"2026-05-22T01:04:31.932168Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Zero-Shot Temporal Action Localization Through Textual Guidance","license":"http://creativecommons.org/licenses/by-nc-nd/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.CV","authors_text":"Alessandro Conti, Benedetta Liberatori, Elisa Ricci, Lorenzo Vaquero, Paolo Rota, Yiming Wang","submitted_at":"2026-05-21T09:05:27Z","abstract_excerpt":"Zero-shot temporal action localization (ZS-TAL) consists of classifying and localizing actions in untrimmed videos, where action classes are unseen at training time. Existing work uses Vision and Language Models (VLMs), taking advantage of their strong zero-shot transfer capabilities. Yet, these models face evident challenges with fine-grained action classification, making it difficult to directly use them to distinguish between the presence and absence of an action. Most current methods for ZS-TAL address these challenges by training models on large-scale video datasets, which require annotat"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2605.22201","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2605.22201/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2605.22201","created_at":"2026-05-22T01:04:31.932276+00:00"},{"alias_kind":"arxiv_version","alias_value":"2605.22201v1","created_at":"2026-05-22T01:04:31.932276+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.22201","created_at":"2026-05-22T01:04:31.932276+00:00"},{"alias_kind":"pith_short_12","alias_value":"K7XGSAC3XXIA","created_at":"2026-05-22T01:04:31.932276+00:00"},{"alias_kind":"pith_short_16","alias_value":"K7XGSAC3XXIABZCX","created_at":"2026-05-22T01:04:31.932276+00:00"},{"alias_kind":"pith_short_8","alias_value":"K7XGSAC3","created_at":"2026-05-22T01:04:31.932276+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/K7XGSAC3XXIABZCX2YSQ57T2HX","json":"https://pith.science/pith/K7XGSAC3XXIABZCX2YSQ57T2HX.json","graph_json":"https://pith.science/api/pith-number/K7XGSAC3XXIABZCX2YSQ57T2HX/graph.json","events_json":"https://pith.science/api/pith-number/K7XGSAC3XXIABZCX2YSQ57T2HX/events.json","paper":"https://pith.science/paper/K7XGSAC3"},"agent_actions":{"view_html":"https://pith.science/pith/K7XGSAC3XXIABZCX2YSQ57T2HX","download_json":"https://pith.science/pith/K7XGSAC3XXIABZCX2YSQ57T2HX.json","view_paper":"https://pith.science/paper/K7XGSAC3","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2605.22201&json=true","fetch_graph":"https://pith.science/api/pith-number/K7XGSAC3XXIABZCX2YSQ57T2HX/graph.json","fetch_events":"https://pith.science/api/pith-number/K7XGSAC3XXIABZCX2YSQ57T2HX/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/K7XGSAC3XXIABZCX2YSQ57T2HX/action/timestamp_anchor","attest_storage":"https://pith.science/pith/K7XGSAC3XXIABZCX2YSQ57T2HX/action/storage_attestation","attest_author":"https://pith.science/pith/K7XGSAC3XXIABZCX2YSQ57T2HX/action/author_attestation","sign_citation":"https://pith.science/pith/K7XGSAC3XXIABZCX2YSQ57T2HX/action/citation_signature","submit_replication":"https://pith.science/pith/K7XGSAC3XXIABZCX2YSQ57T2HX/action/replication_record"}},"created_at":"2026-05-22T01:04:31.932276+00:00","updated_at":"2026-05-22T01:04:31.932276+00:00"}