{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:2FV6ZZ4XNLSG3JOKQSHKODI6VX","short_pith_number":"pith:2FV6ZZ4X","schema_version":"1.0","canonical_sha256":"d16bece7976ae46da5ca848ea70d1eaded4ce3d1c2e2cf7907c0669d77aa2da5","source":{"kind":"arxiv","id":"2607.00428","version":1},"attestation_state":"computed","paper":{"title":"HyFL-CLIP: Hyperbolic Fine-Tuning of CLIP for Robust Long-Context Understanding","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CV","authors_text":"Chulwon Lee, Hayeon Kim, Ji Ha Jang, Junghun James Kim, Se Young Chun","submitted_at":"2026-07-01T04:40:11Z","abstract_excerpt":"CLIP (Contrastive Language-Image Pre-training) has become a de facto paradigm for image-text alignment, but it struggles with long-context descriptions (>77 tokens) due to absolute positional encoding and pretraining on short captions. In long contexts, sentences are often reordered, summarized, or partially omitted. Although prior works extend CLIP with longer positional encodings, they often suffer from degraded image-text alignment under such text perturbations. We attribute this limitation to the Euclidean contrastive objective, which enforces strict one-to-one matching and lacks explicit "},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2607.00428","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CV","submitted_at":"2026-07-01T04:40:11Z","cross_cats_sorted":[],"title_canon_sha256":"a62cc058e7a8e3e715ebac366713c390b35dea7539ebe26f719684709633e94d","abstract_canon_sha256":"3c94f6ab4993cffecaf289463d37f2bbb87589b09822fa2cd0693f68145f9761"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-07-02T01:17:43.060433Z","signature_b64":"eHxgDOwas0mWmM+muE2eDDluPpO/HnBWH/jCorvTre/Gvd0wOoth+MIGpVcFLqhN52xeev4WC9qTfZ1bnDdICw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"d16bece7976ae46da5ca848ea70d1eaded4ce3d1c2e2cf7907c0669d77aa2da5","last_reissued_at":"2026-07-02T01:17:43.060080Z","signature_status":"signed_v1","first_computed_at":"2026-07-02T01:17:43.060080Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"HyFL-CLIP: Hyperbolic Fine-Tuning of CLIP for Robust Long-Context Understanding","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.CV","authors_text":"Chulwon Lee, Hayeon Kim, Ji Ha Jang, Junghun James Kim, Se Young Chun","submitted_at":"2026-07-01T04:40:11Z","abstract_excerpt":"CLIP (Contrastive Language-Image Pre-training) has become a de facto paradigm for image-text alignment, but it struggles with long-context descriptions (>77 tokens) due to absolute positional encoding and pretraining on short captions. In long contexts, sentences are often reordered, summarized, or partially omitted. Although prior works extend CLIP with longer positional encodings, they often suffer from degraded image-text alignment under such text perturbations. We attribute this limitation to the Euclidean contrastive objective, which enforces strict one-to-one matching and lacks explicit "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2607.00428","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2607.00428/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2607.00428","created_at":"2026-07-02T01:17:43.060141+00:00"},{"alias_kind":"arxiv_version","alias_value":"2607.00428v1","created_at":"2026-07-02T01:17:43.060141+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2607.00428","created_at":"2026-07-02T01:17:43.060141+00:00"},{"alias_kind":"pith_short_12","alias_value":"2FV6ZZ4XNLSG","created_at":"2026-07-02T01:17:43.060141+00:00"},{"alias_kind":"pith_short_16","alias_value":"2FV6ZZ4XNLSG3JOK","created_at":"2026-07-02T01:17:43.060141+00:00"},{"alias_kind":"pith_short_8","alias_value":"2FV6ZZ4X","created_at":"2026-07-02T01:17:43.060141+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/2FV6ZZ4XNLSG3JOKQSHKODI6VX","json":"https://pith.science/pith/2FV6ZZ4XNLSG3JOKQSHKODI6VX.json","graph_json":"https://pith.science/api/pith-number/2FV6ZZ4XNLSG3JOKQSHKODI6VX/graph.json","events_json":"https://pith.science/api/pith-number/2FV6ZZ4XNLSG3JOKQSHKODI6VX/events.json","paper":"https://pith.science/paper/2FV6ZZ4X"},"agent_actions":{"view_html":"https://pith.science/pith/2FV6ZZ4XNLSG3JOKQSHKODI6VX","download_json":"https://pith.science/pith/2FV6ZZ4XNLSG3JOKQSHKODI6VX.json","view_paper":"https://pith.science/paper/2FV6ZZ4X","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2607.00428&json=true","fetch_graph":"https://pith.science/api/pith-number/2FV6ZZ4XNLSG3JOKQSHKODI6VX/graph.json","fetch_events":"https://pith.science/api/pith-number/2FV6ZZ4XNLSG3JOKQSHKODI6VX/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/2FV6ZZ4XNLSG3JOKQSHKODI6VX/action/timestamp_anchor","attest_storage":"https://pith.science/pith/2FV6ZZ4XNLSG3JOKQSHKODI6VX/action/storage_attestation","attest_author":"https://pith.science/pith/2FV6ZZ4XNLSG3JOKQSHKODI6VX/action/author_attestation","sign_citation":"https://pith.science/pith/2FV6ZZ4XNLSG3JOKQSHKODI6VX/action/citation_signature","submit_replication":"https://pith.science/pith/2FV6ZZ4XNLSG3JOKQSHKODI6VX/action/replication_record"}},"created_at":"2026-07-02T01:17:43.060141+00:00","updated_at":"2026-07-02T01:17:43.060141+00:00"}