{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:L2WYBRJNKD2RBYUBDUF5F3GYMY","short_pith_number":"pith:L2WYBRJN","schema_version":"1.0","canonical_sha256":"5ead80c52d50f510e2811d0bd2ecd86604a787def84d771fb0e78e8bf48eaa38","source":{"kind":"arxiv","id":"2602.13748","version":2},"attestation_state":"computed","paper":{"title":"RMPL: Relation-aware Multi-task Progressive Learning with Stage-wise Training for Multimedia Event Extraction","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.CV"],"primary_cat":"cs.CL","authors_text":"Jianmin Yao, Jianwen Luo, Jingjing Wang, Yongkang Jin, Yu Hong","submitted_at":"2026-02-14T12:43:25Z","abstract_excerpt":"Multimedia Event Extraction (MEE) aims to identify events and their arguments from documents that contain both text and images. It requires grounding event semantics across different modalities. Progress in MEE is limited by the lack of annotated training data. M2E2 is the only established benchmark, but it provides annotations only for evaluation. This makes direct supervised training impractical. Existing methods mainly rely on cross-modal alignment or inference-time prompting with Vision--Language Models (VLMs). These approaches do not explicitly learn structured event representations and o"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2602.13748","kind":"arxiv","version":2},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2026-02-14T12:43:25Z","cross_cats_sorted":["cs.CV"],"title_canon_sha256":"95e706b31290ce8873320f0290bbca7db18b7bfb3b2f46c066ab80a460ef153f","abstract_canon_sha256":"f359846f7c43a24e87d4eb30850dfccfc9cd78fcb55a30916f513e1a7db3ebd1"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-28T01:04:36.868538Z","signature_b64":"0STfR02rKFvqeQcvqsfYmoT6/cydsRw8cTXUROAJLWfMzp4rGff44NuVhwU62wtjNyoOkd8z/UWHF4ISNZ9kCw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"5ead80c52d50f510e2811d0bd2ecd86604a787def84d771fb0e78e8bf48eaa38","last_reissued_at":"2026-05-28T01:04:36.868041Z","signature_status":"signed_v1","first_computed_at":"2026-05-28T01:04:36.868041Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"RMPL: Relation-aware Multi-task Progressive Learning with Stage-wise Training for Multimedia Event Extraction","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.CV"],"primary_cat":"cs.CL","authors_text":"Jianmin Yao, Jianwen Luo, Jingjing Wang, Yongkang Jin, Yu Hong","submitted_at":"2026-02-14T12:43:25Z","abstract_excerpt":"Multimedia Event Extraction (MEE) aims to identify events and their arguments from documents that contain both text and images. It requires grounding event semantics across different modalities. Progress in MEE is limited by the lack of annotated training data. M2E2 is the only established benchmark, but it provides annotations only for evaluation. This makes direct supervised training impractical. Existing methods mainly rely on cross-modal alignment or inference-time prompting with Vision--Language Models (VLMs). These approaches do not explicitly learn structured event representations and o"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2602.13748","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2602.13748/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2602.13748","created_at":"2026-05-28T01:04:36.868100+00:00"},{"alias_kind":"arxiv_version","alias_value":"2602.13748v2","created_at":"2026-05-28T01:04:36.868100+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2602.13748","created_at":"2026-05-28T01:04:36.868100+00:00"},{"alias_kind":"pith_short_12","alias_value":"L2WYBRJNKD2R","created_at":"2026-05-28T01:04:36.868100+00:00"},{"alias_kind":"pith_short_16","alias_value":"L2WYBRJNKD2RBYUB","created_at":"2026-05-28T01:04:36.868100+00:00"},{"alias_kind":"pith_short_8","alias_value":"L2WYBRJN","created_at":"2026-05-28T01:04:36.868100+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/L2WYBRJNKD2RBYUBDUF5F3GYMY","json":"https://pith.science/pith/L2WYBRJNKD2RBYUBDUF5F3GYMY.json","graph_json":"https://pith.science/api/pith-number/L2WYBRJNKD2RBYUBDUF5F3GYMY/graph.json","events_json":"https://pith.science/api/pith-number/L2WYBRJNKD2RBYUBDUF5F3GYMY/events.json","paper":"https://pith.science/paper/L2WYBRJN"},"agent_actions":{"view_html":"https://pith.science/pith/L2WYBRJNKD2RBYUBDUF5F3GYMY","download_json":"https://pith.science/pith/L2WYBRJNKD2RBYUBDUF5F3GYMY.json","view_paper":"https://pith.science/paper/L2WYBRJN","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2602.13748&json=true","fetch_graph":"https://pith.science/api/pith-number/L2WYBRJNKD2RBYUBDUF5F3GYMY/graph.json","fetch_events":"https://pith.science/api/pith-number/L2WYBRJNKD2RBYUBDUF5F3GYMY/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/L2WYBRJNKD2RBYUBDUF5F3GYMY/action/timestamp_anchor","attest_storage":"https://pith.science/pith/L2WYBRJNKD2RBYUBDUF5F3GYMY/action/storage_attestation","attest_author":"https://pith.science/pith/L2WYBRJNKD2RBYUBDUF5F3GYMY/action/author_attestation","sign_citation":"https://pith.science/pith/L2WYBRJNKD2RBYUBDUF5F3GYMY/action/citation_signature","submit_replication":"https://pith.science/pith/L2WYBRJNKD2RBYUBDUF5F3GYMY/action/replication_record"}},"created_at":"2026-05-28T01:04:36.868100+00:00","updated_at":"2026-05-28T01:04:36.868100+00:00"}