{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2026:EMQ7WYLFTUKJYMWXXJ2UJT63BF","short_pith_number":"pith:EMQ7WYLF","canonical_record":{"source":{"id":"2603.09046","kind":"arxiv","version":3},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CR","submitted_at":"2026-03-10T00:31:25Z","cross_cats_sorted":["cs.LG","cs.OS"],"title_canon_sha256":"f38ae1944cb60340e583f1ac9d2ee6b87b47688d41e4a10f9415ea7685367f7b","abstract_canon_sha256":"f0f6015630047f5dd504b5c140c4b48f8d9e104b7d3f16d8d557b5bc563535f5"},"schema_version":"1.0"},"canonical_sha256":"2321fb61659d149c32d7ba7544cfdb09722cf9d742cfb062e82cc5fc51bec945","source":{"kind":"arxiv","id":"2603.09046","version":3},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2603.09046","created_at":"2026-07-03T00:16:53Z"},{"alias_kind":"arxiv_version","alias_value":"2603.09046v3","created_at":"2026-07-03T00:16:53Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2603.09046","created_at":"2026-07-03T00:16:53Z"},{"alias_kind":"pith_short_12","alias_value":"EMQ7WYLFTUKJ","created_at":"2026-07-03T00:16:53Z"},{"alias_kind":"pith_short_16","alias_value":"EMQ7WYLFTUKJYMWX","created_at":"2026-07-03T00:16:53Z"},{"alias_kind":"pith_short_8","alias_value":"EMQ7WYLF","created_at":"2026-07-03T00:16:53Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2026:EMQ7WYLFTUKJYMWXXJ2UJT63BF","target":"record","payload":{"canonical_record":{"source":{"id":"2603.09046","kind":"arxiv","version":3},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CR","submitted_at":"2026-03-10T00:31:25Z","cross_cats_sorted":["cs.LG","cs.OS"],"title_canon_sha256":"f38ae1944cb60340e583f1ac9d2ee6b87b47688d41e4a10f9415ea7685367f7b","abstract_canon_sha256":"f0f6015630047f5dd504b5c140c4b48f8d9e104b7d3f16d8d557b5bc563535f5"},"schema_version":"1.0"},"canonical_sha256":"2321fb61659d149c32d7ba7544cfdb09722cf9d742cfb062e82cc5fc51bec945","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-07-03T00:16:53.070588Z","signature_b64":"Lwc8qupfhG0sdqd/mZhbmgnosLIzO5+AK87rT+gH7ouX3PpLAG8Y6x461diBZGHgc097QFJ2AeFBRlqiQKZiDQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"2321fb61659d149c32d7ba7544cfdb09722cf9d742cfb062e82cc5fc51bec945","last_reissued_at":"2026-07-03T00:16:53.070116Z","signature_status":"signed_v1","first_computed_at":"2026-07-03T00:16:53.070116Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2603.09046","source_version":3,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-07-03T00:16:53Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"LTDlQyq3DWQeOSFEw5/eJIwPPPNggBEfUaa4uhn/OAzCMt/M4f4zZVMferPxBQS/sqCcXLkxqibUMZbwQj1BCw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-07-05T03:08:06.525574Z"},"content_sha256":"6143b3d960fc8ecee067b37e2c01b9e811bc83b5a205ac4b6a8501270bce4d68","schema_version":"1.0","event_id":"sha256:6143b3d960fc8ecee067b37e2c01b9e811bc83b5a205ac4b6a8501270bce4d68"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2026:EMQ7WYLFTUKJYMWXXJ2UJT63BF","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"FlexServe: A Fast and Secure LLM Serving System for Mobile Devices with Flexible Resource Isolation","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"FlexServe allows ARM TrustZone to protect mobile LLM inference by switching memory and NPU modes on demand, cutting time to first token by over 10x versus rigid baselines.","cross_cats":["cs.LG","cs.OS"],"primary_cat":"cs.CR","authors_text":"Jinyu Gu, Lixiang Wang, Yinpeng Wu, Yitong Chen, Yubin Xia, Zhichao Hua","submitted_at":"2026-03-10T00:31:25Z","abstract_excerpt":"Device-side Large Language Models (LLMs) have witnessed explosive growth, offering higher privacy and availability compared to cloud-side LLMs. During LLM inference, both model weights and user data are valuable, and attackers may even compromise the OS kernel to steal them. ARM TrustZone is the de facto hardware-based isolation technology on mobile devices, used to protect sensitive applications from a compromised OS. However, protecting LLM inference with TrustZone incurs significant overhead due to its inflexible isolation of memory and the NPU. To address these challenges, this paper intro"},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"FlexServe achieves an average 10.05× speedup in Time to First Token (TTFT) compared to the strawman, and an average 2.44× TTFT speedup compared to an optimized strawman with pipeline and secure NPU enabled. For multi-model agent workflows, the end-to-end speedup is up to 24.30× and 4.05× compared to the strawman and optimized strawman, respectively.","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"The flexible switching between protected and unprotected modes for memory and NPU does not introduce new security vulnerabilities or significant unmeasured overheads beyond the reported prototype benchmarks.","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"FlexServe achieves up to 10x faster time-to-first-token for secure LLM inference on mobile devices by using flexible resource isolation in TrustZone compared to standard approaches.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"FlexServe allows ARM TrustZone to protect mobile LLM inference by switching memory and NPU modes on demand, cutting time to first token by over 10x versus rigid baselines.","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"037f794a5a03324ee95b1ba22700eb05601ba727fda121d684d7a6fe8c8ab752"},"source":{"id":"2603.09046","kind":"arxiv","version":3},"verdict":{"id":"c4c34be4-23b7-40a8-bdc3-2c286ae5db6a","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-15T14:18:35.483668Z","strongest_claim":"FlexServe achieves an average 10.05× speedup in Time to First Token (TTFT) compared to the strawman, and an average 2.44× TTFT speedup compared to an optimized strawman with pipeline and secure NPU enabled. For multi-model agent workflows, the end-to-end speedup is up to 24.30× and 4.05× compared to the strawman and optimized strawman, respectively.","one_line_summary":"FlexServe achieves up to 10x faster time-to-first-token for secure LLM inference on mobile devices by using flexible resource isolation in TrustZone compared to standard approaches.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"The flexible switching between protected and unprotected modes for memory and NPU does not introduce new security vulnerabilities or significant unmeasured overheads beyond the reported prototype benchmarks.","pith_extraction_headline":"FlexServe allows ARM TrustZone to protect mobile LLM inference by switching memory and NPU modes on demand, cutting time to first token by over 10x versus rigid baselines."},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2603.09046/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":"c4c34be4-23b7-40a8-bdc3-2c286ae5db6a"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-07-03T00:16:53Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"RCptpxAXCBQMrvtPd1fEyUh4Z60gVFyM60zFepM/lN9/VWk+KmFpjxxews8uwRuBpXdBE6r5qVDEgcY2AqOWBA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-07-05T03:08:06.526058Z"},"content_sha256":"fe0f6fbb8dc56a705cc70a40c5b3e18c77e2c868f2045e2a033bb2953c8a060a","schema_version":"1.0","event_id":"sha256:fe0f6fbb8dc56a705cc70a40c5b3e18c77e2c868f2045e2a033bb2953c8a060a"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/EMQ7WYLFTUKJYMWXXJ2UJT63BF/bundle.json","state_url":"https://pith.science/pith/EMQ7WYLFTUKJYMWXXJ2UJT63BF/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/EMQ7WYLFTUKJYMWXXJ2UJT63BF/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-07-05T03:08:06Z","links":{"resolver":"https://pith.science/pith/EMQ7WYLFTUKJYMWXXJ2UJT63BF","bundle":"https://pith.science/pith/EMQ7WYLFTUKJYMWXXJ2UJT63BF/bundle.json","state":"https://pith.science/pith/EMQ7WYLFTUKJYMWXXJ2UJT63BF/state.json","well_known_bundle":"https://pith.science/.well-known/pith/EMQ7WYLFTUKJYMWXXJ2UJT63BF/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:EMQ7WYLFTUKJYMWXXJ2UJT63BF","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"f0f6015630047f5dd504b5c140c4b48f8d9e104b7d3f16d8d557b5bc563535f5","cross_cats_sorted":["cs.LG","cs.OS"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CR","submitted_at":"2026-03-10T00:31:25Z","title_canon_sha256":"f38ae1944cb60340e583f1ac9d2ee6b87b47688d41e4a10f9415ea7685367f7b"},"schema_version":"1.0","source":{"id":"2603.09046","kind":"arxiv","version":3}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2603.09046","created_at":"2026-07-03T00:16:53Z"},{"alias_kind":"arxiv_version","alias_value":"2603.09046v3","created_at":"2026-07-03T00:16:53Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2603.09046","created_at":"2026-07-03T00:16:53Z"},{"alias_kind":"pith_short_12","alias_value":"EMQ7WYLFTUKJ","created_at":"2026-07-03T00:16:53Z"},{"alias_kind":"pith_short_16","alias_value":"EMQ7WYLFTUKJYMWX","created_at":"2026-07-03T00:16:53Z"},{"alias_kind":"pith_short_8","alias_value":"EMQ7WYLF","created_at":"2026-07-03T00:16:53Z"}],"graph_snapshots":[{"event_id":"sha256:fe0f6fbb8dc56a705cc70a40c5b3e18c77e2c868f2045e2a033bb2953c8a060a","target":"graph","created_at":"2026-07-03T00:16:53Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":4,"items":[{"attestation":"unclaimed","claim_id":"C1","kind":"strongest_claim","source":"verdict.strongest_claim","status":"machine_extracted","text":"FlexServe achieves an average 10.05× speedup in Time to First Token (TTFT) compared to the strawman, and an average 2.44× TTFT speedup compared to an optimized strawman with pipeline and secure NPU enabled. For multi-model agent workflows, the end-to-end speedup is up to 24.30× and 4.05× compared to the strawman and optimized strawman, respectively."},{"attestation":"unclaimed","claim_id":"C2","kind":"weakest_assumption","source":"verdict.weakest_assumption","status":"machine_extracted","text":"The flexible switching between protected and unprotected modes for memory and NPU does not introduce new security vulnerabilities or significant unmeasured overheads beyond the reported prototype benchmarks."},{"attestation":"unclaimed","claim_id":"C3","kind":"one_line_summary","source":"verdict.one_line_summary","status":"machine_extracted","text":"FlexServe achieves up to 10x faster time-to-first-token for secure LLM inference on mobile devices by using flexible resource isolation in TrustZone compared to standard approaches."},{"attestation":"unclaimed","claim_id":"C4","kind":"headline","source":"verdict.pith_extraction.headline","status":"machine_extracted","text":"FlexServe allows ARM TrustZone to protect mobile LLM inference by switching memory and NPU modes on demand, cutting time to first token by over 10x versus rigid baselines."}],"snapshot_sha256":"037f794a5a03324ee95b1ba22700eb05601ba727fda121d684d7a6fe8c8ab752"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2603.09046/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Device-side Large Language Models (LLMs) have witnessed explosive growth, offering higher privacy and availability compared to cloud-side LLMs. During LLM inference, both model weights and user data are valuable, and attackers may even compromise the OS kernel to steal them. ARM TrustZone is the de facto hardware-based isolation technology on mobile devices, used to protect sensitive applications from a compromised OS. However, protecting LLM inference with TrustZone incurs significant overhead due to its inflexible isolation of memory and the NPU. To address these challenges, this paper intro","authors_text":"Jinyu Gu, Lixiang Wang, Yinpeng Wu, Yitong Chen, Yubin Xia, Zhichao Hua","cross_cats":["cs.LG","cs.OS"],"headline":"FlexServe allows ARM TrustZone to protect mobile LLM inference by switching memory and NPU modes on demand, cutting time to first token by over 10x versus rigid baselines.","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CR","submitted_at":"2026-03-10T00:31:25Z","title":"FlexServe: A Fast and Secure LLM Serving System for Mobile Devices with Flexible Resource Isolation"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2603.09046","kind":"arxiv","version":3},"verdict":{"created_at":"2026-05-15T14:18:35.483668Z","id":"c4c34be4-23b7-40a8-bdc3-2c286ae5db6a","model_set":{"reader":"grok-4.3"},"one_line_summary":"FlexServe achieves up to 10x faster time-to-first-token for secure LLM inference on mobile devices by using flexible resource isolation in TrustZone compared to standard approaches.","pipeline_version":"pith-pipeline@v0.9.0","pith_extraction_headline":"FlexServe allows ARM TrustZone to protect mobile LLM inference by switching memory and NPU modes on demand, cutting time to first token by over 10x versus rigid baselines.","strongest_claim":"FlexServe achieves an average 10.05× speedup in Time to First Token (TTFT) compared to the strawman, and an average 2.44× TTFT speedup compared to an optimized strawman with pipeline and secure NPU enabled. For multi-model agent workflows, the end-to-end speedup is up to 24.30× and 4.05× compared to the strawman and optimized strawman, respectively.","weakest_assumption":"The flexible switching between protected and unprotected modes for memory and NPU does not introduce new security vulnerabilities or significant unmeasured overheads beyond the reported prototype benchmarks."}},"verdict_id":"c4c34be4-23b7-40a8-bdc3-2c286ae5db6a"}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:6143b3d960fc8ecee067b37e2c01b9e811bc83b5a205ac4b6a8501270bce4d68","target":"record","created_at":"2026-07-03T00:16:53Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"f0f6015630047f5dd504b5c140c4b48f8d9e104b7d3f16d8d557b5bc563535f5","cross_cats_sorted":["cs.LG","cs.OS"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CR","submitted_at":"2026-03-10T00:31:25Z","title_canon_sha256":"f38ae1944cb60340e583f1ac9d2ee6b87b47688d41e4a10f9415ea7685367f7b"},"schema_version":"1.0","source":{"id":"2603.09046","kind":"arxiv","version":3}},"canonical_sha256":"2321fb61659d149c32d7ba7544cfdb09722cf9d742cfb062e82cc5fc51bec945","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"2321fb61659d149c32d7ba7544cfdb09722cf9d742cfb062e82cc5fc51bec945","first_computed_at":"2026-07-03T00:16:53.070116Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-07-03T00:16:53.070116Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"Lwc8qupfhG0sdqd/mZhbmgnosLIzO5+AK87rT+gH7ouX3PpLAG8Y6x461diBZGHgc097QFJ2AeFBRlqiQKZiDQ==","signature_status":"signed_v1","signed_at":"2026-07-03T00:16:53.070588Z","signed_message":"canonical_sha256_bytes"},"source_id":"2603.09046","source_kind":"arxiv","source_version":3}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:6143b3d960fc8ecee067b37e2c01b9e811bc83b5a205ac4b6a8501270bce4d68","sha256:fe0f6fbb8dc56a705cc70a40c5b3e18c77e2c868f2045e2a033bb2953c8a060a"],"state_sha256":"594c7e841fa14af1ed25032205b2c18a7b0f883adcc736425c2f48719f43e860"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"a6pvboflFaSoz3pbuNnLzICiXQGMVeJ4PDksde5R/8EwQtMoMDuIMpUwGM8dSjBWwA4AomA22r6nYZ77qcd8DQ==","signed_message":"bundle_sha256_bytes","signed_at":"2026-07-05T03:08:06.528648Z","bundle_sha256":"64ffa068e5ab48aebab099330d5e3160dcbffb73427d22c8565034950536012d"}}