{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2017:UEGYW5IEQQGNEIIT2ZY5QS7YMM","short_pith_number":"pith:UEGYW5IE","schema_version":"1.0","canonical_sha256":"a10d8b7504840cd22113d671d84bf86302cc46f972e292107cd4139d1185bf62","source":{"kind":"arxiv","id":"1708.05746","version":1},"attestation_state":"computed","paper":{"title":"Sparkle: Optimizing Spark for Large Memory Machines and Analytics","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.DC","authors_text":"Alexander Ulanov, Haris Volos, Joseph Tucek, Jun Li, Kimberly Keeton, Le Xu, Lucy Cherkasova, Manish Marwah, Mijung Kim, Pradeep Fernando","submitted_at":"2017-08-18T19:49:20Z","abstract_excerpt":"Spark is an in-memory analytics platform that targets commodity server environments today. It relies on the Hadoop Distributed File System (HDFS) to persist intermediate checkpoint states and final processing results. In Spark, immutable data are used for storing data updates in each iteration, making it inefficient for long running, iterative workloads. A non-deterministic garbage collector further worsens this problem. Sparkle is a library that optimizes memory usage in Spark. It exploits large shared memory to achieve better data shuffling and intermediate storage. Sparkle replaces the curr"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"1708.05746","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.DC","submitted_at":"2017-08-18T19:49:20Z","cross_cats_sorted":[],"title_canon_sha256":"f38efa8f01526b5d103ca3dcb6262764e1fc283e68a2530d001a638bd45079c2","abstract_canon_sha256":"73bce9f9ab7639be62ee7169ac6cd271964b6943b497a503c9736e52507dbbda"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:37:44.444440Z","signature_b64":"uAO+XrH1YmpV/D173woLWEu9ubm1N5/kPOe1Hc/kB3i/KEZXhNYxrXQHpkdvI/zzWzizNbyK1ZfR0jPSd271DQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"a10d8b7504840cd22113d671d84bf86302cc46f972e292107cd4139d1185bf62","last_reissued_at":"2026-05-18T00:37:44.443780Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:37:44.443780Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Sparkle: Optimizing Spark for Large Memory Machines and Analytics","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":[],"primary_cat":"cs.DC","authors_text":"Alexander Ulanov, Haris Volos, Joseph Tucek, Jun Li, Kimberly Keeton, Le Xu, Lucy Cherkasova, Manish Marwah, Mijung Kim, Pradeep Fernando","submitted_at":"2017-08-18T19:49:20Z","abstract_excerpt":"Spark is an in-memory analytics platform that targets commodity server environments today. It relies on the Hadoop Distributed File System (HDFS) to persist intermediate checkpoint states and final processing results. In Spark, immutable data are used for storing data updates in each iteration, making it inefficient for long running, iterative workloads. A non-deterministic garbage collector further worsens this problem. Sparkle is a library that optimizes memory usage in Spark. It exploits large shared memory to achieve better data shuffling and intermediate storage. Sparkle replaces the curr"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1708.05746","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"1708.05746","created_at":"2026-05-18T00:37:44.443883+00:00"},{"alias_kind":"arxiv_version","alias_value":"1708.05746v1","created_at":"2026-05-18T00:37:44.443883+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1708.05746","created_at":"2026-05-18T00:37:44.443883+00:00"},{"alias_kind":"pith_short_12","alias_value":"UEGYW5IEQQGN","created_at":"2026-05-18T12:31:46.661854+00:00"},{"alias_kind":"pith_short_16","alias_value":"UEGYW5IEQQGNEIIT","created_at":"2026-05-18T12:31:46.661854+00:00"},{"alias_kind":"pith_short_8","alias_value":"UEGYW5IE","created_at":"2026-05-18T12:31:46.661854+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/UEGYW5IEQQGNEIIT2ZY5QS7YMM","json":"https://pith.science/pith/UEGYW5IEQQGNEIIT2ZY5QS7YMM.json","graph_json":"https://pith.science/api/pith-number/UEGYW5IEQQGNEIIT2ZY5QS7YMM/graph.json","events_json":"https://pith.science/api/pith-number/UEGYW5IEQQGNEIIT2ZY5QS7YMM/events.json","paper":"https://pith.science/paper/UEGYW5IE"},"agent_actions":{"view_html":"https://pith.science/pith/UEGYW5IEQQGNEIIT2ZY5QS7YMM","download_json":"https://pith.science/pith/UEGYW5IEQQGNEIIT2ZY5QS7YMM.json","view_paper":"https://pith.science/paper/UEGYW5IE","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=1708.05746&json=true","fetch_graph":"https://pith.science/api/pith-number/UEGYW5IEQQGNEIIT2ZY5QS7YMM/graph.json","fetch_events":"https://pith.science/api/pith-number/UEGYW5IEQQGNEIIT2ZY5QS7YMM/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/UEGYW5IEQQGNEIIT2ZY5QS7YMM/action/timestamp_anchor","attest_storage":"https://pith.science/pith/UEGYW5IEQQGNEIIT2ZY5QS7YMM/action/storage_attestation","attest_author":"https://pith.science/pith/UEGYW5IEQQGNEIIT2ZY5QS7YMM/action/author_attestation","sign_citation":"https://pith.science/pith/UEGYW5IEQQGNEIIT2ZY5QS7YMM/action/citation_signature","submit_replication":"https://pith.science/pith/UEGYW5IEQQGNEIIT2ZY5QS7YMM/action/replication_record"}},"created_at":"2026-05-18T00:37:44.443883+00:00","updated_at":"2026-05-18T00:37:44.443883+00:00"}