{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2017:QF3SZ4VE6UCC2LXXKKVSERLOPZ","short_pith_number":"pith:QF3SZ4VE","canonical_record":{"source":{"id":"1707.02275","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2017-07-07T17:15:27Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"7d4fdafc501a4354cd392248f366e138eed4d263799cf3f8230dd3a41e82b88d","abstract_canon_sha256":"f3565d75f8221b466a431c8bff537264739074bee5c1a6a0d13c40169ab98de8"},"schema_version":"1.0"},"canonical_sha256":"81772cf2a4f5042d2ef752ab22456e7e7021724c15caee39254ef71be11715a7","source":{"kind":"arxiv","id":"1707.02275","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1707.02275","created_at":"2026-05-18T00:40:42Z"},{"alias_kind":"arxiv_version","alias_value":"1707.02275v1","created_at":"2026-05-18T00:40:42Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1707.02275","created_at":"2026-05-18T00:40:42Z"},{"alias_kind":"pith_short_12","alias_value":"QF3SZ4VE6UCC","created_at":"2026-05-18T12:31:37Z"},{"alias_kind":"pith_short_16","alias_value":"QF3SZ4VE6UCC2LXX","created_at":"2026-05-18T12:31:37Z"},{"alias_kind":"pith_short_8","alias_value":"QF3SZ4VE","created_at":"2026-05-18T12:31:37Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2017:QF3SZ4VE6UCC2LXXKKVSERLOPZ","target":"record","payload":{"canonical_record":{"source":{"id":"1707.02275","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2017-07-07T17:15:27Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"7d4fdafc501a4354cd392248f366e138eed4d263799cf3f8230dd3a41e82b88d","abstract_canon_sha256":"f3565d75f8221b466a431c8bff537264739074bee5c1a6a0d13c40169ab98de8"},"schema_version":"1.0"},"canonical_sha256":"81772cf2a4f5042d2ef752ab22456e7e7021724c15caee39254ef71be11715a7","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:40:42.404127Z","signature_b64":"zkkOIFCeEXVvvFWEBw9mXW7jTCa2Fxadqrl14xtTeatebTE6bgu3D2SxaFX1owcF1XtkOG6cC/0Azun1GJfmCA==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"81772cf2a4f5042d2ef752ab22456e7e7021724c15caee39254ef71be11715a7","last_reissued_at":"2026-05-18T00:40:42.403572Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:40:42.403572Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1707.02275","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:40:42Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"i1ZYkg+3Mc4ARrN7G3SwB2It7Mn8HwVy9ipYLaNQbeGMGiRZwPV/fvTKdbpEet+J1wjCuTnZsYD7fuC330IxDQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-03T19:59:19.363368Z"},"content_sha256":"105bdff7504e5fb03a5dcee191a6a453f32ac7f3a22e6045baa094089d6c4714","schema_version":"1.0","event_id":"sha256:105bdff7504e5fb03a5dcee191a6a453f32ac7f3a22e6045baa094089d6c4714"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2017:QF3SZ4VE6UCC2LXXKKVSERLOPZ","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"A parallel corpus of Python functions and documentation strings for automated code documentation and code generation","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.CL","authors_text":"Antonio Valerio Miceli Barone, Rico Sennrich","submitted_at":"2017-07-07T17:15:27Z","abstract_excerpt":"Automated documentation of programming source code and automated code generation from natural language are challenging tasks of both practical and scientific interest. Progress in these areas has been limited by the low availability of parallel corpora of code and natural language descriptions, which tend to be small and constrained to specific domains.\n  In this work we introduce a large and diverse parallel corpus of a hundred thousands Python functions with their documentation strings (\"docstrings\") generated by scraping open source repositories on GitHub. We describe baseline results for t"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1707.02275","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:40:42Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"FodmI2I7kKEsZevFhQxvdbYDw/3fPxTyPMpIzIogfuLaCvMeqqUM+wfNldHyquWMm1zRyCiai8kQf7Zepnd3Cg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-06-03T19:59:19.363719Z"},"content_sha256":"b503bf38e9e39eb3d9c799f4a3c818cb340352dbabe0956e7b95147bdd59c1c8","schema_version":"1.0","event_id":"sha256:b503bf38e9e39eb3d9c799f4a3c818cb340352dbabe0956e7b95147bdd59c1c8"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/QF3SZ4VE6UCC2LXXKKVSERLOPZ/bundle.json","state_url":"https://pith.science/pith/QF3SZ4VE6UCC2LXXKKVSERLOPZ/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/QF3SZ4VE6UCC2LXXKKVSERLOPZ/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-06-03T19:59:19Z","links":{"resolver":"https://pith.science/pith/QF3SZ4VE6UCC2LXXKKVSERLOPZ","bundle":"https://pith.science/pith/QF3SZ4VE6UCC2LXXKKVSERLOPZ/bundle.json","state":"https://pith.science/pith/QF3SZ4VE6UCC2LXXKKVSERLOPZ/state.json","well_known_bundle":"https://pith.science/.well-known/pith/QF3SZ4VE6UCC2LXXKKVSERLOPZ/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2017:QF3SZ4VE6UCC2LXXKKVSERLOPZ","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"f3565d75f8221b466a431c8bff537264739074bee5c1a6a0d13c40169ab98de8","cross_cats_sorted":["cs.AI"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2017-07-07T17:15:27Z","title_canon_sha256":"7d4fdafc501a4354cd392248f366e138eed4d263799cf3f8230dd3a41e82b88d"},"schema_version":"1.0","source":{"id":"1707.02275","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1707.02275","created_at":"2026-05-18T00:40:42Z"},{"alias_kind":"arxiv_version","alias_value":"1707.02275v1","created_at":"2026-05-18T00:40:42Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1707.02275","created_at":"2026-05-18T00:40:42Z"},{"alias_kind":"pith_short_12","alias_value":"QF3SZ4VE6UCC","created_at":"2026-05-18T12:31:37Z"},{"alias_kind":"pith_short_16","alias_value":"QF3SZ4VE6UCC2LXX","created_at":"2026-05-18T12:31:37Z"},{"alias_kind":"pith_short_8","alias_value":"QF3SZ4VE","created_at":"2026-05-18T12:31:37Z"}],"graph_snapshots":[{"event_id":"sha256:b503bf38e9e39eb3d9c799f4a3c818cb340352dbabe0956e7b95147bdd59c1c8","target":"graph","created_at":"2026-05-18T00:40:42Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Automated documentation of programming source code and automated code generation from natural language are challenging tasks of both practical and scientific interest. Progress in these areas has been limited by the low availability of parallel corpora of code and natural language descriptions, which tend to be small and constrained to specific domains.\n  In this work we introduce a large and diverse parallel corpus of a hundred thousands Python functions with their documentation strings (\"docstrings\") generated by scraping open source repositories on GitHub. We describe baseline results for t","authors_text":"Antonio Valerio Miceli Barone, Rico Sennrich","cross_cats":["cs.AI"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2017-07-07T17:15:27Z","title":"A parallel corpus of Python functions and documentation strings for automated code documentation and code generation"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1707.02275","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:105bdff7504e5fb03a5dcee191a6a453f32ac7f3a22e6045baa094089d6c4714","target":"record","created_at":"2026-05-18T00:40:42Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"f3565d75f8221b466a431c8bff537264739074bee5c1a6a0d13c40169ab98de8","cross_cats_sorted":["cs.AI"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.CL","submitted_at":"2017-07-07T17:15:27Z","title_canon_sha256":"7d4fdafc501a4354cd392248f366e138eed4d263799cf3f8230dd3a41e82b88d"},"schema_version":"1.0","source":{"id":"1707.02275","kind":"arxiv","version":1}},"canonical_sha256":"81772cf2a4f5042d2ef752ab22456e7e7021724c15caee39254ef71be11715a7","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"81772cf2a4f5042d2ef752ab22456e7e7021724c15caee39254ef71be11715a7","first_computed_at":"2026-05-18T00:40:42.403572Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:40:42.403572Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"zkkOIFCeEXVvvFWEBw9mXW7jTCa2Fxadqrl14xtTeatebTE6bgu3D2SxaFX1owcF1XtkOG6cC/0Azun1GJfmCA==","signature_status":"signed_v1","signed_at":"2026-05-18T00:40:42.404127Z","signed_message":"canonical_sha256_bytes"},"source_id":"1707.02275","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:105bdff7504e5fb03a5dcee191a6a453f32ac7f3a22e6045baa094089d6c4714","sha256:b503bf38e9e39eb3d9c799f4a3c818cb340352dbabe0956e7b95147bdd59c1c8"],"state_sha256":"de021367aefa3dd7b0b3448266b43bd5be3ea1f4cc56a2f2f47aa9b590679a54"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"IOGCSFymGu1xSelR8B4g8aHbHFY5o8D1y2IQIwRHhFvZEsrBkT2cW0Pwgt4KfvrTxbYIOe4+JTEwdgT4+2M3CQ==","signed_message":"bundle_sha256_bytes","signed_at":"2026-06-03T19:59:19.365670Z","bundle_sha256":"1ddc7ad47b730bda55cd58cffd8c80fb94427ab43e37d85c536317669e78f3b7"}}