{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2017:CI542N3ZIEVU23BQV5HIKF4CMK","short_pith_number":"pith:CI542N3Z","canonical_record":{"source":{"id":"1712.03346","kind":"arxiv","version":3},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"q-bio.QM","submitted_at":"2017-12-09T06:36:17Z","cross_cats_sorted":["cs.LG"],"title_canon_sha256":"ac3e3372f5cb4483001415c0a603b0b7531b3b142a34284d450913f2f4cca46c","abstract_canon_sha256":"ab44934b8cb0b0496a7543104b17bf01eb58251f6fcc85a5bba1f5dbec486a6c"},"schema_version":"1.0"},"canonical_sha256":"123bcd3779412b4d6c30af4e85178262b7e16f2318413d7683e85e31adfa773f","source":{"kind":"arxiv","id":"1712.03346","version":3},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1712.03346","created_at":"2026-05-18T00:26:48Z"},{"alias_kind":"arxiv_version","alias_value":"1712.03346v3","created_at":"2026-05-18T00:26:48Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1712.03346","created_at":"2026-05-18T00:26:48Z"},{"alias_kind":"pith_short_12","alias_value":"CI542N3ZIEVU","created_at":"2026-05-18T12:31:10Z"},{"alias_kind":"pith_short_16","alias_value":"CI542N3ZIEVU23BQ","created_at":"2026-05-18T12:31:10Z"},{"alias_kind":"pith_short_8","alias_value":"CI542N3Z","created_at":"2026-05-18T12:31:10Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2017:CI542N3ZIEVU23BQV5HIKF4CMK","target":"record","payload":{"canonical_record":{"source":{"id":"1712.03346","kind":"arxiv","version":3},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"q-bio.QM","submitted_at":"2017-12-09T06:36:17Z","cross_cats_sorted":["cs.LG"],"title_canon_sha256":"ac3e3372f5cb4483001415c0a603b0b7531b3b142a34284d450913f2f4cca46c","abstract_canon_sha256":"ab44934b8cb0b0496a7543104b17bf01eb58251f6fcc85a5bba1f5dbec486a6c"},"schema_version":"1.0"},"canonical_sha256":"123bcd3779412b4d6c30af4e85178262b7e16f2318413d7683e85e31adfa773f","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T00:26:48.961097Z","signature_b64":"W6zGX/7gSLkTSXifiCUyjFOCPSxP9KghDQsSCf7yvsTg4kJ05uK4YM4DVvpheFUorGSSuOeFTz/b/5m98sMHDg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"123bcd3779412b4d6c30af4e85178262b7e16f2318413d7683e85e31adfa773f","last_reissued_at":"2026-05-18T00:26:48.960422Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T00:26:48.960422Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1712.03346","source_version":3,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:26:48Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"wnQdApfwPuA36cI5NBVB/t0277/5YFHrI2XVz7lUuBQFR4EKNgBQsqWDPKxL/YzGXV8c8G+DBFSdeBZFmTlODw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-30T12:55:23.106607Z"},"content_sha256":"72bcd89d75c1ab39ec042032c616664f40cd5ac9b59e626e760377f007e66d0b","schema_version":"1.0","event_id":"sha256:72bcd89d75c1ab39ec042032c616664f40cd5ac9b59e626e760377f007e66d0b"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2017:CI542N3ZIEVU23BQV5HIKF4CMK","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Variational auto-encoding of protein sequences","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["cs.LG"],"primary_cat":"q-bio.QM","authors_text":"Eric Kelsic, George M. Church, Martin A. Nowak, Sam Sinai","submitted_at":"2017-12-09T06:36:17Z","abstract_excerpt":"Proteins are responsible for the most diverse set of functions in biology. The ability to extract information from protein sequences and to predict the effects of mutations is extremely valuable in many domains of biology and medicine. However the mapping between protein sequence and function is complex and poorly understood. Here we present an embedding of natural protein sequences using a Variational Auto-Encoder and use it to predict how mutations affect protein function. We use this unsupervised approach to cluster natural variants and learn interactions between sets of positions within a "},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1712.03346","kind":"arxiv","version":3},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T00:26:48Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"/zrkMwhGNe1WKWmPXre00x9Kqk/zwBeInrLFmrNdHHmJ51XQeVptRSafqXjJ0pnUH3NPaZFjg4wnDe/PnjKZBw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-30T12:55:23.107252Z"},"content_sha256":"b1d0cd73a08735ed785237c19144749848ce8f7c1653d04f74426cd0931e2e26","schema_version":"1.0","event_id":"sha256:b1d0cd73a08735ed785237c19144749848ce8f7c1653d04f74426cd0931e2e26"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/CI542N3ZIEVU23BQV5HIKF4CMK/bundle.json","state_url":"https://pith.science/pith/CI542N3ZIEVU23BQV5HIKF4CMK/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/CI542N3ZIEVU23BQV5HIKF4CMK/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-30T12:55:23Z","links":{"resolver":"https://pith.science/pith/CI542N3ZIEVU23BQV5HIKF4CMK","bundle":"https://pith.science/pith/CI542N3ZIEVU23BQV5HIKF4CMK/bundle.json","state":"https://pith.science/pith/CI542N3ZIEVU23BQV5HIKF4CMK/state.json","well_known_bundle":"https://pith.science/.well-known/pith/CI542N3ZIEVU23BQV5HIKF4CMK/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2017:CI542N3ZIEVU23BQV5HIKF4CMK","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"ab44934b8cb0b0496a7543104b17bf01eb58251f6fcc85a5bba1f5dbec486a6c","cross_cats_sorted":["cs.LG"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"q-bio.QM","submitted_at":"2017-12-09T06:36:17Z","title_canon_sha256":"ac3e3372f5cb4483001415c0a603b0b7531b3b142a34284d450913f2f4cca46c"},"schema_version":"1.0","source":{"id":"1712.03346","kind":"arxiv","version":3}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1712.03346","created_at":"2026-05-18T00:26:48Z"},{"alias_kind":"arxiv_version","alias_value":"1712.03346v3","created_at":"2026-05-18T00:26:48Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1712.03346","created_at":"2026-05-18T00:26:48Z"},{"alias_kind":"pith_short_12","alias_value":"CI542N3ZIEVU","created_at":"2026-05-18T12:31:10Z"},{"alias_kind":"pith_short_16","alias_value":"CI542N3ZIEVU23BQ","created_at":"2026-05-18T12:31:10Z"},{"alias_kind":"pith_short_8","alias_value":"CI542N3Z","created_at":"2026-05-18T12:31:10Z"}],"graph_snapshots":[{"event_id":"sha256:b1d0cd73a08735ed785237c19144749848ce8f7c1653d04f74426cd0931e2e26","target":"graph","created_at":"2026-05-18T00:26:48Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Proteins are responsible for the most diverse set of functions in biology. The ability to extract information from protein sequences and to predict the effects of mutations is extremely valuable in many domains of biology and medicine. However the mapping between protein sequence and function is complex and poorly understood. Here we present an embedding of natural protein sequences using a Variational Auto-Encoder and use it to predict how mutations affect protein function. We use this unsupervised approach to cluster natural variants and learn interactions between sets of positions within a ","authors_text":"Eric Kelsic, George M. Church, Martin A. Nowak, Sam Sinai","cross_cats":["cs.LG"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"q-bio.QM","submitted_at":"2017-12-09T06:36:17Z","title":"Variational auto-encoding of protein sequences"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1712.03346","kind":"arxiv","version":3},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:72bcd89d75c1ab39ec042032c616664f40cd5ac9b59e626e760377f007e66d0b","target":"record","created_at":"2026-05-18T00:26:48Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"ab44934b8cb0b0496a7543104b17bf01eb58251f6fcc85a5bba1f5dbec486a6c","cross_cats_sorted":["cs.LG"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"q-bio.QM","submitted_at":"2017-12-09T06:36:17Z","title_canon_sha256":"ac3e3372f5cb4483001415c0a603b0b7531b3b142a34284d450913f2f4cca46c"},"schema_version":"1.0","source":{"id":"1712.03346","kind":"arxiv","version":3}},"canonical_sha256":"123bcd3779412b4d6c30af4e85178262b7e16f2318413d7683e85e31adfa773f","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"123bcd3779412b4d6c30af4e85178262b7e16f2318413d7683e85e31adfa773f","first_computed_at":"2026-05-18T00:26:48.960422Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T00:26:48.960422Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"W6zGX/7gSLkTSXifiCUyjFOCPSxP9KghDQsSCf7yvsTg4kJ05uK4YM4DVvpheFUorGSSuOeFTz/b/5m98sMHDg==","signature_status":"signed_v1","signed_at":"2026-05-18T00:26:48.961097Z","signed_message":"canonical_sha256_bytes"},"source_id":"1712.03346","source_kind":"arxiv","source_version":3}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:72bcd89d75c1ab39ec042032c616664f40cd5ac9b59e626e760377f007e66d0b","sha256:b1d0cd73a08735ed785237c19144749848ce8f7c1653d04f74426cd0931e2e26"],"state_sha256":"4289c8aae1de760363c6498fbbe51c0becdb37750036cdd8f8bf3a75ad0614ae"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"bWhPCs1fClTKfAOcNBLiOPpOCAVpOFkQZTKwZBVhVshmcM5MTXFWC0Jas1lqIBHwkxYk1SJXHNBG5+2tP2cBBQ==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-30T12:55:23.111152Z","bundle_sha256":"b00a80dd8785cf6c8a00e37cca315e4acde9ca92d1635d024d101fd63378db5b"}}