{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2012:IVYTKHLUBDFANJQBJ2E3EJI5O6","short_pith_number":"pith:IVYTKHLU","canonical_record":{"source":{"id":"1206.3278","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.IR","submitted_at":"2012-06-13T15:42:17Z","cross_cats_sorted":["stat.ME"],"title_canon_sha256":"7f2910b17dc5361ea7d78702f8d7de900e00b6919a5449bc6cbc4fd1bdbece4c","abstract_canon_sha256":"815ce1346b305904666a1db1026de3ae6af1965577c5bdc97bcecd26de4f96e6"},"schema_version":"1.0"},"canonical_sha256":"4571351d7408ca06a6014e89b2251d77af58c76391d925202b9642660a7c3a96","source":{"kind":"arxiv","id":"1206.3278","version":1},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1206.3278","created_at":"2026-05-18T03:53:30Z"},{"alias_kind":"arxiv_version","alias_value":"1206.3278v1","created_at":"2026-05-18T03:53:30Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1206.3278","created_at":"2026-05-18T03:53:30Z"},{"alias_kind":"pith_short_12","alias_value":"IVYTKHLUBDFA","created_at":"2026-05-18T12:27:09Z"},{"alias_kind":"pith_short_16","alias_value":"IVYTKHLUBDFANJQB","created_at":"2026-05-18T12:27:09Z"},{"alias_kind":"pith_short_8","alias_value":"IVYTKHLU","created_at":"2026-05-18T12:27:09Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2012:IVYTKHLUBDFANJQBJ2E3EJI5O6","target":"record","payload":{"canonical_record":{"source":{"id":"1206.3278","kind":"arxiv","version":1},"metadata":{"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.IR","submitted_at":"2012-06-13T15:42:17Z","cross_cats_sorted":["stat.ME"],"title_canon_sha256":"7f2910b17dc5361ea7d78702f8d7de900e00b6919a5449bc6cbc4fd1bdbece4c","abstract_canon_sha256":"815ce1346b305904666a1db1026de3ae6af1965577c5bdc97bcecd26de4f96e6"},"schema_version":"1.0"},"canonical_sha256":"4571351d7408ca06a6014e89b2251d77af58c76391d925202b9642660a7c3a96","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-18T03:53:30.810208Z","signature_b64":"1LwwSrVTuM+dtT/C6B0NenE5ngIpo1c6+5svQMPA2uP2JeJkU3X81pvEYUUJYWrqDyKz1ZHNRgtH7jp30m0gCg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"4571351d7408ca06a6014e89b2251d77af58c76391d925202b9642660a7c3a96","last_reissued_at":"2026-05-18T03:53:30.809652Z","signature_status":"signed_v1","first_computed_at":"2026-05-18T03:53:30.809652Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"1206.3278","source_version":1,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T03:53:30Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"bUj1b6t/A/yJJ71/c/+J6jyaDgS+vudX/onQaZ5nLsCKiSwYeByT3mMKgG4byhupbPl6Uf3hW7ahrkHer28SBw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-27T23:52:02.086732Z"},"content_sha256":"b307d413b88d0886211e0ccb24a9e6ca9b030a5db0df76e8395414e24e427d1a","schema_version":"1.0","event_id":"sha256:b307d413b88d0886211e0ccb24a9e6ca9b030a5db0df76e8395414e24e427d1a"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2012:IVYTKHLUBDFANJQBJ2E3EJI5O6","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Topic Models Conditioned on Arbitrary Features with Dirichlet-multinomial Regression","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","headline":"","cross_cats":["stat.ME"],"primary_cat":"cs.IR","authors_text":"Andrew McCallum, David Mimno","submitted_at":"2012-06-13T15:42:17Z","abstract_excerpt":"Although fully generative models have been successfully used to model the contents of text documents, they are often awkward to apply to combinations of text data and document metadata. In this paper we propose a Dirichlet-multinomial regression (DMR) topic model that includes a log-linear prior on document-topic distributions that is a function of observed features of the document, such as author, publication venue, references, and dates. We show that by selecting appropriate features, DMR topic models can meet or exceed the performance of several previously published topic models designed fo"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1206.3278","kind":"arxiv","version":1},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-18T03:53:30Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"9eXLwD/lZkHGCh2JGu8tPihPbw28dVmdx6QM8SR2194khvXLiwf2SmdgOFelq2NlM0kVeykgTAlAWg4yJwTiBg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-27T23:52:02.087084Z"},"content_sha256":"292a44e9ddc51222116a501c0ce71a09487febf4a6ad13fb9103ecf029182256","schema_version":"1.0","event_id":"sha256:292a44e9ddc51222116a501c0ce71a09487febf4a6ad13fb9103ecf029182256"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/IVYTKHLUBDFANJQBJ2E3EJI5O6/bundle.json","state_url":"https://pith.science/pith/IVYTKHLUBDFANJQBJ2E3EJI5O6/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/IVYTKHLUBDFANJQBJ2E3EJI5O6/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-27T23:52:02Z","links":{"resolver":"https://pith.science/pith/IVYTKHLUBDFANJQBJ2E3EJI5O6","bundle":"https://pith.science/pith/IVYTKHLUBDFANJQBJ2E3EJI5O6/bundle.json","state":"https://pith.science/pith/IVYTKHLUBDFANJQBJ2E3EJI5O6/state.json","well_known_bundle":"https://pith.science/.well-known/pith/IVYTKHLUBDFANJQBJ2E3EJI5O6/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2012:IVYTKHLUBDFANJQBJ2E3EJI5O6","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"815ce1346b305904666a1db1026de3ae6af1965577c5bdc97bcecd26de4f96e6","cross_cats_sorted":["stat.ME"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.IR","submitted_at":"2012-06-13T15:42:17Z","title_canon_sha256":"7f2910b17dc5361ea7d78702f8d7de900e00b6919a5449bc6cbc4fd1bdbece4c"},"schema_version":"1.0","source":{"id":"1206.3278","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"1206.3278","created_at":"2026-05-18T03:53:30Z"},{"alias_kind":"arxiv_version","alias_value":"1206.3278v1","created_at":"2026-05-18T03:53:30Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.1206.3278","created_at":"2026-05-18T03:53:30Z"},{"alias_kind":"pith_short_12","alias_value":"IVYTKHLUBDFA","created_at":"2026-05-18T12:27:09Z"},{"alias_kind":"pith_short_16","alias_value":"IVYTKHLUBDFANJQB","created_at":"2026-05-18T12:27:09Z"},{"alias_kind":"pith_short_8","alias_value":"IVYTKHLU","created_at":"2026-05-18T12:27:09Z"}],"graph_snapshots":[{"event_id":"sha256:292a44e9ddc51222116a501c0ce71a09487febf4a6ad13fb9103ecf029182256","target":"graph","created_at":"2026-05-18T03:53:30Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"paper":{"abstract_excerpt":"Although fully generative models have been successfully used to model the contents of text documents, they are often awkward to apply to combinations of text data and document metadata. In this paper we propose a Dirichlet-multinomial regression (DMR) topic model that includes a log-linear prior on document-topic distributions that is a function of observed features of the document, such as author, publication venue, references, and dates. We show that by selecting appropriate features, DMR topic models can meet or exceed the performance of several previously published topic models designed fo","authors_text":"Andrew McCallum, David Mimno","cross_cats":["stat.ME"],"headline":"","license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.IR","submitted_at":"2012-06-13T15:42:17Z","title":"Topic Models Conditioned on Arbitrary Features with Dirichlet-multinomial Regression"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"1206.3278","kind":"arxiv","version":1},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:b307d413b88d0886211e0ccb24a9e6ca9b030a5db0df76e8395414e24e427d1a","target":"record","created_at":"2026-05-18T03:53:30Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"815ce1346b305904666a1db1026de3ae6af1965577c5bdc97bcecd26de4f96e6","cross_cats_sorted":["stat.ME"],"license":"http://arxiv.org/licenses/nonexclusive-distrib/1.0/","primary_cat":"cs.IR","submitted_at":"2012-06-13T15:42:17Z","title_canon_sha256":"7f2910b17dc5361ea7d78702f8d7de900e00b6919a5449bc6cbc4fd1bdbece4c"},"schema_version":"1.0","source":{"id":"1206.3278","kind":"arxiv","version":1}},"canonical_sha256":"4571351d7408ca06a6014e89b2251d77af58c76391d925202b9642660a7c3a96","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"4571351d7408ca06a6014e89b2251d77af58c76391d925202b9642660a7c3a96","first_computed_at":"2026-05-18T03:53:30.809652Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T03:53:30.809652Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"1LwwSrVTuM+dtT/C6B0NenE5ngIpo1c6+5svQMPA2uP2JeJkU3X81pvEYUUJYWrqDyKz1ZHNRgtH7jp30m0gCg==","signature_status":"signed_v1","signed_at":"2026-05-18T03:53:30.810208Z","signed_message":"canonical_sha256_bytes"},"source_id":"1206.3278","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:b307d413b88d0886211e0ccb24a9e6ca9b030a5db0df76e8395414e24e427d1a","sha256:292a44e9ddc51222116a501c0ce71a09487febf4a6ad13fb9103ecf029182256"],"state_sha256":"5ba9688de8a10e1f83f27789575617691eca85de364ea93845530f4ff3fc4c22"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"e/JjIFN4UuASlPB9oV8E6yQIeVRAQ/GP/C7VeYwnRZPySc1e3MCMPSO88dG1ageG14uyhBJftrLb/gx7i6L1Bw==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-27T23:52:02.089293Z","bundle_sha256":"ad99d861722e15abf8b2651a903d6a22d93eae4331cb51b68146832b730b2c73"}}