{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2026:Z3NS6PQ4EMUPIP33DE3W6SNWSY","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"539004f8e4b2819ae5dc8db60d6ad72631d6c8b0284a598bb0f637c45dbe21a0","cross_cats_sorted":["cs.LG"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2026-05-13T03:52:34Z","title_canon_sha256":"c1d254205fa83bcaff477e1e07f86e19849fc4865f657fcf15a700888f49667d"},"schema_version":"1.0","source":{"id":"2605.12961","kind":"arxiv","version":1}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2605.12961","created_at":"2026-05-18T03:09:09Z"},{"alias_kind":"arxiv_version","alias_value":"2605.12961v1","created_at":"2026-05-18T03:09:09Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.12961","created_at":"2026-05-18T03:09:09Z"},{"alias_kind":"pith_short_12","alias_value":"Z3NS6PQ4EMUP","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_16","alias_value":"Z3NS6PQ4EMUPIP33","created_at":"2026-05-18T12:33:37Z"},{"alias_kind":"pith_short_8","alias_value":"Z3NS6PQ4","created_at":"2026-05-18T12:33:37Z"}],"graph_snapshots":[{"event_id":"sha256:99438bad66c4bc078eb2eacfb3bfaf77aa968e2b28d2021215ff4a8b7bc77d9d","target":"graph","created_at":"2026-05-18T03:09:09Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":4,"items":[{"attestation":"unclaimed","claim_id":"C1","kind":"strongest_claim","source":"verdict.strongest_claim","status":"machine_extracted","text":"Comparative experiments demonstrate that GSEC outperforms 18 state-of-the-art methods across six benchmark datasets, while further analysis confirms its effectiveness in simultaneously reducing both bias and variance."},{"attestation":"unclaimed","claim_id":"C2","kind":"weakest_assumption","source":"verdict.weakest_assumption","status":"machine_extracted","text":"That semantic descriptions generated by current multimodal LLMs supply unbiased, task-adaptive prior knowledge that improves clustering more reliably than matching against predefined vocabularies, and that the bi-layer ensemble reduces variance without introducing new systematic errors."},{"attestation":"unclaimed","claim_id":"C3","kind":"one_line_summary","source":"verdict.one_line_summary","status":"machine_extracted","text":"GSEC uses MLLM-generated semantic guidance and bi-layer ensemble learning to reduce bias and variance, outperforming 18 prior methods on six image clustering benchmarks."},{"attestation":"unclaimed","claim_id":"C4","kind":"headline","source":"verdict.pith_extraction.headline","status":"machine_extracted","text":"GSEC generates adaptive semantic descriptions with multimodal LLMs and applies a bi-layer ensemble to reduce both bias and variance in image clustering."}],"snapshot_sha256":"2b867d97622cd95fdd01bdd2331f86ec30f9936e6335107b9b3ee72215602d01"},"formal_canon":{"evidence_count":2,"snapshot_sha256":"f612ec12d37662a6d2c38ddd4e8ab225b38ffcfb80ae6e53d5baf665d6a399e9"},"paper":{"abstract_excerpt":"Image clustering aims to partition unlabeled image datasets into distinct groups. A core aspect of this task is constructing and leveraging prior knowledge to guide the clustering process. Recent approaches introduce semantic descriptions as prior information, most of which typically relying on matching-based techniques with predefined vocabularies. However, the limited matching space restricts their adaptability to downstream clustering tasks. Moreover, these methods primarily focus on reducing bias to improve performance, frequently overlooking the importance of variance reduction. To addres","authors_text":"Feijiang Li, Jieting Wang, Liang Du, Saixiong Liu, Zhenxiong Li, Zizheng Jiu","cross_cats":["cs.LG"],"headline":"GSEC generates adaptive semantic descriptions with multimodal LLMs and applies a bi-layer ensemble to reduce both bias and variance in image clustering.","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2026-05-13T03:52:34Z","title":"Reducing Bias and Variance: Generative Semantic Guidance and Bi-Layer Ensemble for Image Clustering"},"references":{"count":39,"internal_anchors":1,"resolved_work":39,"sample":[{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":1,"title":"Food-101–mining discriminative com- ponents with random forests","work_id":"c9632dd1-869e-4082-b235-5fb946af4c76","year":2014},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":2,"title":"Bagging predictors.Machine learning, 24(2):123–140,","work_id":"7ef0e147-29f3-46ec-ab7c-37bbfe1c00c0","year":1996},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":3,"title":"Random forests.Machine learning, 45(1):5–32,","work_id":"7d32936a-5fc7-4005-a3d3-05307cd394f7","year":2001},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":4,"title":"Semantic-enhanced im- age clustering","work_id":"032a7f2a-4305-4e67-907e-59401e49e072","year":2023},{"cited_arxiv_id":"","doi":"","is_internal_anchor":false,"ref_index":5,"title":"Deep clustering for unsupervised learning of visual features","work_id":"3f6fc355-cb0d-41f9-a910-2dfaa948c4c1","year":2018}],"snapshot_sha256":"f3d031ac090ad157b0415aed2677924b5b0d6f494b5010d4b7c88c6d49e4b61b"},"source":{"id":"2605.12961","kind":"arxiv","version":1},"verdict":{"created_at":"2026-05-14T19:29:45.737202Z","id":"f4711475-5710-4c64-8e41-8ebf37a25f9f","model_set":{"reader":"grok-4.3"},"one_line_summary":"GSEC uses MLLM-generated semantic guidance and bi-layer ensemble learning to reduce bias and variance, outperforming 18 prior methods on six image clustering benchmarks.","pipeline_version":"pith-pipeline@v0.9.0","pith_extraction_headline":"GSEC generates adaptive semantic descriptions with multimodal LLMs and applies a bi-layer ensemble to reduce both bias and variance in image clustering.","strongest_claim":"Comparative experiments demonstrate that GSEC outperforms 18 state-of-the-art methods across six benchmark datasets, while further analysis confirms its effectiveness in simultaneously reducing both bias and variance.","weakest_assumption":"That semantic descriptions generated by current multimodal LLMs supply unbiased, task-adaptive prior knowledge that improves clustering more reliably than matching against predefined vocabularies, and that the bi-layer ensemble reduces variance without introducing new systematic errors."}},"verdict_id":"f4711475-5710-4c64-8e41-8ebf37a25f9f"}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:30283206adbb8df643a1b316826957cd9552efca9720d27bce92e4bb1848d022","target":"record","created_at":"2026-05-18T03:09:09Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"539004f8e4b2819ae5dc8db60d6ad72631d6c8b0284a598bb0f637c45dbe21a0","cross_cats_sorted":["cs.LG"],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CV","submitted_at":"2026-05-13T03:52:34Z","title_canon_sha256":"c1d254205fa83bcaff477e1e07f86e19849fc4865f657fcf15a700888f49667d"},"schema_version":"1.0","source":{"id":"2605.12961","kind":"arxiv","version":1}},"canonical_sha256":"cedb2f3e1c2328f43f7b19376f49b69638538b2e8c7f5ee3c0aef7fd71355f47","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"cedb2f3e1c2328f43f7b19376f49b69638538b2e8c7f5ee3c0aef7fd71355f47","first_computed_at":"2026-05-18T03:09:09.162903Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-18T03:09:09.162903Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"5N2b1AbYTWSvjytkLJ2SlPwvAo1uuYOTw7v3m6IJTvBn2AG/xam1Lut/t4i2G7WuB1CHl4sHZ3dOqlqza2USCw==","signature_status":"signed_v1","signed_at":"2026-05-18T03:09:09.163556Z","signed_message":"canonical_sha256_bytes"},"source_id":"2605.12961","source_kind":"arxiv","source_version":1}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:30283206adbb8df643a1b316826957cd9552efca9720d27bce92e4bb1848d022","sha256:99438bad66c4bc078eb2eacfb3bfaf77aa968e2b28d2021215ff4a8b7bc77d9d"],"state_sha256":"b623edaaf67854d25d6b2abd69eaa6722acb099a1ece408ca038af5c8bed038e"}