{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:FWUC24DSCS5W4EW3BQCE2FKWFQ","short_pith_number":"pith:FWUC24DS","schema_version":"1.0","canonical_sha256":"2da82d707214bb6e12db0c044d15562c04dc1875b7f1195661121332e9d3100d","source":{"kind":"arxiv","id":"2605.16832","version":1},"attestation_state":"computed","paper":{"title":"Coarse Semantic Injection for LLM-Conditioned Structured Indoor Prediction","license":"http://creativecommons.org/licenses/by-nc-nd/4.0/","headline":"Appending a coarse four-group semantic color code to raw point attributes before tokenization improves LLM-based structured indoor prediction while leaving the decoder unchanged.","cross_cats":[],"primary_cat":"cs.CV","authors_text":"Jinjia Zhou, Shuliang Zhu, Tomiwa Adey","submitted_at":"2026-05-16T06:28:23Z","abstract_excerpt":"Large language models (LLMs) have recently been used as structured decoders for indoor understanding from 3D point-token inputs. However, point cloud encoders often under-represent thin structural elements such as doors and windows after voxelization and sparse pooling, and may miss individual furniture instances in cluttered scenes. We propose an interface-preserving semantic augmentation for LLM-conditioned structured decoding. The key idea is to associate semantic evidence with the point-cloud representation, reduce it to a coarse four-group code (furniture, walls, openings, and others), an"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":true,"formal_links_present":true},"canonical_record":{"source":{"id":"2605.16832","kind":"arxiv","version":1},"metadata":{"license":"http://creativecommons.org/licenses/by-nc-nd/4.0/","primary_cat":"cs.CV","submitted_at":"2026-05-16T06:28:23Z","cross_cats_sorted":[],"title_canon_sha256":"1fe06f09205860adb2554dde281a7774eac692126f075d5c1891217e1a6ee109","abstract_canon_sha256":"9e7c972760fa532a9f5c6ecb62c24d81ef8430603f01148371aba0795b810e97"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-20T00:03:25.097556Z","signature_b64":"gIZBnAQCjTU/nWIzp54hlB2IW/mX5VGujlDJYAsXkTbpf2/JH9PpiOwRlBcyyGHNsfhlpzZQOXRQU0Q/cJv4CQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"2da82d707214bb6e12db0c044d15562c04dc1875b7f1195661121332e9d3100d","last_reissued_at":"2026-05-20T00:03:25.096709Z","signature_status":"signed_v1","first_computed_at":"2026-05-20T00:03:25.096709Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"Coarse Semantic Injection for LLM-Conditioned Structured Indoor Prediction","license":"http://creativecommons.org/licenses/by-nc-nd/4.0/","headline":"Appending a coarse four-group semantic color code to raw point attributes before tokenization improves LLM-based structured indoor prediction while leaving the decoder unchanged.","cross_cats":[],"primary_cat":"cs.CV","authors_text":"Jinjia Zhou, Shuliang Zhu, Tomiwa Adey","submitted_at":"2026-05-16T06:28:23Z","abstract_excerpt":"Large language models (LLMs) have recently been used as structured decoders for indoor understanding from 3D point-token inputs. However, point cloud encoders often under-represent thin structural elements such as doors and windows after voxelization and sparse pooling, and may miss individual furniture instances in cluttered scenes. We propose an interface-preserving semantic augmentation for LLM-conditioned structured decoding. The key idea is to associate semantic evidence with the point-cloud representation, reduce it to a coarse four-group code (furniture, walls, openings, and others), an"},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"The semantic color code is appended to the original raw point attributes before tokenization, so geometry and semantics share the same sparse tokenization path while the downstream language model decoder and output serialization remain unchanged.","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"That reliable coarse semantic evidence (furniture/walls/openings/others) can be obtained from RGB or other sources and injected without introducing errors that outweigh the benefits after sparse pooling and LLM decoding.","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"Coarse four-group semantic color coding (RGBB) appended to point clouds before tokenization improves LLM-based structured indoor prediction on Structured3D, SpatialLM, and ARKitScenes, especially for openings and furniture instances.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"Appending a coarse four-group semantic color code to raw point attributes before tokenization improves LLM-based structured indoor prediction while leaving the decoder unchanged.","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"cfc763c03e1c49c0c265c97e15cb8e43bfe0fb49daa97cfee8f41bacbc0cc7e9"},"source":{"id":"2605.16832","kind":"arxiv","version":1},"verdict":{"id":"3bf7dbd4-64af-4fe2-a144-74a11cc3e873","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-19T20:49:53.481134Z","strongest_claim":"The semantic color code is appended to the original raw point attributes before tokenization, so geometry and semantics share the same sparse tokenization path while the downstream language model decoder and output serialization remain unchanged.","one_line_summary":"Coarse four-group semantic color coding (RGBB) appended to point clouds before tokenization improves LLM-based structured indoor prediction on Structured3D, SpatialLM, and ARKitScenes, especially for openings and furniture instances.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"That reliable coarse semantic evidence (furniture/walls/openings/others) can be obtained from RGB or other sources and injected without introducing errors that outweigh the benefits after sparse pooling and LLM decoding.","pith_extraction_headline":"Appending a coarse four-group semantic color code to raw point attributes before tokenization improves LLM-based structured indoor prediction while leaving the decoder unchanged."},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2605.16832/integrity.json","findings":[],"available":true,"detectors_run":[{"name":"doi_compliance","ran_at":"2026-05-19T21:01:25.218380Z","status":"completed","version":"1.0.0","findings_count":0},{"name":"doi_title_agreement","ran_at":"2026-05-19T21:01:19.249799Z","status":"completed","version":"1.0.0","findings_count":0},{"name":"claim_evidence","ran_at":"2026-05-19T19:01:56.256913Z","status":"completed","version":"1.0.0","findings_count":0},{"name":"ai_meta_artifact","ran_at":"2026-05-19T18:33:26.400283Z","status":"skipped","version":"1.0.0","findings_count":0}],"snapshot_sha256":"eba3a408f250375cc5ad72c2efe4d82539400ad3c0252729fc85471ae45dbfb6"},"references":{"count":48,"sample":[{"doi":"","year":null,"title":"Proceedings of the IEEE conference on computer vision and pattern recognition , pages=","work_id":"99192ff0-9f38-46e0-bb3f-6c1128b3e8d7","ref_index":1,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":null,"title":"Wang, Shuzhe and Leroy, Vincent and Cabon, Yohann and Chidlovskii, Boris and Revaud, Jerome , booktitle=","work_id":"fa252e6a-4b80-41fb-a418-d9accd6439a1","ref_index":2,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":2024,"title":"Grounding image matching in","work_id":"75ce4ad8-18fa-4e4a-99a4-f8cbf25209fb","ref_index":3,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":null,"title":"Wang, Jianyuan and Chen, Minghao and Karaev, Nikita and Vedaldi, Andrea and Rupprecht, Christian and Novotny, David , booktitle=","work_id":"52b27afb-5c4d-4f61-9560-2b628ffc74c3","ref_index":4,"cited_arxiv_id":"","is_internal_anchor":false},{"doi":"","year":null,"title":"Nie, Yinyu and Han, Xiaoguang and Guo, Shihui and Zheng, Yujian and Chang, Jian and Zhang, Jian Jun , booktitle=","work_id":"965ec94c-668e-47b3-8744-1b0e319834f0","ref_index":5,"cited_arxiv_id":"","is_internal_anchor":false}],"resolved_work":48,"snapshot_sha256":"560638369b562a81b497c3c7677bf52b0e92b983db740f43a932631715e7dfdc","internal_anchors":0},"formal_canon":{"evidence_count":2,"snapshot_sha256":"f8bc70b0537a77d1324269cc643f6d61e37977ea6b9ddbdbaeae882acf18be42"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2605.16832","created_at":"2026-05-20T00:03:25.096863+00:00"},{"alias_kind":"arxiv_version","alias_value":"2605.16832v1","created_at":"2026-05-20T00:03:25.096863+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2605.16832","created_at":"2026-05-20T00:03:25.096863+00:00"},{"alias_kind":"pith_short_12","alias_value":"FWUC24DSCS5W","created_at":"2026-05-20T00:03:25.096863+00:00"},{"alias_kind":"pith_short_16","alias_value":"FWUC24DSCS5W4EW3","created_at":"2026-05-20T00:03:25.096863+00:00"},{"alias_kind":"pith_short_8","alias_value":"FWUC24DS","created_at":"2026-05-20T00:03:25.096863+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":2,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/FWUC24DSCS5W4EW3BQCE2FKWFQ","json":"https://pith.science/pith/FWUC24DSCS5W4EW3BQCE2FKWFQ.json","graph_json":"https://pith.science/api/pith-number/FWUC24DSCS5W4EW3BQCE2FKWFQ/graph.json","events_json":"https://pith.science/api/pith-number/FWUC24DSCS5W4EW3BQCE2FKWFQ/events.json","paper":"https://pith.science/paper/FWUC24DS"},"agent_actions":{"view_html":"https://pith.science/pith/FWUC24DSCS5W4EW3BQCE2FKWFQ","download_json":"https://pith.science/pith/FWUC24DSCS5W4EW3BQCE2FKWFQ.json","view_paper":"https://pith.science/paper/FWUC24DS","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2605.16832&json=true","fetch_graph":"https://pith.science/api/pith-number/FWUC24DSCS5W4EW3BQCE2FKWFQ/graph.json","fetch_events":"https://pith.science/api/pith-number/FWUC24DSCS5W4EW3BQCE2FKWFQ/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/FWUC24DSCS5W4EW3BQCE2FKWFQ/action/timestamp_anchor","attest_storage":"https://pith.science/pith/FWUC24DSCS5W4EW3BQCE2FKWFQ/action/storage_attestation","attest_author":"https://pith.science/pith/FWUC24DSCS5W4EW3BQCE2FKWFQ/action/author_attestation","sign_citation":"https://pith.science/pith/FWUC24DSCS5W4EW3BQCE2FKWFQ/action/citation_signature","submit_replication":"https://pith.science/pith/FWUC24DSCS5W4EW3BQCE2FKWFQ/action/replication_record"}},"created_at":"2026-05-20T00:03:25.096863+00:00","updated_at":"2026-05-20T00:03:25.096863+00:00"}