{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2025:KZHB2ET3VNDXKCHS5X3HSYGHN4","short_pith_number":"pith:KZHB2ET3","canonical_record":{"source":{"id":"2507.07644","kind":"arxiv","version":4},"metadata":{"license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","primary_cat":"cs.AI","submitted_at":"2025-07-10T11:16:48Z","cross_cats_sorted":[],"title_canon_sha256":"cd39c66341415d66368ad08a0f71f10739743da69bc123a9dba3bfc17176b014","abstract_canon_sha256":"1114e04f96c576a7a68e67d00c425c7b992c9fffc7f5b9a6af5d495a514a6454"},"schema_version":"1.0"},"canonical_sha256":"564e1d127bab477508f2edf67960c76f275fd459297907afbf54b143a629a1c6","source":{"kind":"arxiv","id":"2507.07644","version":4},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2507.07644","created_at":"2026-05-26T02:03:52Z"},{"alias_kind":"arxiv_version","alias_value":"2507.07644v4","created_at":"2026-05-26T02:03:52Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2507.07644","created_at":"2026-05-26T02:03:52Z"},{"alias_kind":"pith_short_12","alias_value":"KZHB2ET3VNDX","created_at":"2026-05-26T02:03:52Z"},{"alias_kind":"pith_short_16","alias_value":"KZHB2ET3VNDXKCHS","created_at":"2026-05-26T02:03:52Z"},{"alias_kind":"pith_short_8","alias_value":"KZHB2ET3","created_at":"2026-05-26T02:03:52Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2025:KZHB2ET3VNDXKCHS5X3HSYGHN4","target":"record","payload":{"canonical_record":{"source":{"id":"2507.07644","kind":"arxiv","version":4},"metadata":{"license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","primary_cat":"cs.AI","submitted_at":"2025-07-10T11:16:48Z","cross_cats_sorted":[],"title_canon_sha256":"cd39c66341415d66368ad08a0f71f10739743da69bc123a9dba3bfc17176b014","abstract_canon_sha256":"1114e04f96c576a7a68e67d00c425c7b992c9fffc7f5b9a6af5d495a514a6454"},"schema_version":"1.0"},"canonical_sha256":"564e1d127bab477508f2edf67960c76f275fd459297907afbf54b143a629a1c6","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-26T02:03:52.607824Z","signature_b64":"1wqEcDrodvhAW/4Jl+AZdDcmV+emb6EvtvDaZjXDm43yLzp6Gsf200TQUbcae9PqfoxyYl/ENFDDsbbsFIesBw==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"564e1d127bab477508f2edf67960c76f275fd459297907afbf54b143a629a1c6","last_reissued_at":"2026-05-26T02:03:52.606982Z","signature_status":"signed_v1","first_computed_at":"2026-05-26T02:03:52.606982Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2507.07644","source_version":4,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-26T02:03:52Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"q0LQ+6ON8C04/nW9TrGZmbQWY62XxsFiEoJh7p0O45d2wYeretOaM4Lo5FzI/nCIElbZUdRcOqdo3bx/Aw+oBQ==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-28T09:48:49.618506Z"},"content_sha256":"705324b553286a31998022a2f6123e0dc597558fbf17a2bd44caf822448b203f","schema_version":"1.0","event_id":"sha256:705324b553286a31998022a2f6123e0dc597558fbf17a2bd44caf822448b203f"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2025:KZHB2ET3VNDXKCHS5X3HSYGHN4","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"FloorplanQA: A Benchmark for Spatial Reasoning in LLMs using Structured Representations","license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","headline":"","cross_cats":[],"primary_cat":"cs.AI","authors_text":"Abdelrahman Eldesokey, Bernard Ghanem, Fedor Rodionov, John Femiani, Michael Birsak, Peter Wonka","submitted_at":"2025-07-10T11:16:48Z","abstract_excerpt":"We introduce FloorplanQA, a diagnostic benchmark for evaluating spatial reasoning in large language models (LLMs). FloorplanQA is grounded in structured representations of indoor scenes, such as (e.g., kitchens, living rooms, bedrooms, bathrooms, and others), encoded symbolically in JSON or XML layouts. The benchmark covers core spatial tasks, including distance measurement, visibility, path finding, and object placement within constrained spaces. Our results across a variety of frontier open-source and commercial LLMs reveal that while models may succeed in shallow queries, they often fail to"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2507.07644","kind":"arxiv","version":4},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2507.07644/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":null},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-26T02:03:52Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"tM6SNoyrplQJsu4+00WyAgmFFy13soZxRpc3yrbxTZmaWgA6JoPHPMKDmoU8FBl/meHrqImqcxHTv+8vSy+bBg==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-28T09:48:49.618886Z"},"content_sha256":"f749d6e8eb2233025fe08867e6a0f126fb6cb6fe004b8ad0c21528c49fb40e98","schema_version":"1.0","event_id":"sha256:f749d6e8eb2233025fe08867e6a0f126fb6cb6fe004b8ad0c21528c49fb40e98"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/KZHB2ET3VNDXKCHS5X3HSYGHN4/bundle.json","state_url":"https://pith.science/pith/KZHB2ET3VNDXKCHS5X3HSYGHN4/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/KZHB2ET3VNDXKCHS5X3HSYGHN4/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-28T09:48:49Z","links":{"resolver":"https://pith.science/pith/KZHB2ET3VNDXKCHS5X3HSYGHN4","bundle":"https://pith.science/pith/KZHB2ET3VNDXKCHS5X3HSYGHN4/bundle.json","state":"https://pith.science/pith/KZHB2ET3VNDXKCHS5X3HSYGHN4/state.json","well_known_bundle":"https://pith.science/.well-known/pith/KZHB2ET3VNDXKCHS5X3HSYGHN4/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2025:KZHB2ET3VNDXKCHS5X3HSYGHN4","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"1114e04f96c576a7a68e67d00c425c7b992c9fffc7f5b9a6af5d495a514a6454","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","primary_cat":"cs.AI","submitted_at":"2025-07-10T11:16:48Z","title_canon_sha256":"cd39c66341415d66368ad08a0f71f10739743da69bc123a9dba3bfc17176b014"},"schema_version":"1.0","source":{"id":"2507.07644","kind":"arxiv","version":4}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2507.07644","created_at":"2026-05-26T02:03:52Z"},{"alias_kind":"arxiv_version","alias_value":"2507.07644v4","created_at":"2026-05-26T02:03:52Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2507.07644","created_at":"2026-05-26T02:03:52Z"},{"alias_kind":"pith_short_12","alias_value":"KZHB2ET3VNDX","created_at":"2026-05-26T02:03:52Z"},{"alias_kind":"pith_short_16","alias_value":"KZHB2ET3VNDXKCHS","created_at":"2026-05-26T02:03:52Z"},{"alias_kind":"pith_short_8","alias_value":"KZHB2ET3","created_at":"2026-05-26T02:03:52Z"}],"graph_snapshots":[{"event_id":"sha256:f749d6e8eb2233025fe08867e6a0f126fb6cb6fe004b8ad0c21528c49fb40e98","target":"graph","created_at":"2026-05-26T02:03:52Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2507.07644/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"We introduce FloorplanQA, a diagnostic benchmark for evaluating spatial reasoning in large language models (LLMs). FloorplanQA is grounded in structured representations of indoor scenes, such as (e.g., kitchens, living rooms, bedrooms, bathrooms, and others), encoded symbolically in JSON or XML layouts. The benchmark covers core spatial tasks, including distance measurement, visibility, path finding, and object placement within constrained spaces. Our results across a variety of frontier open-source and commercial LLMs reveal that while models may succeed in shallow queries, they often fail to","authors_text":"Abdelrahman Eldesokey, Bernard Ghanem, Fedor Rodionov, John Femiani, Michael Birsak, Peter Wonka","cross_cats":[],"headline":"","license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","primary_cat":"cs.AI","submitted_at":"2025-07-10T11:16:48Z","title":"FloorplanQA: A Benchmark for Spatial Reasoning in LLMs using Structured Representations"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2507.07644","kind":"arxiv","version":4},"verdict":{"created_at":null,"id":null,"model_set":{},"one_line_summary":"","pipeline_version":null,"pith_extraction_headline":"","strongest_claim":"","weakest_assumption":""}},"verdict_id":null}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:705324b553286a31998022a2f6123e0dc597558fbf17a2bd44caf822448b203f","target":"record","created_at":"2026-05-26T02:03:52Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"1114e04f96c576a7a68e67d00c425c7b992c9fffc7f5b9a6af5d495a514a6454","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by-nc-sa/4.0/","primary_cat":"cs.AI","submitted_at":"2025-07-10T11:16:48Z","title_canon_sha256":"cd39c66341415d66368ad08a0f71f10739743da69bc123a9dba3bfc17176b014"},"schema_version":"1.0","source":{"id":"2507.07644","kind":"arxiv","version":4}},"canonical_sha256":"564e1d127bab477508f2edf67960c76f275fd459297907afbf54b143a629a1c6","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"564e1d127bab477508f2edf67960c76f275fd459297907afbf54b143a629a1c6","first_computed_at":"2026-05-26T02:03:52.606982Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-26T02:03:52.606982Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"1wqEcDrodvhAW/4Jl+AZdDcmV+emb6EvtvDaZjXDm43yLzp6Gsf200TQUbcae9PqfoxyYl/ENFDDsbbsFIesBw==","signature_status":"signed_v1","signed_at":"2026-05-26T02:03:52.607824Z","signed_message":"canonical_sha256_bytes"},"source_id":"2507.07644","source_kind":"arxiv","source_version":4}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:705324b553286a31998022a2f6123e0dc597558fbf17a2bd44caf822448b203f","sha256:f749d6e8eb2233025fe08867e6a0f126fb6cb6fe004b8ad0c21528c49fb40e98"],"state_sha256":"e601bf3d59f80a94673e955b573fefa1223baf85404f3c6c388e4b899a6e1943"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"TQ8YxWK1L0p3iveBJoL0OaHjAUif/Z2iXUnLs1ETUH90ezh6D/x52ks4kmy0meBXYOcvi4vAu+yi4fWc8vAhDg==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-28T09:48:49.620907Z","bundle_sha256":"7370019d3b15b88338ba09b98894823c77c09887b1f49ee278edeba7f79aa0f8"}}