{"bundle_type":"pith_open_graph_bundle","bundle_version":"1.0","pith_number":"pith:2025:UVBEQKV3TMV4RHGB7ZAS4EUW2I","short_pith_number":"pith:UVBEQKV3","canonical_record":{"source":{"id":"2508.15503","kind":"arxiv","version":6},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.SE","submitted_at":"2025-08-21T12:30:30Z","cross_cats_sorted":[],"title_canon_sha256":"e0309463248c4b33dc5e30dc42b44758a0eef593f3d393d909aca9647c5e61ed","abstract_canon_sha256":"8dfd1ca1acf1d61e1b26adcb9bc4f84852bcfbebced2dc224ab91ec70d040fb3"},"schema_version":"1.0"},"canonical_sha256":"a542482abb9b2bc89cc1fe412e1296d2142692dcd147e588f2f07e6db6ae6680","source":{"kind":"arxiv","id":"2508.15503","version":6},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2508.15503","created_at":"2026-05-25T02:01:06Z"},{"alias_kind":"arxiv_version","alias_value":"2508.15503v6","created_at":"2026-05-25T02:01:06Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2508.15503","created_at":"2026-05-25T02:01:06Z"},{"alias_kind":"pith_short_12","alias_value":"UVBEQKV3TMV4","created_at":"2026-05-25T02:01:06Z"},{"alias_kind":"pith_short_16","alias_value":"UVBEQKV3TMV4RHGB","created_at":"2026-05-25T02:01:06Z"},{"alias_kind":"pith_short_8","alias_value":"UVBEQKV3","created_at":"2026-05-25T02:01:06Z"}],"events":[{"event_type":"record_created","subject_pith_number":"pith:2025:UVBEQKV3TMV4RHGB7ZAS4EUW2I","target":"record","payload":{"canonical_record":{"source":{"id":"2508.15503","kind":"arxiv","version":6},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.SE","submitted_at":"2025-08-21T12:30:30Z","cross_cats_sorted":[],"title_canon_sha256":"e0309463248c4b33dc5e30dc42b44758a0eef593f3d393d909aca9647c5e61ed","abstract_canon_sha256":"8dfd1ca1acf1d61e1b26adcb9bc4f84852bcfbebced2dc224ab91ec70d040fb3"},"schema_version":"1.0"},"canonical_sha256":"a542482abb9b2bc89cc1fe412e1296d2142692dcd147e588f2f07e6db6ae6680","receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-05-25T02:01:06.476576Z","signature_b64":"7dmoh3BEBh+Id5lvO4glXzcAYrj4NAPerKPKiJUmBT2HEpGeQ2vJhsTcT9MGWvlmghmAIVWHn/llROsFBuZqDg==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"a542482abb9b2bc89cc1fe412e1296d2142692dcd147e588f2f07e6db6ae6680","last_reissued_at":"2026-05-25T02:01:06.475903Z","signature_status":"signed_v1","first_computed_at":"2026-05-25T02:01:06.475903Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"source_kind":"arxiv","source_id":"2508.15503","source_version":6,"attestation_state":"computed"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-25T02:01:06Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"mVr72/kAE8EWGchGCjsYG+n8dK7dlNNNox5Xi1nyxxeuXyKW7ojkUCvrDc+YHq7JJMEXmO5mRTxm6VKxMZQbDw==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-30T21:37:22.516853Z"},"content_sha256":"63da39ad6435d12da543e7e52f574a6714d545bf81aeed81e8ed54c568e2afc2","schema_version":"1.0","event_id":"sha256:63da39ad6435d12da543e7e52f574a6714d545bf81aeed81e8ed54c568e2afc2"},{"event_type":"graph_snapshot","subject_pith_number":"pith:2025:UVBEQKV3TMV4RHGB7ZAS4EUW2I","target":"graph","payload":{"graph_snapshot":{"paper":{"title":"Guidelines for Empirical Studies in Software Engineering involving Large Language Models","license":"http://creativecommons.org/licenses/by/4.0/","headline":"Guidelines organize LLM use in software engineering studies into seven types and provide eight reporting rules to boost reproducibility.","cross_cats":[],"primary_cat":"cs.SE","authors_text":"Brian Fitzgerald, Chetan Arora, Christoph Treude, Chunyang Chen, Cristina Martinez Montes, Daniel Russo, Davide Falessi, Davide Fucci, Fabio Calefato, Florian Angermeir, Junda He, Lukas B\\\"ohme, Lutz Prechelt, Marcos Kalinowski, Marvin Mu\\~noz Bar\\'on, Mircea Lungu, Neil Ernst, Paul Ralph, Rijnard van Tonder, Sebastian Baltes, Stefano Lambiase, Stefan Wagner","submitted_at":"2025-08-21T12:30:30Z","abstract_excerpt":"Large Language Models (LLMs) are widely used in software engineering (SE) research and practice, yet their non-determinism, opaque training data, and rapidly evolving models threaten the reproducibility and replicability of empirical studies. We address this challenge through a collaborative effort of 22 researchers, presenting a taxonomy of seven study types that organizes how LLMs are used in SE research, together with eight guidelines for designing and reporting such studies. Each guideline distinguishes requirements (must) from recommended practices (should) and is contextualized by the st"},"claims":{"count":4,"items":[{"kind":"strongest_claim","text":"We address this challenge through a collaborative effort of 22 researchers, presenting a taxonomy of seven study types that organizes how LLMs are used in SE research, together with eight guidelines for designing and reporting such studies.","source":"verdict.strongest_claim","status":"machine_extracted","claim_id":"C1","attestation":"unclaimed"},{"kind":"weakest_assumption","text":"The guidelines assume that declaring usage, reporting versions and prompts, human validation, and open baselines will sufficiently mitigate threats from non-determinism and opacity, without the paper providing new empirical evidence that these practices improve reproducibility outcomes.","source":"verdict.weakest_assumption","status":"machine_extracted","claim_id":"C2","attestation":"unclaimed"},{"kind":"one_line_summary","text":"A group of 22 researchers proposes seven study types and eight guidelines for empirical software engineering studies involving LLMs to enhance reproducibility and replicability.","source":"verdict.one_line_summary","status":"machine_extracted","claim_id":"C3","attestation":"unclaimed"},{"kind":"headline","text":"Guidelines organize LLM use in software engineering studies into seven types and provide eight reporting rules to boost reproducibility.","source":"verdict.pith_extraction.headline","status":"machine_extracted","claim_id":"C4","attestation":"unclaimed"}],"snapshot_sha256":"82e2a417d9b59e55b7b996338458f121e52bd03a45ab59211393066883505920"},"source":{"id":"2508.15503","kind":"arxiv","version":6},"verdict":{"id":"8feff219-60c3-4aa2-94f5-663d09b1c494","model_set":{"reader":"grok-4.3"},"created_at":"2026-05-18T21:59:19.130773Z","strongest_claim":"We address this challenge through a collaborative effort of 22 researchers, presenting a taxonomy of seven study types that organizes how LLMs are used in SE research, together with eight guidelines for designing and reporting such studies.","one_line_summary":"A group of 22 researchers proposes seven study types and eight guidelines for empirical software engineering studies involving LLMs to enhance reproducibility and replicability.","pipeline_version":"pith-pipeline@v0.9.0","weakest_assumption":"The guidelines assume that declaring usage, reporting versions and prompts, human validation, and open baselines will sufficiently mitigate threats from non-determinism and opacity, without the paper providing new empirical evidence that these practices improve reproducibility outcomes.","pith_extraction_headline":"Guidelines organize LLM use in software engineering studies into seven types and provide eight reporting rules to boost reproducibility."},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2508.15503/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":1,"snapshot_sha256":"d7bf83a3d887ead10a6fa2da351533223e91333ebd9ea9eb41dc3048580fa39d"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"verdict_id":"8feff219-60c3-4aa2-94f5-663d09b1c494"},"signer":{"signer_id":"pith.science","signer_type":"pith_registry","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"created_at":"2026-05-25T02:01:06Z","supersedes":[],"prev_event":null,"signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"izcHa9806iJB3aiQHUBiuQEZaZUXl4kOomIgBuinzwImHY8eX5AJfzJUTeFD+//OTE5Ycm8HpN/LdQGPyK1RBA==","signed_message":"open_graph_event_sha256_bytes","signed_at":"2026-05-30T21:37:22.517820Z"},"content_sha256":"ae82c22092bfe37df3c0a5f671ce5e8d252ab1123af9960cb80a1fab8b5ccd67","schema_version":"1.0","event_id":"sha256:ae82c22092bfe37df3c0a5f671ce5e8d252ab1123af9960cb80a1fab8b5ccd67"}],"timestamp_proofs":[],"mirror_hints":[{"mirror_type":"https","name":"Pith Resolver","base_url":"https://pith.science","bundle_url":"https://pith.science/pith/UVBEQKV3TMV4RHGB7ZAS4EUW2I/bundle.json","state_url":"https://pith.science/pith/UVBEQKV3TMV4RHGB7ZAS4EUW2I/state.json","well_known_bundle_url":"https://pith.science/.well-known/pith/UVBEQKV3TMV4RHGB7ZAS4EUW2I/bundle.json","status":"primary"}],"public_keys":[{"key_id":"pith-v1-2026-05","algorithm":"ed25519","format":"raw","public_key_b64":"stVStoiQhXFxp4s2pdzPNoqVNBMojDU/fJ2db5S3CbM=","public_key_hex":"b2d552b68890857171a78b36a5dccf368a953413288c353f7c9d9d6f94b709b3","fingerprint_sha256_b32_first128bits":"RVFV5Z2OI2J3ZUO7ERDEBCYNKS","fingerprint_sha256_hex":"8d4b5ee74e4693bcd1df2446408b0d54","rotates_at":null,"url":"https://pith.science/pith-signing-key.json","notes":"Pith uses this Ed25519 key to sign canonical record SHA-256 digests. Verify with: ed25519_verify(public_key, message=canonical_sha256_bytes, signature=base64decode(signature_b64))."}],"merge_version":"pith-open-graph-merge-v1","built_at":"2026-05-30T21:37:22Z","links":{"resolver":"https://pith.science/pith/UVBEQKV3TMV4RHGB7ZAS4EUW2I","bundle":"https://pith.science/pith/UVBEQKV3TMV4RHGB7ZAS4EUW2I/bundle.json","state":"https://pith.science/pith/UVBEQKV3TMV4RHGB7ZAS4EUW2I/state.json","well_known_bundle":"https://pith.science/.well-known/pith/UVBEQKV3TMV4RHGB7ZAS4EUW2I/bundle.json"},"state":{"state_type":"pith_open_graph_state","state_version":"1.0","pith_number":"pith:2025:UVBEQKV3TMV4RHGB7ZAS4EUW2I","merge_version":"pith-open-graph-merge-v1","event_count":2,"valid_event_count":2,"invalid_event_count":0,"equivocation_count":0,"current":{"canonical_record":{"metadata":{"abstract_canon_sha256":"8dfd1ca1acf1d61e1b26adcb9bc4f84852bcfbebced2dc224ab91ec70d040fb3","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.SE","submitted_at":"2025-08-21T12:30:30Z","title_canon_sha256":"e0309463248c4b33dc5e30dc42b44758a0eef593f3d393d909aca9647c5e61ed"},"schema_version":"1.0","source":{"id":"2508.15503","kind":"arxiv","version":6}},"source_aliases":[{"alias_kind":"arxiv","alias_value":"2508.15503","created_at":"2026-05-25T02:01:06Z"},{"alias_kind":"arxiv_version","alias_value":"2508.15503v6","created_at":"2026-05-25T02:01:06Z"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2508.15503","created_at":"2026-05-25T02:01:06Z"},{"alias_kind":"pith_short_12","alias_value":"UVBEQKV3TMV4","created_at":"2026-05-25T02:01:06Z"},{"alias_kind":"pith_short_16","alias_value":"UVBEQKV3TMV4RHGB","created_at":"2026-05-25T02:01:06Z"},{"alias_kind":"pith_short_8","alias_value":"UVBEQKV3","created_at":"2026-05-25T02:01:06Z"}],"graph_snapshots":[{"event_id":"sha256:ae82c22092bfe37df3c0a5f671ce5e8d252ab1123af9960cb80a1fab8b5ccd67","target":"graph","created_at":"2026-05-25T02:01:06Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"graph_snapshot":{"author_claims":{"count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","strong_count":0},"builder_version":"pith-number-builder-2026-05-17-v1","claims":{"count":4,"items":[{"attestation":"unclaimed","claim_id":"C1","kind":"strongest_claim","source":"verdict.strongest_claim","status":"machine_extracted","text":"We address this challenge through a collaborative effort of 22 researchers, presenting a taxonomy of seven study types that organizes how LLMs are used in SE research, together with eight guidelines for designing and reporting such studies."},{"attestation":"unclaimed","claim_id":"C2","kind":"weakest_assumption","source":"verdict.weakest_assumption","status":"machine_extracted","text":"The guidelines assume that declaring usage, reporting versions and prompts, human validation, and open baselines will sufficiently mitigate threats from non-determinism and opacity, without the paper providing new empirical evidence that these practices improve reproducibility outcomes."},{"attestation":"unclaimed","claim_id":"C3","kind":"one_line_summary","source":"verdict.one_line_summary","status":"machine_extracted","text":"A group of 22 researchers proposes seven study types and eight guidelines for empirical software engineering studies involving LLMs to enhance reproducibility and replicability."},{"attestation":"unclaimed","claim_id":"C4","kind":"headline","source":"verdict.pith_extraction.headline","status":"machine_extracted","text":"Guidelines organize LLM use in software engineering studies into seven types and provide eight reporting rules to boost reproducibility."}],"snapshot_sha256":"82e2a417d9b59e55b7b996338458f121e52bd03a45ab59211393066883505920"},"formal_canon":{"evidence_count":1,"snapshot_sha256":"d7bf83a3d887ead10a6fa2da351533223e91333ebd9ea9eb41dc3048580fa39d"},"integrity":{"available":true,"clean":true,"detectors_run":[],"endpoint":"/pith/2508.15503/integrity.json","findings":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938","summary":{"advisory":0,"by_detector":{},"critical":0,"informational":0}},"paper":{"abstract_excerpt":"Large Language Models (LLMs) are widely used in software engineering (SE) research and practice, yet their non-determinism, opaque training data, and rapidly evolving models threaten the reproducibility and replicability of empirical studies. We address this challenge through a collaborative effort of 22 researchers, presenting a taxonomy of seven study types that organizes how LLMs are used in SE research, together with eight guidelines for designing and reporting such studies. Each guideline distinguishes requirements (must) from recommended practices (should) and is contextualized by the st","authors_text":"Brian Fitzgerald, Chetan Arora, Christoph Treude, Chunyang Chen, Cristina Martinez Montes, Daniel Russo, Davide Falessi, Davide Fucci, Fabio Calefato, Florian Angermeir, Junda He, Lukas B\\\"ohme, Lutz Prechelt, Marcos Kalinowski, Marvin Mu\\~noz Bar\\'on, Mircea Lungu, Neil Ernst, Paul Ralph, Rijnard van Tonder, Sebastian Baltes, Stefano Lambiase, Stefan Wagner","cross_cats":[],"headline":"Guidelines organize LLM use in software engineering studies into seven types and provide eight reporting rules to boost reproducibility.","license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.SE","submitted_at":"2025-08-21T12:30:30Z","title":"Guidelines for Empirical Studies in Software Engineering involving Large Language Models"},"references":{"count":0,"internal_anchors":0,"resolved_work":0,"sample":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2508.15503","kind":"arxiv","version":6},"verdict":{"created_at":"2026-05-18T21:59:19.130773Z","id":"8feff219-60c3-4aa2-94f5-663d09b1c494","model_set":{"reader":"grok-4.3"},"one_line_summary":"A group of 22 researchers proposes seven study types and eight guidelines for empirical software engineering studies involving LLMs to enhance reproducibility and replicability.","pipeline_version":"pith-pipeline@v0.9.0","pith_extraction_headline":"Guidelines organize LLM use in software engineering studies into seven types and provide eight reporting rules to boost reproducibility.","strongest_claim":"We address this challenge through a collaborative effort of 22 researchers, presenting a taxonomy of seven study types that organizes how LLMs are used in SE research, together with eight guidelines for designing and reporting such studies.","weakest_assumption":"The guidelines assume that declaring usage, reporting versions and prompts, human validation, and open baselines will sufficiently mitigate threats from non-determinism and opacity, without the paper providing new empirical evidence that these practices improve reproducibility outcomes."}},"verdict_id":"8feff219-60c3-4aa2-94f5-663d09b1c494"}}],"author_attestations":[],"timestamp_anchors":[],"storage_attestations":[],"citation_signatures":[],"replication_records":[],"corrections":[],"mirror_hints":[],"record_created":{"event_id":"sha256:63da39ad6435d12da543e7e52f574a6714d545bf81aeed81e8ed54c568e2afc2","target":"record","created_at":"2026-05-25T02:01:06Z","signer":{"key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signer_id":"pith.science","signer_type":"pith_registry"},"payload":{"attestation_state":"computed","canonical_record":{"metadata":{"abstract_canon_sha256":"8dfd1ca1acf1d61e1b26adcb9bc4f84852bcfbebced2dc224ab91ec70d040fb3","cross_cats_sorted":[],"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.SE","submitted_at":"2025-08-21T12:30:30Z","title_canon_sha256":"e0309463248c4b33dc5e30dc42b44758a0eef593f3d393d909aca9647c5e61ed"},"schema_version":"1.0","source":{"id":"2508.15503","kind":"arxiv","version":6}},"canonical_sha256":"a542482abb9b2bc89cc1fe412e1296d2142692dcd147e588f2f07e6db6ae6680","receipt":{"algorithm":"ed25519","builder_version":"pith-number-builder-2026-05-17-v1","canonical_sha256":"a542482abb9b2bc89cc1fe412e1296d2142692dcd147e588f2f07e6db6ae6680","first_computed_at":"2026-05-25T02:01:06.475903Z","key_id":"pith-v1-2026-05","kind":"pith_receipt","last_reissued_at":"2026-05-25T02:01:06.475903Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","receipt_version":"0.3","signature_b64":"7dmoh3BEBh+Id5lvO4glXzcAYrj4NAPerKPKiJUmBT2HEpGeQ2vJhsTcT9MGWvlmghmAIVWHn/llROsFBuZqDg==","signature_status":"signed_v1","signed_at":"2026-05-25T02:01:06.476576Z","signed_message":"canonical_sha256_bytes"},"source_id":"2508.15503","source_kind":"arxiv","source_version":6}}},"equivocations":[],"invalid_events":[],"applied_event_ids":["sha256:63da39ad6435d12da543e7e52f574a6714d545bf81aeed81e8ed54c568e2afc2","sha256:ae82c22092bfe37df3c0a5f671ce5e8d252ab1123af9960cb80a1fab8b5ccd67"],"state_sha256":"798875f6be462693a5671d07805043a93b32be68f79a6acd5d078fa6bed20418"},"bundle_signature":{"signature_status":"signed_v1","algorithm":"ed25519","key_id":"pith-v1-2026-05","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54","signature_b64":"ha9QSL/Tw3U/+vFrgCeXPqPGqsdsgpYJdi2B7iROwrWuat4REuTPHy2sEFwthz4ev9A4OjNVUsgIZ1cndHYWCQ==","signed_message":"bundle_sha256_bytes","signed_at":"2026-05-30T21:37:22.521307Z","bundle_sha256":"8038b897b2b63b2ae9e2b28bf84488871ba7754773fd40a34a97a094ec435c99"}}