{"record_type":"pith_number_record","schema_url":"https://pith.science/schemas/pith-number/v1.json","pith_number":"pith:2026:ZUUGOSD574DL7T2VTGHFUMY7M2","short_pith_number":"pith:ZUUGOSD5","schema_version":"1.0","canonical_sha256":"cd2867487dff06bfcf55998e5a331f6687f1d76379553573b48e19d7598432a1","source":{"kind":"arxiv","id":"2603.26292","version":2},"attestation_state":"computed","paper":{"title":"findsylls: A Language-Agnostic Toolkit for Syllable-Level Speech Tokenization and Embedding","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.CL","authors_text":"H\\'ector Javier V\\'azquez Mart\\'inez","submitted_at":"2026-03-27T11:03:08Z","abstract_excerpt":"Syllable-level units offer compact and linguistically meaningful representations for spoken language modeling and unsupervised word discovery, but research on syllabification remains fragmented across disparate implementations, datasets, and evaluation protocols. We introduce findsylls, a modular, language-agnostic toolkit that unifies classical syllable detectors and end-to-end syllabifiers under a common interface for syllable segmentation, embedding extraction, and multi-granular evaluation. The toolkit implements and standardizes widely used methods (e.g., Sylber, VG-HuBERT) and allows the"},"verification_status":{"content_addressed":true,"pith_receipt":true,"author_attested":false,"weak_author_claims":0,"strong_author_claims":0,"externally_anchored":false,"storage_verified":false,"citation_signatures":0,"replication_records":0,"graph_snapshot":true,"references_resolved":false,"formal_links_present":false},"canonical_record":{"source":{"id":"2603.26292","kind":"arxiv","version":2},"metadata":{"license":"http://creativecommons.org/licenses/by/4.0/","primary_cat":"cs.CL","submitted_at":"2026-03-27T11:03:08Z","cross_cats_sorted":["cs.AI"],"title_canon_sha256":"a216f7ce6d6ee583188a5be3c1a95bddb62ff9bead6096507041d1b0c39edf11","abstract_canon_sha256":"f98fb03240e8b14644bdea0942c95e57041181f5df76025e3c66c02ef507808f"},"schema_version":"1.0"},"receipt":{"kind":"pith_receipt","key_id":"pith-v1-2026-05","algorithm":"ed25519","signed_at":"2026-06-19T16:09:57.431607Z","signature_b64":"8a7ZL+TeGoJv5VFHf12eUTP3yS/qZXWv/0FraK28+KcMnoTI8XR9LsBbSzlGxEt3DYZsf8KZvpxizEGIsTfNBQ==","signed_message":"canonical_sha256_bytes","builder_version":"pith-number-builder-2026-05-17-v1","receipt_version":"0.3","canonical_sha256":"cd2867487dff06bfcf55998e5a331f6687f1d76379553573b48e19d7598432a1","last_reissued_at":"2026-06-19T16:09:57.431176Z","signature_status":"signed_v1","first_computed_at":"2026-06-19T16:09:57.431176Z","public_key_fingerprint":"8d4b5ee74e4693bcd1df2446408b0d54"},"graph_snapshot":{"paper":{"title":"findsylls: A Language-Agnostic Toolkit for Syllable-Level Speech Tokenization and Embedding","license":"http://creativecommons.org/licenses/by/4.0/","headline":"","cross_cats":["cs.AI"],"primary_cat":"cs.CL","authors_text":"H\\'ector Javier V\\'azquez Mart\\'inez","submitted_at":"2026-03-27T11:03:08Z","abstract_excerpt":"Syllable-level units offer compact and linguistically meaningful representations for spoken language modeling and unsupervised word discovery, but research on syllabification remains fragmented across disparate implementations, datasets, and evaluation protocols. We introduce findsylls, a modular, language-agnostic toolkit that unifies classical syllable detectors and end-to-end syllabifiers under a common interface for syllable segmentation, embedding extraction, and multi-granular evaluation. The toolkit implements and standardizes widely used methods (e.g., Sylber, VG-HuBERT) and allows the"},"claims":{"count":0,"items":[],"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"source":{"id":"2603.26292","kind":"arxiv","version":2},"verdict":{"id":null,"model_set":{},"created_at":null,"strongest_claim":"","one_line_summary":"","pipeline_version":null,"weakest_assumption":"","pith_extraction_headline":""},"integrity":{"clean":true,"summary":{"advisory":0,"critical":0,"by_detector":{},"informational":0},"endpoint":"/pith/2603.26292/integrity.json","findings":[],"available":true,"detectors_run":[],"snapshot_sha256":"c28c3603d3b5d939e8dc4c7e95fa8dfce3d595e45f758748cecf8e644a296938"},"references":{"count":0,"sample":[],"resolved_work":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57","internal_anchors":0},"formal_canon":{"evidence_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"author_claims":{"count":0,"strong_count":0,"snapshot_sha256":"258153158e38e3291e3d48162225fcdb2d5a3ed65a07baac614ab91432fd4f57"},"builder_version":"pith-number-builder-2026-05-17-v1"},"aliases":[{"alias_kind":"arxiv","alias_value":"2603.26292","created_at":"2026-06-19T16:09:57.431233+00:00"},{"alias_kind":"arxiv_version","alias_value":"2603.26292v2","created_at":"2026-06-19T16:09:57.431233+00:00"},{"alias_kind":"doi","alias_value":"10.48550/arxiv.2603.26292","created_at":"2026-06-19T16:09:57.431233+00:00"},{"alias_kind":"pith_short_12","alias_value":"ZUUGOSD574DL","created_at":"2026-06-19T16:09:57.431233+00:00"},{"alias_kind":"pith_short_16","alias_value":"ZUUGOSD574DL7T2V","created_at":"2026-06-19T16:09:57.431233+00:00"},{"alias_kind":"pith_short_8","alias_value":"ZUUGOSD5","created_at":"2026-06-19T16:09:57.431233+00:00"}],"events":[],"event_summary":{},"paper_claims":[],"inbound_citations":{"count":0,"internal_anchor_count":0,"sample":[]},"formal_canon":{"evidence_count":0,"sample":[],"anchors":[]},"links":{"html":"https://pith.science/pith/ZUUGOSD574DL7T2VTGHFUMY7M2","json":"https://pith.science/pith/ZUUGOSD574DL7T2VTGHFUMY7M2.json","graph_json":"https://pith.science/api/pith-number/ZUUGOSD574DL7T2VTGHFUMY7M2/graph.json","events_json":"https://pith.science/api/pith-number/ZUUGOSD574DL7T2VTGHFUMY7M2/events.json","paper":"https://pith.science/paper/ZUUGOSD5"},"agent_actions":{"view_html":"https://pith.science/pith/ZUUGOSD574DL7T2VTGHFUMY7M2","download_json":"https://pith.science/pith/ZUUGOSD574DL7T2VTGHFUMY7M2.json","view_paper":"https://pith.science/paper/ZUUGOSD5","resolve_alias":"https://pith.science/api/pith-number/resolve?arxiv=2603.26292&json=true","fetch_graph":"https://pith.science/api/pith-number/ZUUGOSD574DL7T2VTGHFUMY7M2/graph.json","fetch_events":"https://pith.science/api/pith-number/ZUUGOSD574DL7T2VTGHFUMY7M2/events.json","actions":{"anchor_timestamp":"https://pith.science/pith/ZUUGOSD574DL7T2VTGHFUMY7M2/action/timestamp_anchor","attest_storage":"https://pith.science/pith/ZUUGOSD574DL7T2VTGHFUMY7M2/action/storage_attestation","attest_author":"https://pith.science/pith/ZUUGOSD574DL7T2VTGHFUMY7M2/action/author_attestation","sign_citation":"https://pith.science/pith/ZUUGOSD574DL7T2VTGHFUMY7M2/action/citation_signature","submit_replication":"https://pith.science/pith/ZUUGOSD574DL7T2VTGHFUMY7M2/action/replication_record"}},"created_at":"2026-06-19T16:09:57.431233+00:00","updated_at":"2026-06-19T16:09:57.431233+00:00"}