mirror of
https://github.com/daveallie/crosspoint-reader.git
synced 2026-02-06 23:57:39 +03:00
script to embed hypher-generated .bin tries into constexpr headers
This commit is contained in:
parent
163fae57ec
commit
fc8bcbce4f
82
scripts/generate_hyphenation_trie.py
Executable file
82
scripts/generate_hyphenation_trie.py
Executable file
@ -0,0 +1,82 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""Embed hypher-generated `.bin` tries into constexpr headers."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import pathlib
|
||||||
|
|
||||||
|
|
||||||
|
def _format_bytes(blob: bytes, per_line: int = 16) -> str:
    """Render *blob* as wrapped, comma-separated C hex literals.

    Each output row holds up to ``per_line`` bytes formatted as ``0xNN``
    (upper-case hex), is prefixed with a single space and ends with a
    trailing comma; rows are joined with newlines.

    An empty blob yields the single placeholder row `` 0x00,`` (presumably so
    the generated array initializer is never empty -- confirm against the
    consumer of the header).

    Raises:
        ValueError: if ``per_line`` is smaller than 1.  Without this check a
            negative value would produce an empty range and silently emit the
            placeholder row instead of the real payload.
    """
    if per_line < 1:
        raise ValueError(f"per_line must be a positive integer, got {per_line}")

    if not blob:
        return " 0x00,"

    rows = []
    for start in range(0, len(blob), per_line):
        literals = ', '.join(f"0x{byte:02X}" for byte in blob[start:start + per_line])
        rows.append(f" {literals},")
    return '\n'.join(rows)
|
||||||
|
|
||||||
|
|
||||||
|
def _symbol_from_output(path: pathlib.Path) -> str:
    """Derive the C identifier stem from the destination header's file name.

    Only ``path.name`` is considered.  The transformation, in order:

    1. drop a trailing ``.trie.h``;
    2. drop a leading ``hyph-``;
    3. turn ``-`` into ``_``;
    4. drop a trailing ``.trie``;
    5. replace every remaining character outside ``[A-Za-z0-9_]`` with ``_``.

    Example: ``hyph-en-us.trie.h`` -> ``en_us``.

    Step 5 keeps punctuation from unexpected file names (``hyph-pt.br.h``,
    ``hyph-fr+ca.trie.h``) out of the generated declarations.  A leading digit
    is not rewritten.
    """
    stem = path.name
    if stem.endswith('.trie.h'):
        stem = stem[:-len('.trie.h')]
    if stem.startswith('hyph-'):
        stem = stem[len('hyph-'):]
    stem = stem.replace('-', '_')
    if stem.endswith('.trie'):
        stem = stem[:-len('.trie')]
    # Anything that is not an ASCII letter, digit or underscore cannot appear
    # in a C identifier, so map it to an underscore.
    return ''.join(ch if ch.isascii() and ch.isalnum() else '_' for ch in stem)
|
||||||
|
|
||||||
|
|
||||||
|
def write_header(path: pathlib.Path, blob: bytes, symbol: str) -> None:
    """Write a constexpr C++ header embedding *blob* to *path*.

    The header declares ``<symbol>_trie_data`` (an ``alignas(4)`` ``uint8_t``
    array holding the bytes) and ``<symbol>_patterns`` (a
    ``SerializedHyphenationPatterns`` initialised with that array and its
    ``sizeof``).  Missing parent directories of *path* are created, and the
    file is written with ``Path.write_text`` using its default encoding.
    """
    path.parent.mkdir(parents=True, exist_ok=True)

    array_name = f"{symbol}_trie_data"
    descriptor_name = f"{symbol}_patterns"

    # One entry per output line; the trailing empty entry yields the final newline.
    header_lines = [
        "#pragma once",
        "",
        "#include <cstddef>",
        "#include <cstdint>",
        "",
        '#include "../SerializedHyphenationTrie.h"',
        "",
        "// Auto-generated by generate_hyphenation_trie.py. Do not edit manually.",
        f"alignas(4) constexpr uint8_t {array_name}[] = {{",
        _format_bytes(blob),
        "};",
        "",
        f"constexpr SerializedHyphenationPatterns {descriptor_name} = {{",
        f"{array_name},",
        f"sizeof({array_name}),",
        "};",
        "",
    ]
    path.write_text("\n".join(header_lines))
|
||||||
|
|
||||||
|
|
||||||
|
def main() -> None:
    """Command-line entry point: convert each ``--input`` blob into its ``--output`` header.

    ``--input`` and ``--output`` may each be repeated; they are paired by
    position, so both must be given the same number of times or the program
    exits with an error message.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument(
        '--input',
        dest='inputs',
        action='append',
        required=True,
        help='Path to a hypher-generated .bin trie',
    )
    cli.add_argument(
        '--output',
        dest='outputs',
        action='append',
        required=True,
        help='Destination header path (hyph-*.trie.h)',
    )
    options = cli.parse_args()
    sources, targets = options.inputs, options.outputs

    if len(sources) != len(targets):
        raise SystemExit('input/output counts must match')

    # Every pair is handled on its own, so several languages can be
    # regenerated by a single invocation.
    for src, dst in zip(sources, targets):
        payload = pathlib.Path(src).read_bytes()
        header_path = pathlib.Path(dst)
        write_header(header_path, payload, _symbol_from_output(header_path))
        print(f'wrote {dst} ({len(payload)} bytes payload)')
|
||||||
|
|
||||||
|
|
||||||
|
# Run the converter only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
||||||
Loading…
Reference in New Issue
Block a user