Tokenizer is now done; we also have the new InternalizeID and ExternalizeID
This commit is contained in:
96
tools/font-glyphs.py
Executable file
96
tools/font-glyphs.py
Executable file
@@ -0,0 +1,96 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Report which Unicode code points have vector outlines in ALL of the given font files.
|
||||
|
||||
Usage: python3 font-glyphs.py font1.ttf font2.ttf ...
|
||||
"""
|
||||
|
||||
import sys
|
||||
import unicodedata
|
||||
from fontTools.ttLib import TTFont
|
||||
from fontTools.pens.statisticsPen import StatisticsPen
|
||||
|
||||
|
||||
def get_vector_codepoints(path):
    """Return the set of code points that have actual vector outlines in the font.

    A code point from the font's best cmap is included when its glyph either
    has a non-zero ``numberOfContours`` in the ``glyf`` table, or draws a
    non-zero area from its charstring in the ``CFF `` table.

    Args:
        path: Path of the font file to open with ``TTFont``.

    Returns:
        A set of integer code points. The set is empty (and a warning is
        written to stderr) when the font has no usable cmap.
    """
    font = TTFont(path)
    try:
        cmap = font.getBestCmap()
        if cmap is None:
            print(f"WARNING: {path} has no cmap table", file=sys.stderr)
            return set()

        glyf = font.get("glyf")  # TrueType outlines
        cff = font.get("CFF ")   # CFF outlines

        # The glyph set does not depend on the glyph being inspected, so
        # build it once instead of once per cmap entry.
        glyph_set = font.getGlyphSet() if cff is not None else None

        result = set()
        for codepoint, glyph_name in cmap.items():
            has_outline = False

            if glyf is not None:
                g = glyf.get(glyph_name)
                if g is not None and g.numberOfContours != 0:
                    has_outline = True

            if cff is not None:
                # CFF fonts store outlines in charstrings.
                try:
                    cs = cff.cff.topDictIndex[0].CharStrings[glyph_name]
                    pen = StatisticsPen(glyphset=glyph_set)
                    cs.draw(pen)
                    if pen.area != 0:
                        has_outline = True
                except (KeyError, AttributeError):
                    # Best effort: a glyph whose charstring is missing is
                    # treated as having no CFF outline.
                    pass

            if has_outline:
                result.add(codepoint)

        return result
    finally:
        # Release the font file on every exit path, including the early
        # return above and any exception raised while scanning glyphs.
        font.close()
|
||||
|
||||
|
||||
def main():
    """Print a C++ string constant of the characters common to all given fonts.

    Font paths are taken from ``sys.argv[1:]``. For every font the set of
    code points with vector outlines is computed and the sets are
    intersected. The generated C++ source is written to stdout; usage
    errors and per-font / summary counts are written to stderr so that
    stdout can be redirected straight into a source file.

    Exits with status 1 when no font path is given.
    """
    if len(sys.argv) < 2:
        print(f"Usage: {sys.argv[0]} font1.ttf [font2.ttf ...]", file=sys.stderr)
        sys.exit(1)

    paths = sys.argv[1:]

    # Process each font and intersect.
    common = None
    for path in paths:
        cps = get_vector_codepoints(path)
        # Diagnostics go to stderr; stdout carries only the generated C++.
        print(f"{len(cps):6d} glyphs {path}", file=sys.stderr)
        common = cps if common is None else common & cps

    if len(paths) > 1:
        print(f"{len(common):6d} glyphs common to all {len(paths)} fonts", file=sys.stderr)

    # Build the character list, excluding quote and backslash because they
    # would need escaping inside a C++ string literal.
    excluded = {ord('"'), ord('\\')}
    chars = [chr(cp) for cp in sorted(common) if cp not in excluded]

    # Emit C++ file.
    print("// Auto-generated by tools/font-glyphs.py — do not edit by hand.")
    print(f"// {len(chars)} characters common to all {len(paths)} font(s).")
    print()
    print("const TCHAR *CommonChars = TEXT(")

    # Break into lines of 70 chars for readability; adjacent C++ string
    # literals are concatenated by the compiler.
    line_width = 70
    if not chars:
        # Keep the macro argument non-empty when nothing is common.
        print('\t""')
    for start in range(0, len(chars), line_width):
        line = "".join(chars[start:start + line_width])
        print(f'\t"{line}"')

    print(");")
|
||||
|
||||
|
||||
# Run the generator only when this file is executed as a script.
if __name__ == "__main__":
    main()
|
||||
54
tools/gen-entities.py
Normal file
54
tools/gen-entities.py
Normal file
@@ -0,0 +1,54 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Read entities.json (WHATWG HTML named character references) and generate
|
||||
WingEntities.cpp with a lookup table of { "name", codepoint } rows.
|
||||
|
||||
Rules:
|
||||
- Only entries whose key ends with ';' (skip legacy semicolon-less forms).
|
||||
- Only entries with exactly one codepoint.
|
||||
- Codepoint must be <= 0xFFFF (Unreal uses 16-bit TCHAR).
|
||||
"""
|
||||
|
||||
import json, os
|
||||
|
||||
# Paths are resolved relative to this script: the project root is the parent
# of the directory that contains it.
script_dir = os.path.dirname(os.path.abspath(__file__))
project_dir = os.path.dirname(script_dir)
input_path = os.path.join(project_dir, "entities.json")
output_path = os.path.join(project_dir,
    "Plugins", "UEWingman", "Source", "UEWingman", "Private", "WingEntities.cpp")

# Read explicitly as UTF-8 so the result does not depend on the platform's
# default text encoding.
with open(input_path, encoding="utf-8") as f:
    data = json.load(f)

# Collect (name, codepoint) rows in sorted key order, applying the filter
# rules from the module docstring.
rows = []
for key, val in sorted(data.items()):
    if not key.endswith(";"):
        continue
    cps = val["codepoints"]
    if len(cps) != 1:
        continue
    cp = cps[0]
    if cp > 0xFFFF:
        continue
    # Strip leading '&' and trailing ';'
    name = key[1:-1]
    rows.append((name, cp))

# Write explicitly as UTF-8: the header comment contains a non-ASCII dash,
# so the platform default encoding would make the output vary by machine.
with open(output_path, "w", encoding="utf-8") as f:
    f.write("// Auto-generated by tools/gen-entities.py — do not edit by hand.\n")
    f.write("// Source: WHATWG HTML named character references (entities.json)\n\n")
    f.write('#include "WingTokenizer.h"\n\n\n')

    # Constructor that fills both lookup maps from the raw table.
    f.write("WingEntityList::WingEntityList(std::initializer_list<Raw> Data)\n")
    f.write("{\n")
    f.write("\tfor (const Raw& Entry : Data)\n")
    f.write("\t{\n")
    f.write('\t\tFString XName((const ANSICHAR*)Entry.Name);\n')
    f.write("\t\tCharToName.Add(Entry.Codepoint, XName);\n")
    f.write("\t\tNameToChar.Add(XName, Entry.Codepoint);\n")
    f.write("\t}\n")
    f.write("}\n\n")

    # The table itself: one { "name", codepoint } row per accepted entity.
    f.write("WingEntityList WingEntityList::TheList({\n")
    for name, cp in rows:
        f.write(f'\t{{ "{name}", {cp} }},\n')
    f.write("});\n")

print(f"Generated {len(rows)} entities -> {output_path}")
|
||||
Reference in New Issue
Block a user