Tokenizer is now done, we also have the new InternalizeID and ExternalizeID

This commit is contained in:
2026-03-28 19:29:15 -04:00
parent 5aef356199
commit 88fa260c9d
10 changed files with 4883 additions and 0 deletions

View File

@@ -0,0 +1,32 @@
#pragma once
#include "CoreMinimal.h"
#include "WingServer.h"
#include "WingHandler.h"
#include "WingTokenizer.h"
#include "Test_Sanitizer.generated.h"
// ---------------------------------------------------------------------------
// ---------------------------------------------------------------------------
// ---------------------------------------------------------------------------
// Debug handler that runs WingTokenizer::ExternalizeID over a user-supplied
// string and prints the escaped result.
UCLASS()
class UWing_Test_Sanitizer : public UObject, public IWingHandler
{
	GENERATED_BODY()

public:
	UPROPERTY(meta=(Description="The string to sanitize"))
	FString Input;

	// Human-readable summary of what this handler does.
	virtual FString GetDescription() const override
	{
		return FString(TEXT("Test the sanitizer by sanitizing a string and printing the result."));
	}

	// Externalize Input and print it followed by a newline.
	virtual void Handle() override
	{
		const FString External = WingTokenizer::ExternalizeID(Input);
		UWingServer::Printf(TEXT("%s\n"), *External);
	}
};

View File

@@ -0,0 +1,33 @@
#pragma once
#include "CoreMinimal.h"
#include "WingServer.h"
#include "WingHandler.h"
#include "WingTokenizer.h"
#include "Test_Tokenizer.generated.h"
// ---------------------------------------------------------------------------
// ---------------------------------------------------------------------------
// ---------------------------------------------------------------------------
// Debug handler that tokenizes a user-supplied string with WingTokenizer and
// prints whatever the tokenizer produced (tokens and/or error).
UCLASS()
class UWing_Test_Tokenizer : public UObject, public IWingHandler
{
	GENERATED_BODY()

public:
	UPROPERTY(meta=(Description="The string to tokenize"))
	FString Input;

	// Human-readable summary of what this handler does.
	virtual FString GetDescription() const override
	{
		return FString(TEXT("Test the tokenizer by tokenizing a string and printing the result."));
	}

	// Tokenize Input, then print the tokenizer's full state.
	virtual void Handle() override
	{
		WingTokenizer Tokenizer(Input);
		Tokenizer.PrintEverything();
	}
};

View File

@@ -0,0 +1,41 @@
#pragma once
#include "CoreMinimal.h"
#include "WingServer.h"
#include "WingHandler.h"
#include "WingTokenizer.h"
#include "Test_Unsanitize.generated.h"
// ---------------------------------------------------------------------------
// ---------------------------------------------------------------------------
// ---------------------------------------------------------------------------
// Debug handler that runs WingTokenizer::TryInternalizeID over a
// user-supplied external identifier and prints the outcome.
UCLASS()
class UWing_Test_Unsanitize : public UObject, public IWingHandler
{
	GENERATED_BODY()

public:
	UPROPERTY(meta=(Description="The sanitized identifier to unsanitize"))
	FString Input;

	// Human-readable summary of what this handler does.
	virtual FString GetDescription() const override
	{
		return FString(TEXT("Test the unsanitizer by unsanitizing a string and printing the result."));
	}

	// Internalize Input; print the error message if one was produced and
	// the decoded identifier if one was produced.
	virtual void Handle() override
	{
		FString Problem;
		const FString Internal = WingTokenizer::TryInternalizeID(Input, Problem);

		const bool bHasProblem = !Problem.IsEmpty();
		if (bHasProblem)
		{
			UWingServer::Printf(TEXT("Error: %s\n"), *Problem);
		}

		const bool bHasResult = !Internal.IsEmpty();
		if (bHasResult)
		{
			UWingServer::Printf(TEXT("Result: %s\n"), *Internal);
		}
	}
};

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,302 @@
#include "WingTokenizer.h"
#include "WingServer.h"
// Mark every character in String as belonging to Category. The lookup table
// is grown as needed so that the largest character in String is a valid
// index; freshly added slots start out as Cat::Other.
void WingCharacterClasses::Assign(Cat Category, FStringView String)
{
	// Find the table size required to index every character in String.
	int32 RequiredSize = 0;
	for (TCHAR Ch : String)
	{
		if (Ch >= RequiredSize)
		{
			RequiredSize = Ch + 1;
		}
	}

	// Grow the table if necessary, explicitly filling the new tail with Other.
	const int32 PreviousSize = CharCategory.Num();
	if (PreviousSize < RequiredSize)
	{
		CharCategory.SetNum(RequiredSize);
		for (int Index = PreviousSize; Index < RequiredSize; ++Index)
		{
			CharCategory[Index] = Cat::Other;
		}
	}

	// Finally record the requested category for each listed character.
	for (TCHAR Ch : String)
	{
		CharCategory[Ch] = Category;
	}
}
// Build the character-category lookup table. The three steps below are
// order-dependent: later assignments overwrite earlier ones, so a character
// that appears in several lists ends up with the category assigned last.
// Any character that is never listed keeps the Cat::Other value that Assign
// writes into newly added table slots.
WingCharacterClasses::WingCharacterClasses()
{
// This is the set of printable, visible, non-whitespace characters that
// appear in most ubuntu default fonts. I initially map all of these as
// 'Identifier' characters, but later I swap some of them over to punctuation.
Assign(Cat::Identifier, TEXT(
"!#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[]^_`abcdefgh"
"ijklmnopqrstuvwxyz{|}~¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐ"
"ÑÒÓÔÕÖרÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿĀāĂ㥹ĆćĈĉĊċČčĎďĐđĒēĔĕĖ"
"ėĘęĚěĜĝĞğĠġĢģĤĥĦħĨĩĪīĬĭĮįİıIJijĴĵĶķĸĹĺĻļĽľĿŀŁłŃńŅņŇňʼnŊŋŌōŎŏŐőŒœŔŕŖŗŘřŚśŜ"
"ŝŞşŠšŢţŤťŦŧŨũŪūŬŭŮůŰűŲųŴŵŶŷŸŹźŻżŽžſƀƁƂƃƄƅƆƇƈƉƊƋƌƍƎƏƐƑƒƓƔƕƖƗƘƙƚƛƜƝƞƟƠơƢ"
"ƣƤƥƦƧƨƩƪƫƬƭƮƯưƱƲƳƴƵƶƷƸƹƺƻƾƿǀǁǂǃǍǎǏǐǑǒǓǔǕǖǗǘǙǚǛǜǝǞǟǠǡǢǣǦǧǨǩǪǫǬǭǮǯǰǴǵǸǹǼ"
"ǽǾǿȀȁȂȃȄȅȆȇȈȉȊȋȌȍȎȏȐȑȒȓȔȕȖȗȘșȚțȜȝȞȟȤȥȦȧȨȩȪȫȬȭȮȯȰȱȲȳȷʒʼˆˇˉ˘˙˚˛˜˝΄΅ΆΈΉΊ"
"ΌΎΏΐΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩΪΫάέήίΰαβγδεζηθικλμνξοπρςστυφχψωϊϋόύώЀЁЂЃЄ"
"ЅІЇЈЉЊЋЌЍЎЏАБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдежзийклмнопрстуфхцчшщъ"
"ыьэюяѐёђѓєѕіїјљњћќѝўџѲҐґҒғҔҕҖҗҘҙҚқҢңҤҥҪҫҬҭҮүҰұҲҳҺһӀӁӂӃӄӇӈӋӌӏӐӑӒӓӔӕӖӗӘә"
"ӚӛӜӝӞӟӠӡӢӣӤӥӦӧӨөӪӫӬӭӮӯӰӱӲӳӴӵӶӷӸӹẀẁẂẃẄẅỲỳ–—―‘’‚“”„†‡•…‰‹›⁰⁴⁵⁶⁷⁸⁹₀₁₂₃₄₅₆"
"₇₈₉€₹№™Ω⅓⅔⅕⅖⅗⅘⅙⅚⅛⅜⅝⅞∂∆∏∑−∕∙√∞∫≈≠≤≥◊fifl\"\\"
));
// All the punctuation marks that we do anything interesting with.
// These overwrite the Identifier category assigned to the same
// characters by the list above.
Assign(Cat::Punctuation, PunctuationString);
// Control characters.
// Indexing the table directly is in range here because the Assign call
// above already sized it to cover characters well beyond 0x7F.
for (int i = 0; i < 32; i++) CharCategory[i] = Cat::Control;
// DEL is the one control character outside the 0..31 range.
CharCategory[0x7F] = Cat::Control;
}
WingCharacterClasses WingCharacterClasses::TheSet;
// Append a token to the token array.
//
// Type        The token type: Identifier, RestOfLine, or a punctuation mark.
// InternalID  The decoded text carried by the token (may be empty). Taken by
//             value as a sink parameter and moved into the stored token.
void WingTokenizer::Add(TCHAR Type, FString InternalID)
{
	Token T;
	T.Type = Type;
	T.InternalID = MoveTemp(InternalID);
	// Move the finished token into the array; copying it here would
	// duplicate the string that was just moved in.
	Tokens.Add(MoveTemp(T));
}
// Convert a run of hexadecimal digits (either case) to the character with
// that code.
//
// Digits  The digits only, without the "&#x" prefix or ";" suffix.
// Error   Receives a message on failure; left untouched on success.
//
// Returns the decoded character, or zero after setting Error. Failure cases
// are: no digits, a non-hex digit, a value above 0xFFFF, and a value of
// zero. Zero is rejected because it doubles as the error sentinel and
// because an embedded NUL would truncate the identifier once it passes
// through a null-terminated string.
TCHAR WingTokenizer::FromHex(FStringView Digits, FString &Error)
{
	if (Digits.IsEmpty())
	{
		Error = TEXT("Empty hex escape sequence");
		return 0;
	}

	int32 Value = 0;
	for (const TCHAR Ch : Digits)
	{
		int32 Digit = 0;
		if (Ch >= '0' && Ch <= '9') Digit = Ch - '0';
		else if (Ch >= 'a' && Ch <= 'f') Digit = Ch - 'a' + 10;
		else if (Ch >= 'A' && Ch <= 'F') Digit = Ch - 'A' + 10;
		else
		{
			Error = TEXT("Invalid hex digit in escape sequence");
			return 0;
		}

		Value = Value * 16 + Digit;

		// Checked after every digit so Value can never overflow int32,
		// no matter how many digits the caller supplies.
		if (Value > 0xFFFF)
		{
			Error = TEXT("Escape sequence value out of range");
			return 0;
		}
	}

	if (Value == 0)
	{
		Error = TEXT("Escape sequence must not encode the NUL character");
		return 0;
	}
	return static_cast<TCHAR>(Value);
}
// Convert a run of decimal digits to the character with that code.
//
// Digits  The digits only, without the "&#" prefix or ";" suffix.
// Error   Receives a message on failure; left untouched on success.
//
// Returns the decoded character, or zero after setting Error. Failure cases
// are: no digits, a non-decimal digit, a value above 0xFFFF, and a value of
// zero. Zero is rejected because it doubles as the error sentinel and
// because an embedded NUL would truncate the identifier once it passes
// through a null-terminated string.
TCHAR WingTokenizer::FromDecimal(FStringView Digits, FString &Error)
{
	if (Digits.IsEmpty())
	{
		Error = TEXT("Empty decimal escape sequence");
		return 0;
	}

	int32 Value = 0;
	for (const TCHAR Ch : Digits)
	{
		if (Ch < '0' || Ch > '9')
		{
			Error = TEXT("Invalid decimal digit in escape sequence");
			return 0;
		}

		Value = Value * 10 + (Ch - '0');

		// Checked after every digit so Value can never overflow int32,
		// no matter how many digits the caller supplies.
		if (Value > 0xFFFF)
		{
			Error = TEXT("Escape sequence value out of range");
			return 0;
		}
	}

	if (Value == 0)
	{
		Error = TEXT("Escape sequence must not encode the NUL character");
		return 0;
	}
	return static_cast<TCHAR>(Value);
}
// Consume one "&...;" escape sequence from the front of Rest and return the
// character it denotes. Rest is expected to begin with the ampersand.
//
// Three forms are understood: "&#xHHHH;" (hex), "&#DDDD;" (decimal) and
// "&name;" (looked up in WingEntityList). Does nothing and returns zero if
// Error is already set. On failure, sets Error and returns zero.
TCHAR WingTokenizer::TokenizeEscapeSequence(FStringView &Rest, FString &Error)
{
	if (!Error.IsEmpty()) return 0;

	// The sequence runs up to the first semicolon.
	int32 EndIndex;
	const bool bTerminated = Rest.FindChar(';', EndIndex);
	if (!bTerminated)
	{
		Error = "Ampersand escape sequence doesn't end in semicolon";
		return 0;
	}

	// Anything shorter than "&xy;" cannot be a valid sequence; this also
	// guarantees that Rest[1] and Rest[2] below are in range.
	if (EndIndex < 3)
	{
		Error = "Ampersand escape sequence too short";
		return 0;
	}

	TCHAR Decoded = 0;
	const bool bNumeric = (Rest[1] == '#');
	if (!bNumeric)
	{
		// Named entity: everything between '&' and ';'.
		FString Name(Rest.Mid(1, EndIndex - 1));
		Decoded = WingEntityList::GetChar(Name);
		if (Decoded == 0)
		{
			Error = FString::Printf(TEXT("Unknown HTML entity: &%s;"), *Name);
			return 0;
		}
	}
	else if ((Rest[2] == 'x') || (Rest[2] == 'X'))
	{
		// Hex digits start after "&#x".
		Decoded = FromHex(Rest.Mid(3, EndIndex - 3), Error);
	}
	else
	{
		// Decimal digits start after "&#".
		Decoded = FromDecimal(Rest.Mid(2, EndIndex - 2), Error);
	}

	// Step past the terminating semicolon.
	Rest = Rest.RightChop(EndIndex + 1);
	return Decoded;
}
// Consume an external identifier from the front of Rest and return its
// internal (decoded) form: periods become spaces, ampersand escapes become
// the characters they denote, and Identifier/Other characters pass through.
// Scanning stops at a literal space, at any punctuation or control
// character, or at the end of the input; the stopping character is left in
// Rest. Does nothing and returns an empty string if Error is already set;
// also returns an empty string if an escape sequence fails to decode.
FString WingTokenizer::TokenizeIdentifier(FStringView &Rest, FString &Error)
{
	if (!Error.IsEmpty()) return FString();

	TStringBuilder<512> Decoded;
	while (Error.IsEmpty() && !Rest.IsEmpty())
	{
		const TCHAR Next = Rest[0];

		// Escape sequences advance Rest themselves.
		if (Next == '&')
		{
			Decoded.AppendChar(TokenizeEscapeSequence(Rest, Error));
			continue;
		}

		TCHAR Out;
		if (Next == '.')
		{
			// A period is the external spelling of a space.
			Out = ' ';
		}
		else
		{
			if (Next == ' ') break;

			// We accept other characters in case the LLM sends unicode
			// that isn't on the whitelist. This is intentional.
			const Cat Category = WingCharacterClasses::GetCat(Next);
			const bool bAccepted = (Category == Cat::Identifier) || (Category == Cat::Other);
			if (!bAccepted) break;
			Out = Next;
		}

		Decoded.AppendChar(Out);
		Rest = Rest.RightChop(1);
	}

	if (!Error.IsEmpty()) return FString();

	// We deliberately do not produce an error message for empty identifiers,
	// because we can't generate a good message here. We leave it to others
	// to deal with that case.
	return Decoded.ToString();
}
// Tokenize one line of input.
//
// Spaces and tabs separate tokens and are skipped. An '=' produces a
// RestOfLine token holding everything after it and ends tokenization.
// A '.' or '&', or any Identifier/Other character, starts an Identifier
// token whose InternalID is the decoded identifier. Any other punctuation
// mark becomes a token whose type is the mark itself. Any control character
// other than tab is an error.
//
// On error the message is stored in the Error member and the token array
// is cleared.
WingTokenizer::WingTokenizer(const FString& Input)
{
	// The parameter shadows the member of the same name, so the member has
	// to be assigned explicitly or it would stay empty.
	this->Input = Input;

	FStringView Rest(Input);
	while (!Rest.IsEmpty() && Error.IsEmpty())
	{
		const TCHAR Ch = Rest[0];

		if ((Ch == ' ') || (Ch == '\t'))
		{
			Rest = Rest.RightChop(1);
			continue;
		}

		if (Ch == '=')
		{
			Add(RestOfLine, FString(Rest.RightChop(1)));
			break;
		}

		// '.' and '&' are classified as punctuation, but inside an
		// identifier they mean "space" and "escape sequence", so they
		// must start an identifier rather than a punctuation token.
		const bool bStartsIdentifier = (Ch == '.') || (Ch == '&');
		if (!bStartsIdentifier)
		{
			const Cat Category = WingCharacterClasses::GetCat(Ch);
			if (Category == Cat::Punctuation)
			{
				Add(Ch, FString());
				Rest = Rest.RightChop(1);
				continue;
			}
			if (Category == Cat::Control)
			{
				Error = TEXT("Control characters in input, not allowed");
				break;
			}
		}

		// TokenizeIdentifier advances Rest past the identifier.
		Add(Identifier, TokenizeIdentifier(Rest, Error));
	}

	if (!Error.IsEmpty()) Tokens.Empty();
}
void WingTokenizer::PrintEverything() const
{
if (!Error.IsEmpty())
{
UWingServer::Printf(TEXT("Error: %s\n"), *Error);
}
for (const Token& T : Tokens)
{
TStringBuilder<512> ExtraStr;
for (TCHAR Ch : T.InternalID)
{
if (Ch >= 0x20 && Ch <= 0x7E)
{
ExtraStr.AppendChar(Ch);
ExtraStr.AppendChar(' ');
}
else
{
ExtraStr.Appendf(TEXT("%04X "), (int32)Ch);
}
}
if (T.Type >= 0x20 && T.Type <= 0x7E)
UWingServer::Printf(TEXT("Token '%c': %s\n"), T.Type, *ExtraStr);
else
UWingServer::Printf(TEXT("Token %04X: %s\n"), (int32)T.Type, *ExtraStr);
}
}
// Convert an internal ID into its external form. Spaces become periods,
// characters classified as Identifier are copied unchanged, and every other
// character is written as an ampersand escape: "&name;" when the entity
// list has a name for it, "&#DDDD;" (decimal code) otherwise.
FString WingTokenizer::ExternalizeID(const FString &S)
{
	TStringBuilder<512> Out;
	for (TCHAR Ch : S)
	{
		if (Ch == ' ')
		{
			Out.AppendChar('.');
			continue;
		}
		if (WingCharacterClasses::GetCat(Ch) == Cat::Identifier)
		{
			Out.AppendChar(Ch);
			continue;
		}

		// Everything else needs escaping.
		Out.AppendChar('&');
		FStringView EntityName = WingEntityList::GetName(Ch);
		if (!EntityName.IsEmpty())
		{
			Out.Append(EntityName);
		}
		else
		{
			Out.AppendChar('#');
			Out.Appendf(TEXT("%d"), (int32)Ch);
		}
		Out.AppendChar(';');
	}
	return Out.ToString();
}
// Convert an external ID into an internal ID. The whole of S must form a
// single identifier. Error is cleared on entry; on failure it receives a
// message and an empty string is returned. Failure cases are: a bad escape
// sequence, a character that ends the identifier early (unescaped space,
// punctuation, or control character), and an empty input.
FString WingTokenizer::TryInternalizeID(const FString &S, FString &Error)
{
	Error.Empty();

	FStringView Remaining(S);
	FString Decoded = TokenizeIdentifier(Remaining, Error);

	// The identifier tokenizer reported a problem: add context and stop.
	if (!Error.IsEmpty())
	{
		Error = FString::Printf(TEXT("ERROR parsing id %s: %s"), *S, *Error);
		return FString();
	}

	// Leftover input means the tokenizer stopped at a character that may
	// not appear unescaped in an id. Report which kind of character it was.
	if (!Remaining.IsEmpty())
	{
		const TCHAR Offender = Remaining[0];
		const Cat Category = WingCharacterClasses::GetCat(Offender);
		if (Offender == ' ')
		{
			Error = FString::Printf(TEXT("ERROR parsing id %s: in ids, spaces must be escaped"), *S);
		}
		else
		{
			switch (Category)
			{
			case Cat::Punctuation:
				Error = FString::Printf(TEXT("ERROR parsing id %s: in ids, these marks must be escaped: %s"),
					*S, WingCharacterClasses::PunctuationString);
				break;
			case Cat::Control:
				Error = FString::Printf(TEXT("ERROR parsing id %s: in ids, control characters must be escaped"), *S);
				break;
			default:
				Error = FString::Printf(TEXT("ERROR parsing id %s: unparseable character in id"), *S);
				break;
			}
		}
		return FString();
	}

	// Everything was consumed but nothing was produced: the input was empty.
	if (Decoded.IsEmpty())
	{
		Error = TEXT("ERROR: Empty identifiers are not allowed");
		return FString();
	}

	return Decoded;
}
// Convenience wrapper around TryInternalizeID. On failure it prints the
// error message, suggests the manual section on identifier sanitization,
// and returns whatever TryInternalizeID returned.
FString WingTokenizer::CheckInternalizeID(const FString &S)
{
	FString Problem;
	FString Internal = TryInternalizeID(S, Problem);
	if (Problem.IsEmpty())
	{
		return Internal;
	}

	UWingServer::Printf(TEXT("%s\n"), *Problem);
	UWingServer::SuggestManual(WingManual::Section::IdentifierSanitization);
	return Internal;
}

View File

@@ -65,6 +65,9 @@ FString WingUtils::SanitizeName(const FString &InName)
if (c == ' ') c=L'·'; if (c == ' ') c=L'·';
if (c == '<') c=L''; if (c == '<') c=L'';
if (c == '>') c=L''; if (c == '>') c=L'';
if (c == '(') c=L'';
if (c == ')') c=L'';
if (c == '=') c=L'';
if (c == ',') c=L''; if (c == ',') c=L'';
Name[Dst++] = c; Name[Dst++] = c;
} }
@@ -84,6 +87,9 @@ FString WingUtils::UnsanitizeName(const FString &InName)
if (c == L'·') c=' '; if (c == L'·') c=' ';
if (c == L'') c='<'; if (c == L'') c='<';
if (c == L'') c='>'; if (c == L'') c='>';
if (c == L'') c='(';
if (c == L'') c=')';
if (c == L'') c='=';
if (c == L'') c=','; if (c == L'') c=',';
Name[Dst++] = c; Name[Dst++] = c;
} }

View File

@@ -0,0 +1,169 @@
#pragma once
#include "CoreMinimal.h"
// -----------------------------------------------------------------
//
// WingCharacterClasses
//
// We recognize these disjoint classes of characters:
//
// Punctuation. A small hardwired list of punctuation marks
// that we want to escape, specifically \"'(),.:;<=>&
// These particular punctuation marks were chosen because they
// either need to be escaped for json's sake, or for our
// parser's sake. Any other punctuation is just classified as
// an identifier character.
//
// Identifier characters. A whitelist of about a thousand
// ascii and unicode characters that can be used directly in
// identifiers without any kind of escaping. To get on the list,
// you need the following: to not be an ascii punctuation mark,
// to be printable and visible and not whitespace, and to be
// easily rendered by all of the default ubuntu fonts.
//
// Control Characters. Ascii control characters, including DEL.
//
// Other Characters. Anything else.
//
// -----------------------------------------------------------------
// Classifies characters into the four disjoint categories below, using a
// lookup table held in a single static instance.
struct WingCharacterClasses
{
	enum class Cat : uint8
	{
		Identifier,
		Punctuation,
		Control,
		Other,
	};

	// The punctuation marks that are classified as Cat::Punctuation.
	static constexpr const TCHAR *PunctuationString = TEXT("\\\"'(),.:;<=>&");

	// Look up the category of a character. Characters that fall outside
	// the lookup table are reported as Cat::Other.
	static Cat GetCat(TCHAR Ch)
	{
		const int32 Index = (int32)Ch;
		const bool bInTable = (Index >= 0) && (Index < TheSet.CharCategory.Num());
		return bInTable ? TheSet.CharCategory[Index] : Cat::Other;
	}

private:
	// Category of each character, indexed by character code.
	TArray<Cat> CharCategory;

	WingCharacterClasses();

	// Set the category of every character in String, growing the table
	// if necessary.
	void Assign(Cat Category, FStringView String);

	// The one and only instance.
	static WingCharacterClasses TheSet;
};
// -----------------------------------------------------------------
//
// The HTML Entity List.
//
// When escaping identifiers, we use HTML escapes like &lt;
// These work well because they have no conflict with the json
// parser (MCP protocol is json), they are also easy to deal
// with in the tokenizer, and the LLM is already familiar with
// that kind of escaping. The names stored in this table do not
// include the ampersand or the semicolon.
//
// This class doesn't handle numeric (decimal or hex) character codes;
// it is just a lookup table from character to name and back.
//
// -----------------------------------------------------------------
// Two-way lookup between characters and HTML entity names, held in a single
// static instance.
struct WingEntityList
{
	// Character for an entity name, or zero if the name is unknown.
	static TCHAR GetChar(const FString &Name)
	{
		if (const TCHAR *Found = TheList.NameToChar.Find(Name))
		{
			return *Found;
		}
		return 0;
	}

	// Entity name for a character, or an empty view if it has none.
	static FStringView GetName(TCHAR Ch)
	{
		if (const FString *Found = TheList.CharToName.Find(Ch))
		{
			return *Found;
		}
		return FStringView();
	}

private:
	TMap<TCHAR, FString> CharToName;
	TMap<FString, TCHAR> NameToChar;

	// One row of the table the list is constructed from.
	struct Raw { const char *Name; TCHAR Codepoint; };

	WingEntityList(std::initializer_list<Raw> Data);

	// The one and only instance.
	static WingEntityList TheList;
};
// Splits a line of input into tokens, and converts identifiers between
// their external (escaped) and internal (raw) forms.
struct WingTokenizer
{
	using Cat = WingCharacterClasses::Cat;

	// Token type codes for the two non-punctuation token kinds. These are
	// compile-time constants shared by all instances; keeping them static
	// (rather than per-object const members) also leaves the tokenizer
	// copy-assignable.
	static constexpr TCHAR Identifier = 'i';
	static constexpr TCHAR RestOfLine = 'r';

	// A token has a token type which can be Identifier,
	// RestOfLine, or a single-character punctuation mark.
	// The InternalID field contains the result of converting
	// the token from an external ID to an internal ID.
	struct Token
	{
		TCHAR Type;
		FString InternalID;
	};

	// The string that we tokenized.
	FString Input;

	// If the tokenization failed, an error message.
	FString Error;

	// The result, an array of tokens.
	TArray<Token> Tokens;

	// Tokenize a line of input. The tokens are stored in
	// the token array. If there's an error, the error is
	// stored in the error field, and the token array is
	// cleared. If the tokens contain identifiers, each
	// identifier token's InternalID holds the decoded
	// (internal) form of that identifier.
	WingTokenizer(const FString& Input);

	// Convert an internal ID into an external ID.
	// Spaces are converted to periods. Any other
	// non-identifier character is HTML escaped.
	static FString ExternalizeID(const FString &S);

	// Convert an external ID into an internal ID.
	// Periods are converted back to spaces. HTML escapes
	// are converted back to raw characters. This could
	// fail, for example, if the external name contains an
	// invalid HTML escape. If it does, returns empty
	// string and sets the error message.
	static FString TryInternalizeID(const FString &S, FString &Error);

	// Calls TryInternalizeID. If this generates an
	// error, prints the error message, suggests the manual
	// entry on identifier sanitization, and returns empty
	// string.
	static FString CheckInternalizeID(const FString &S);

	// Print all tokens to the log for debugging.
	void PrintEverything() const;

private:
	// Add a token to the token array.
	void Add(TCHAR Type, FString InternalID);

	// Convert numbers to TCHAR. If there's an error, set the error
	// message and return zero.
	static TCHAR FromHex(FStringView Digits, FString &Error);
	static TCHAR FromDecimal(FStringView Digits, FString &Error);

	// Tokenize an escape sequence. Attempts to consume a valid escape
	// sequence from rest, and return the character indicated. On error,
	// sets the error message and returns zero.
	static TCHAR TokenizeEscapeSequence(FStringView &Rest, FString &Error);

	// Tokenize an identifier. Attempts to consume a valid identifier
	// from rest, and return the identifier. On error, sets the error
	// message and returns empty string.
	static FString TokenizeIdentifier(FStringView &Rest, FString &Error);
};

2233
entities.json Normal file

File diff suppressed because it is too large Load Diff

96
tools/font-glyphs.py Executable file
View File

@@ -0,0 +1,96 @@
#!/usr/bin/env python3
"""
Report which Unicode code points have vector outlines in ALL of the given font files.
Usage: python3 font-glyphs.py font1.ttf font2.ttf ...
"""
import sys
import unicodedata
from fontTools.ttLib import TTFont
from fontTools.pens.statisticsPen import StatisticsPen
def get_vector_codepoints(path):
    """Return the set of code points that have actual vector outlines in the font.

    Args:
        path: Path to a font file that fontTools' TTFont can open.

    Returns:
        A set of integer code points from the font's best cmap whose glyph
        has a non-empty TrueType ("glyf") outline or a CFF charstring with
        non-zero area. An empty set (plus a warning on stderr) if the font
        has no cmap.
    """
    font = TTFont(path)
    try:
        cmap = font.getBestCmap()
        if cmap is None:
            print(f"WARNING: {path} has no cmap table", file=sys.stderr)
            return set()

        glyf = font.get("glyf")  # TrueType outlines
        cff = font.get("CFF ")   # CFF outlines

        # The charstrings table and the glyph set do not change from glyph
        # to glyph, so look them up once instead of once per code point.
        charstrings = None
        glyph_set = None
        if cff is not None:
            try:
                charstrings = cff.cff.topDictIndex[0].CharStrings
                glyph_set = font.getGlyphSet()
            except (KeyError, AttributeError):
                charstrings = None

        result = set()
        for codepoint, glyph_name in cmap.items():
            has_outline = False

            if glyf is not None:
                g = glyf.get(glyph_name)
                # Zero contours means an empty glyph (e.g. a space).
                if g is not None and g.numberOfContours != 0:
                    has_outline = True

            # Only draw the CFF charstring if the answer is still unknown.
            if not has_outline and charstrings is not None:
                try:
                    pen = StatisticsPen(glyphset=glyph_set)
                    charstrings[glyph_name].draw(pen)
                    if pen.area != 0:
                        has_outline = True
                except (KeyError, AttributeError):
                    pass

            if has_outline:
                result.add(codepoint)
        return result
    finally:
        # Close the font on every path, including the early return above
        # and any exception raised while inspecting glyphs.
        font.close()
def main():
    """Print a C++ string constant of the characters common to all given fonts.

    Font paths are read from the command line. The generated C++ goes to
    stdout; usage errors and all progress/status lines go to stderr, so
    that stdout can be redirected straight into a source file. Exits with
    status 1 if no font path is given.
    """
    if len(sys.argv) < 2:
        print(f"Usage: {sys.argv[0]} font1.ttf [font2.ttf ...]", file=sys.stderr)
        sys.exit(1)

    paths = sys.argv[1:]

    # Process each font and intersect.
    common = None
    for path in paths:
        cps = get_vector_codepoints(path)
        # Status output must not be mixed into the generated code on stdout.
        print(f"{len(cps):6d} glyphs  {path}", file=sys.stderr)
        if common is None:
            common = cps
        else:
            common &= cps

    if len(paths) > 1:
        print(f"{len(common):6d} glyphs  common to all {len(paths)} fonts", file=sys.stderr)

    # Build the character string, excluding quote and backslash, which
    # cannot appear unescaped inside a C++ string literal.
    excluded = {ord('"'), ord('\\')}
    chars = [chr(cp) for cp in sorted(common) if cp not in excluded]

    # Emit C++ file.
    print("// Auto-generated by tools/font-glyphs.py — do not edit by hand.")
    print(f"// {len(chars)} characters common to all {len(paths)} font(s).")
    print()
    print("const TCHAR *CommonChars = TEXT(")

    # Break into lines of ~70 chars for readability.
    line = ""
    for ch in chars:
        line += ch
        if len(line) >= 70:
            print(f'\t"{line}"')
            line = ""
    if line:
        print(f'\t"{line}"')

    print(");")
if __name__ == "__main__":
main()

54
tools/gen-entities.py Normal file
View File

@@ -0,0 +1,54 @@
#!/usr/bin/env python3
"""Read entities.json (WHATWG HTML named character references) and generate
WingEntities.cpp with a lookup table of { "name", codepoint } rows.
Rules:
- Only entries whose key ends with ';' (skip legacy semicolon-less forms).
- Only entries with exactly one codepoint.
- Codepoint must be <= 0xFFFF (Unreal uses 16-bit TCHAR).
"""
import json, os
# Paths are resolved relative to the parent of the directory containing
# this script, so the script can be run from any working directory.
script_dir = os.path.dirname(os.path.abspath(__file__))
project_dir = os.path.dirname(script_dir)
input_path = os.path.join(project_dir, "entities.json")
output_path = os.path.join(project_dir,
    "Plugins", "UEWingman", "Source", "UEWingman", "Private", "WingEntities.cpp")

# Read with an explicit encoding: the JSON contains non-ASCII characters
# and the platform default encoding is not guaranteed to be UTF-8.
with open(input_path, encoding="utf-8") as f:
    data = json.load(f)

# Collect (name, codepoint) rows. Iterating in sorted key order keeps the
# generated file stable from run to run.
rows = []
for key, val in sorted(data.items()):
    if not key.endswith(";"):
        continue
    cps = val["codepoints"]
    if len(cps) != 1:
        continue
    cp = cps[0]
    if cp > 0xFFFF:
        continue
    # Strip leading '&' and trailing ';'
    name = key[1:-1]
    rows.append((name, cp))

# Write with an explicit encoding: the header comment below contains a
# non-ASCII dash.
with open(output_path, "w", encoding="utf-8") as f:
    f.write("// Auto-generated by tools/gen-entities.py — do not edit by hand.\n")
    f.write("// Source: WHATWG HTML named character references (entities.json)\n\n")
    f.write('#include "WingTokenizer.h"\n\n\n')

    # The constructor fills both lookup maps from the raw table.
    f.write("WingEntityList::WingEntityList(std::initializer_list<Raw> Data)\n")
    f.write("{\n")
    f.write("\tfor (const Raw& Entry : Data)\n")
    f.write("\t{\n")
    f.write('\t\tFString XName((const ANSICHAR*)Entry.Name);\n')
    f.write("\t\tCharToName.Add(Entry.Codepoint, XName);\n")
    f.write("\t\tNameToChar.Add(XName, Entry.Codepoint);\n")
    f.write("\t}\n")
    f.write("}\n\n")

    # The static instance, initialized with one row per entity.
    f.write("WingEntityList WingEntityList::TheList({\n")
    for name, cp in rows:
        f.write(f'\t{{ "{name}", {cp} }},\n')
    f.write("});\n")

print(f"Generated {len(rows)} entities -> {output_path}")