diff --git a/Docs/A-Summary-of-our-Lua-Patches.md b/Docs/A-Summary-of-our-Lua-Patches.md
index 6296e093..a9854fbe 100644
--- a/Docs/A-Summary-of-our-Lua-Patches.md
+++ b/Docs/A-Summary-of-our-Lua-Patches.md
@@ -248,3 +248,18 @@ GC Finalizers and weak tables both introduce nondeterminism into Lua execution.
 Update 1: I'm using GC finalizers in some cases to clean up userdata objects. I think it's safe as long as the only thing the finalizer does is free memory. (NOTE: WHERE?)
 
 Update 2: I don't remember using userdata objects at all. I am not sure that Update 1 is the truth any more.
+
+## Token Literal Syntax Patch
+
+Tokens are lightuserdata values encoding short alphanumeric strings as base37 numbers (see `Tokens-A-New-Lua-Type.md`). Previously, tokens could only be created in C++ and inserted into the Lua environment via `LuaTokenConstant`. This patch adds a literal syntax to the Lua parser so that tokens can be written directly in Lua source code using the `@` prefix:
+
+```lua
+local x = @null
+local y = @found
+```
+
+The lexer (llex.c) recognizes `@` followed by one or more alphanumeric characters (a-z, 0-9, case insensitive, max 12 characters). It encodes the string as a base37 number using the same encoding as `LuaToken::parse()` in luastack.hpp and produces a `TK_TOKEN` token. The parser (lparser.c) handles `TK_TOKEN` in `simpleexp()` by storing it as a lightuserdata constant in the function's constant table via `luaK_lightuserdataK()` in lcode.c.
+
+Underscores are not valid in token literals. Writing `@foo_bar` produces a lexer error rather than silently splitting into token `@foo` and identifier `_bar`.
+
+This patch is live and functioning.
diff --git a/luprex/cpp/core/luastack.hpp b/luprex/cpp/core/luastack.hpp
index 2d7dae15..ab86b67c 100644
--- a/luprex/cpp/core/luastack.hpp
+++ b/luprex/cpp/core/luastack.hpp
@@ -413,6 +413,11 @@ private:
     // and the result is: CH0*37^11 + CH1*37^10 + ... + CH11*37^0.
     // This fixed-width encoding ensures that numeric ordering matches
     // lexicographic ordering of the original strings.
+    //
+    // WARNING: The Lua lexer in llex.c contains a duplicate of this
+    // encoding logic (in the '@' token literal case). If you change
+    // the encoding here, you must update llex.c to match.
+    //
     // Returns zero if the string is empty, too long, or contains
     // invalid characters.
     //
diff --git a/luprex/cpp/core/pprint.cpp b/luprex/cpp/core/pprint.cpp
index 792ae356..09cba149 100644
--- a/luprex/cpp/core/pprint.cpp
+++ b/luprex/cpp/core/pprint.cpp
@@ -64,7 +64,7 @@ public:
     }
     case LUA_TLIGHTUSERDATA: {
       LuaToken token = LS_.cktoken(val);
-      (*output_) << "[" << token.str() << "]";
+      (*output_) << "@" << token.str();
       return;
     }
     case LUA_TT_GENERAL: {
diff --git a/luprex/ext/eris-master/src/lcode.c b/luprex/ext/eris-master/src/lcode.c
index d39de516..e63a5556 100644
--- a/luprex/ext/eris-master/src/lcode.c
+++ b/luprex/ext/eris-master/src/lcode.c
@@ -342,6 +342,17 @@ int luaK_numberK (FuncState *fs, lua_Number r) {
 }
 
 
+/*
+** Wrap the raw pointer 'p' in a light-userdata TValue and hand it to
+** 'addk', using the value itself as the lookup key.  The result of
+** 'addk' is returned unchanged (the parser stores it in a VK expression).
+*/
+int luaK_lightuserdataK (FuncState *fs, void *p) {
+  TValue o;
+  setpvalue(&o, p);
+  return addk(fs, &o, &o);
+}
+
+
 static int boolK (FuncState *fs, int b) {
   TValue o;
   setbvalue(&o, b);
diff --git a/luprex/ext/eris-master/src/lcode.h b/luprex/ext/eris-master/src/lcode.h
index 6a1424cf..9f051157 100644
--- a/luprex/ext/eris-master/src/lcode.h
+++ b/luprex/ext/eris-master/src/lcode.h
@@ -53,6 +53,7 @@ LUAI_FUNC void luaK_reserveregs (FuncState *fs, int n);
 LUAI_FUNC void luaK_checkstack (FuncState *fs, int n);
 LUAI_FUNC int luaK_stringK (FuncState *fs, TString *s);
 LUAI_FUNC int luaK_numberK (FuncState *fs, lua_Number r);
+LUAI_FUNC int luaK_lightuserdataK (FuncState *fs, void *p);
 LUAI_FUNC void luaK_dischargevars (FuncState *fs, expdesc *e);
 LUAI_FUNC int luaK_exp2anyreg (FuncState *fs, expdesc *e);
 LUAI_FUNC void luaK_exp2anyregup (FuncState *fs, expdesc *e);
diff --git a/luprex/ext/eris-master/src/llex.c b/luprex/ext/eris-master/src/llex.c
index ee6d8030..28cef65f 100644
--- a/luprex/ext/eris-master/src/llex.c
+++ b/luprex/ext/eris-master/src/llex.c
@@ -39,7 +39,7 @@ static const char *const luaX_tokens [] = {
     "in", "local", "nil", "not", "or", "repeat",
     "return", "then", "true", "until", "while",
     "..", "...", "==", ">=", "<=", "~=", "::", "",
-    "", "", ""
+    "", "", "", ""
 };
 
 
@@ -93,6 +93,7 @@ static const char *txtToken (LexState *ls, int token) {
     case TK_NAME:
     case TK_STRING:
     case TK_NUMBER:
+    case TK_TOKEN:
       save(ls, '\0');
       return luaO_pushfstring(ls->L, LUA_QS, luaZ_buffer(ls->buff));
     default:
@@ -485,6 +486,35 @@ static int llex (LexState *ls, SemInfo *seminfo) {
       case EOZ: {
         return TK_EOS;
       }
+      case '@': {  /* token literal */
+        /*
+        ** Up to 12 base-37 digits: '0'-'9' map to 1..10 and letters
+        ** (either case) to 11..36.  A full 12-digit value needs more
+        ** than 32 bits, so size_t is assumed to be 64 bits wide here.
+        */
+        size_t tokval = 0;
+        int toklen = 0;
+        save_and_next(ls);
+        while (1) {
+          char c = (char)ls->current;
+          size_t digit;
+          if (c >= '0' && c <= '9') digit = (size_t)(c - '0') + 1;
+          else if (c >= 'a' && c <= 'z') digit = (size_t)(c - 'a') + 11;
+          else if (c >= 'A' && c <= 'Z') digit = (size_t)(c - 'A') + 11;
+          else break;
+          tokval = tokval * 37 + digit;
+          toklen++;
+          save_and_next(ls);
+        }
+        /* a trailing '_' is an error so "@foo_bar" is not read as @foo, _bar */
+        if (toklen == 0 || toklen > 12 || ls->current == '_')
+          lexerror(ls, "invalid token literal", TK_TOKEN);
+        /* Pad to fixed width of 12 digits; toklen doubles as the counter
+        ** so that no C99 loop-scoped declaration is needed. */
+        for (; toklen < 12; toklen++) tokval *= 37;
+        seminfo->p = (void *)tokval;
+        return TK_TOKEN;
+      }
       default: {
         if (lislalpha(ls->current)) {  /* identifier or reserved word? */
           TString *ts;
diff --git a/luprex/ext/eris-master/src/llex.h b/luprex/ext/eris-master/src/llex.h
index a4acdd30..1b8cfc1d 100644
--- a/luprex/ext/eris-master/src/llex.h
+++ b/luprex/ext/eris-master/src/llex.h
@@ -27,7 +27,7 @@ enum RESERVED {
   TK_RETURN, TK_THEN, TK_TRUE, TK_UNTIL, TK_WHILE,
   /* other terminal symbols */
   TK_CONCAT, TK_DOTS, TK_EQ, TK_GE, TK_LE, TK_NE, TK_DBCOLON, TK_EOS,
-  TK_NUMBER, TK_NAME, TK_STRING
+  TK_NUMBER, TK_NAME, TK_STRING, TK_TOKEN
 };
 
 /* number of reserved words */
@@ -37,6 +37,7 @@ enum RESERVED {
 typedef union {
   lua_Number r;
   TString *ts;
+  void *p;  /* for token (lightuserdata) literals */
 } SemInfo;  /* semantics information */
 
 
diff --git a/luprex/ext/eris-master/src/lparser.c b/luprex/ext/eris-master/src/lparser.c
index 9e1a9ca2..e32872f2 100644
--- a/luprex/ext/eris-master/src/lparser.c
+++ b/luprex/ext/eris-master/src/lparser.c
@@ -947,6 +947,10 @@ static void simpleexp (LexState *ls, expdesc *v) {
       codestring(ls, v, ls->t.seminfo.ts);
       break;
     }
+    case TK_TOKEN: {
+      init_exp(v, VK, luaK_lightuserdataK(ls->fs, ls->t.seminfo.p));
+      break;
+    }
     case TK_NIL: {
       init_exp(v, VNIL, 0);
       break;