Add token literals to the lua parser

This commit is contained in:
2026-02-19 00:11:44 -05:00
parent 1fd06f0628
commit 7039c43065
8 changed files with 58 additions and 3 deletions

View File

@@ -248,3 +248,18 @@ GC Finalizers and weak tables both introduce nondeterminism into Lua execution.
Update 1: I'm using GC finalizers in some cases to clean up userdata objects. I think it's safe as long as the only thing the finalizer does is free memory. (NOTE: WHERE?) Update 1: I'm using GC finalizers in some cases to clean up userdata objects. I think it's safe as long as the only thing the finalizer does is free memory. (NOTE: WHERE?)
Update 2: I don't remember using userdata objects at all. I am not sure that Update 1 is the truth any more. Update 2: I don't remember using userdata objects at all. I am not sure that Update 1 is the truth any more.
## Token Literal Syntax Patch
Tokens are lightuserdata values encoding short alphanumeric strings as base37 numbers (see `Tokens-A-New-Lua-Type.md`). Previously, tokens could only be created in C++ and inserted into the Lua environment via `LuaTokenConstant`. This patch adds a literal syntax to the Lua parser so that tokens can be written directly in Lua source code using the `@` prefix:
```lua
local x = @null
local y = @found
```
The lexer (llex.c) recognizes `@` followed by one or more alphanumeric characters (a-z, 0-9, case insensitive, max 12 characters). It encodes the string as a base37 number using the same encoding as `LuaToken::parse()` in luastack.hpp and produces a `TK_TOKEN` token. The parser (lparser.c) handles `TK_TOKEN` in `simpleexp()` by storing it as a lightuserdata constant in the function's constant table via `luaK_lightuserdataK()` in lcode.c.
Underscores are not valid in token literals. Writing `@foo_bar` produces a lexer error rather than silently splitting into token `@foo` and identifier `_bar`.
This patch is live and functioning.

View File

@@ -413,6 +413,10 @@ private:
// and the result is: CH0*37^11 + CH1*37^10 + ... + CH11*37^0. // and the result is: CH0*37^11 + CH1*37^10 + ... + CH11*37^0.
// This fixed-width encoding ensures that numeric ordering matches // This fixed-width encoding ensures that numeric ordering matches
// lexicographic ordering of the original strings. // lexicographic ordering of the original strings.
//
// WARNING: The Lua lexer in llex.c contains a duplicate of this
// encoding logic (in the '@' token literal case). If you change
// the encoding here, you must update llex.c to match.
// Returns zero if the string is empty, too long, or contains // Returns zero if the string is empty, too long, or contains
// invalid characters. // invalid characters.
// //

View File

@@ -64,7 +64,7 @@ public:
} }
case LUA_TLIGHTUSERDATA: { case LUA_TLIGHTUSERDATA: {
LuaToken token = LS_.cktoken(val); LuaToken token = LS_.cktoken(val);
(*output_) << "[" << token.str() << "]"; (*output_) << "@" << token.str();
return; return;
} }
case LUA_TT_GENERAL: { case LUA_TT_GENERAL: {

View File

@@ -342,6 +342,13 @@ int luaK_numberK (FuncState *fs, lua_Number r) {
} }
/*
** Register the light userdata pointer 'p' as a constant of the function
** being compiled and return the value produced by 'addk' (used by the
** parser as the constant operand of a VK expression).
** The same TValue is handed to 'addk' for both of its value arguments,
** following the pattern used by 'boolK'.
*/
int luaK_lightuserdataK (FuncState *fs, void *p) {
  TValue tv;
  setpvalue(&tv, p);  /* tag 'tv' as light userdata holding 'p' */
  return addk(fs, &tv, &tv);
}
static int boolK (FuncState *fs, int b) { static int boolK (FuncState *fs, int b) {
TValue o; TValue o;
setbvalue(&o, b); setbvalue(&o, b);

View File

@@ -53,6 +53,7 @@ LUAI_FUNC void luaK_reserveregs (FuncState *fs, int n);
LUAI_FUNC void luaK_checkstack (FuncState *fs, int n); LUAI_FUNC void luaK_checkstack (FuncState *fs, int n);
LUAI_FUNC int luaK_stringK (FuncState *fs, TString *s); LUAI_FUNC int luaK_stringK (FuncState *fs, TString *s);
LUAI_FUNC int luaK_numberK (FuncState *fs, lua_Number r); LUAI_FUNC int luaK_numberK (FuncState *fs, lua_Number r);
LUAI_FUNC int luaK_lightuserdataK (FuncState *fs, void *p);
LUAI_FUNC void luaK_dischargevars (FuncState *fs, expdesc *e); LUAI_FUNC void luaK_dischargevars (FuncState *fs, expdesc *e);
LUAI_FUNC int luaK_exp2anyreg (FuncState *fs, expdesc *e); LUAI_FUNC int luaK_exp2anyreg (FuncState *fs, expdesc *e);
LUAI_FUNC void luaK_exp2anyregup (FuncState *fs, expdesc *e); LUAI_FUNC void luaK_exp2anyregup (FuncState *fs, expdesc *e);

View File

@@ -39,7 +39,7 @@ static const char *const luaX_tokens [] = {
"in", "local", "nil", "not", "or", "repeat", "in", "local", "nil", "not", "or", "repeat",
"return", "then", "true", "until", "while", "return", "then", "true", "until", "while",
"..", "...", "==", ">=", "<=", "~=", "::", "<eof>", "..", "...", "==", ">=", "<=", "~=", "::", "<eof>",
"<number>", "<name>", "<string>" "<number>", "<name>", "<string>", "<token>"
}; };
@@ -93,6 +93,7 @@ static const char *txtToken (LexState *ls, int token) {
case TK_NAME: case TK_NAME:
case TK_STRING: case TK_STRING:
case TK_NUMBER: case TK_NUMBER:
case TK_TOKEN:
save(ls, '\0'); save(ls, '\0');
return luaO_pushfstring(ls->L, LUA_QS, luaZ_buffer(ls->buff)); return luaO_pushfstring(ls->L, LUA_QS, luaZ_buffer(ls->buff));
default: default:
@@ -485,6 +486,28 @@ static int llex (LexState *ls, SemInfo *seminfo) {
case EOZ: { case EOZ: {
return TK_EOS; return TK_EOS;
} }
case '@': {  /* token literal: '@' followed by 1..12 chars of [0-9a-zA-Z] */
  /*
  ** Encodes the characters after '@' as a fixed-width, 12-digit base-37
  ** number and returns it through 'seminfo->p' with token type TK_TOKEN.
  ** Digit values: '0'..'9' -> 1..10, 'a'..'z' -> 11..36; upper-case
  ** letters map to the same values as lower-case ones. No character
  ** produces digit 0, so every valid literal encodes to a non-zero value.
  ** This must stay in sync with the C++ side (LuaToken::parse).
  ** NOTE(review): 37^12 does not fit in 32 bits; this assumes 'size_t'
  ** and 'void *' are at least 64 bits wide -- confirm for all targets.
  */
  size_t tokval = 0;  /* base-37 accumulator */
  int toklen = 0;     /* number of literal characters consumed */
  int i;              /* declared here (not in the 'for') to stay ANSI C */
  save_and_next(ls);  /* consume the '@' itself */
  for (;;) {
    char c = (char)ls->current;
    size_t digit;
    if (c >= '0' && c <= '9') digit = (size_t)(c - '0') + 1;
    else if (c >= 'a' && c <= 'z') digit = (size_t)(c - 'a') + 11;
    else if (c >= 'A' && c <= 'Z') digit = (size_t)(c - 'A') + 11;
    else break;  /* first character that is not part of the literal */
    tokval = tokval * 37 + digit;
    toklen++;
    save_and_next(ls);
  }
  /* Reject an empty literal, an over-long one, and a literal directly
     followed by '_' (so "@foo_bar" is an error rather than being split
     into the token @foo and the identifier _bar). An over-long run may
     have wrapped 'tokval', but that value is never used because of the
     length check. */
  if (toklen == 0 || toklen > 12 || ls->current == '_')
    lexerror(ls, "invalid token literal", TK_TOKEN);
  /* Pad to the fixed width of 12 digits: each multiplication appends a
     zero digit on the right. */
  for (i = toklen; i < 12; i++) tokval *= 37;
  seminfo->p = (void *)tokval;
  return TK_TOKEN;
}
default: { default: {
if (lislalpha(ls->current)) { /* identifier or reserved word? */ if (lislalpha(ls->current)) { /* identifier or reserved word? */
TString *ts; TString *ts;

View File

@@ -27,7 +27,7 @@ enum RESERVED {
TK_RETURN, TK_THEN, TK_TRUE, TK_UNTIL, TK_WHILE, TK_RETURN, TK_THEN, TK_TRUE, TK_UNTIL, TK_WHILE,
/* other terminal symbols */ /* other terminal symbols */
TK_CONCAT, TK_DOTS, TK_EQ, TK_GE, TK_LE, TK_NE, TK_DBCOLON, TK_EOS, TK_CONCAT, TK_DOTS, TK_EQ, TK_GE, TK_LE, TK_NE, TK_DBCOLON, TK_EOS,
TK_NUMBER, TK_NAME, TK_STRING TK_NUMBER, TK_NAME, TK_STRING, TK_TOKEN
}; };
/* number of reserved words */ /* number of reserved words */
@@ -37,6 +37,7 @@ enum RESERVED {
typedef union { typedef union {
lua_Number r; lua_Number r;
TString *ts; TString *ts;
void *p; /* for token (lightuserdata) literals */
} SemInfo; /* semantics information */ } SemInfo; /* semantics information */

View File

@@ -947,6 +947,10 @@ static void simpleexp (LexState *ls, expdesc *v) {
codestring(ls, v, ls->t.seminfo.ts); codestring(ls, v, ls->t.seminfo.ts);
break; break;
} }
case TK_TOKEN: {
  /* token literal: intern the lexer's light userdata value as a
     constant and describe the expression as that constant (VK) */
  int kidx = luaK_lightuserdataK(ls->fs, ls->t.seminfo.p);
  init_exp(v, VK, kidx);
  break;
}
case TK_NIL: { case TK_NIL: {
init_exp(v, VNIL, 0); init_exp(v, VNIL, 0);
break; break;