Add token literals to the lua parser
This commit is contained in:
@@ -248,3 +248,18 @@ GC Finalizers and weak tables both introduce nondeterminism into Lua execution.
|
|||||||
Update 1: I'm using GC finalizers in some cases to clean up userdata objects. I think it's safe as long as the only thing the finalizer does is free memory. (NOTE: WHERE?)
|
Update 1: I'm using GC finalizers in some cases to clean up userdata objects. I think it's safe as long as the only thing the finalizer does is free memory. (NOTE: WHERE?)
|
||||||
|
|
||||||
Update 2: I don't remember using userdata objects at all. I am no longer sure that Update 1 is true.
|
Update 2: I don't remember using userdata objects at all. I am no longer sure that Update 1 is true.
|
||||||
|
|
||||||
|
## Token Literal Syntax Patch
|
||||||
|
|
||||||
|
Tokens are lightuserdata values encoding short alphanumeric strings as base37 numbers (see `Tokens-A-New-Lua-Type.md`). Previously, tokens could only be created in C++ and inserted into the Lua environment via `LuaTokenConstant`. This patch adds a literal syntax to the Lua parser so that tokens can be written directly in Lua source code using the `@` prefix:
|
||||||
|
|
||||||
|
```lua
|
||||||
|
local x = @null
|
||||||
|
local y = @found
|
||||||
|
```
|
||||||
|
|
||||||
|
The lexer (llex.c) recognizes `@` followed by one to twelve alphanumeric characters (a-z, 0-9; letters are case-insensitive, so `@Null` and `@null` are the same token). It encodes the string as a base37 number using the same encoding as `LuaToken::parse()` in luastack.hpp and produces a `TK_TOKEN` token. A bare `@` or a literal longer than 12 characters is a lexer error. The parser (lparser.c) handles `TK_TOKEN` in `simpleexp()` by storing it as a lightuserdata constant in the function's constant table via `luaK_lightuserdataK()` in lcode.c.
|
||||||
|
|
||||||
|
Underscores are not valid in token literals. Writing `@foo_bar` produces a lexer error rather than silently splitting into token `@foo` and identifier `_bar`.
|
||||||
|
|
||||||
|
This patch is live and functioning.
|
||||||
|
|||||||
@@ -413,6 +413,10 @@ private:
|
|||||||
// and the result is: CH0*37^11 + CH1*37^10 + ... + CH11*37^0.
|
// and the result is: CH0*37^11 + CH1*37^10 + ... + CH11*37^0.
|
||||||
// This fixed-width encoding ensures that numeric ordering matches
|
// This fixed-width encoding ensures that numeric ordering matches
|
||||||
// lexicographic ordering of the original strings.
|
// lexicographic ordering of the original strings.
|
||||||
|
//
|
||||||
|
// WARNING: The Lua lexer in llex.c contains a duplicate of this
|
||||||
|
// encoding logic (in the '@' token literal case). If you change
|
||||||
|
// the encoding here, you must update llex.c to match.
|
||||||
// Returns zero if the string is empty, too long, or contains
|
// Returns zero if the string is empty, too long, or contains
|
||||||
// invalid characters.
|
// invalid characters.
|
||||||
//
|
//
|
||||||
|
|||||||
@@ -64,7 +64,7 @@ public:
|
|||||||
}
|
}
|
||||||
case LUA_TLIGHTUSERDATA: {
|
case LUA_TLIGHTUSERDATA: {
|
||||||
LuaToken token = LS_.cktoken(val);
|
LuaToken token = LS_.cktoken(val);
|
||||||
(*output_) << "[" << token.str() << "]";
|
(*output_) << "@" << token.str();
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
case LUA_TT_GENERAL: {
|
case LUA_TT_GENERAL: {
|
||||||
|
|||||||
@@ -342,6 +342,13 @@ int luaK_numberK (FuncState *fs, lua_Number r) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
** Wraps the raw pointer 'p' in a TValue (setpvalue) and registers it
** through addk, passing that same TValue for both of addk's value
** arguments. Returns addk's result unchanged; the parser hands that
** result to init_exp as the info of a VK expression when it sees a
** TK_TOKEN ('@' token literal).
*/
int luaK_lightuserdataK (FuncState *fs, void *p) {
|
||||||
|
TValue o;
|
||||||
|
setpvalue(&o, p);
|
||||||
|
return addk(fs, &o, &o);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
static int boolK (FuncState *fs, int b) {
|
static int boolK (FuncState *fs, int b) {
|
||||||
TValue o;
|
TValue o;
|
||||||
setbvalue(&o, b);
|
setbvalue(&o, b);
|
||||||
|
|||||||
@@ -53,6 +53,7 @@ LUAI_FUNC void luaK_reserveregs (FuncState *fs, int n);
|
|||||||
LUAI_FUNC void luaK_checkstack (FuncState *fs, int n);
|
LUAI_FUNC void luaK_checkstack (FuncState *fs, int n);
|
||||||
LUAI_FUNC int luaK_stringK (FuncState *fs, TString *s);
|
LUAI_FUNC int luaK_stringK (FuncState *fs, TString *s);
|
||||||
LUAI_FUNC int luaK_numberK (FuncState *fs, lua_Number r);
|
LUAI_FUNC int luaK_numberK (FuncState *fs, lua_Number r);
|
||||||
|
LUAI_FUNC int luaK_lightuserdataK (FuncState *fs, void *p);
|
||||||
LUAI_FUNC void luaK_dischargevars (FuncState *fs, expdesc *e);
|
LUAI_FUNC void luaK_dischargevars (FuncState *fs, expdesc *e);
|
||||||
LUAI_FUNC int luaK_exp2anyreg (FuncState *fs, expdesc *e);
|
LUAI_FUNC int luaK_exp2anyreg (FuncState *fs, expdesc *e);
|
||||||
LUAI_FUNC void luaK_exp2anyregup (FuncState *fs, expdesc *e);
|
LUAI_FUNC void luaK_exp2anyregup (FuncState *fs, expdesc *e);
|
||||||
|
|||||||
@@ -39,7 +39,7 @@ static const char *const luaX_tokens [] = {
|
|||||||
"in", "local", "nil", "not", "or", "repeat",
|
"in", "local", "nil", "not", "or", "repeat",
|
||||||
"return", "then", "true", "until", "while",
|
"return", "then", "true", "until", "while",
|
||||||
"..", "...", "==", ">=", "<=", "~=", "::", "<eof>",
|
"..", "...", "==", ">=", "<=", "~=", "::", "<eof>",
|
||||||
"<number>", "<name>", "<string>"
|
"<number>", "<name>", "<string>", "<token>"
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
@@ -93,6 +93,7 @@ static const char *txtToken (LexState *ls, int token) {
|
|||||||
case TK_NAME:
|
case TK_NAME:
|
||||||
case TK_STRING:
|
case TK_STRING:
|
||||||
case TK_NUMBER:
|
case TK_NUMBER:
|
||||||
|
case TK_TOKEN:
|
||||||
save(ls, '\0');
|
save(ls, '\0');
|
||||||
return luaO_pushfstring(ls->L, LUA_QS, luaZ_buffer(ls->buff));
|
return luaO_pushfstring(ls->L, LUA_QS, luaZ_buffer(ls->buff));
|
||||||
default:
|
default:
|
||||||
@@ -485,6 +486,28 @@ static int llex (LexState *ls, SemInfo *seminfo) {
|
|||||||
case EOZ: {
|
case EOZ: {
|
||||||
return TK_EOS;
|
return TK_EOS;
|
||||||
}
|
}
|
||||||
|
case '@': { /* token literal */
|
||||||
|
size_t tokval = 0;
|
||||||
|
int toklen = 0;
|
||||||
|
save_and_next(ls);
|
||||||
|
while (1) {
|
||||||
|
char c = (char)ls->current;
|
||||||
|
size_t digit;
|
||||||
|
if (c >= '0' && c <= '9') digit = (size_t)(c - '0') + 1;
|
||||||
|
else if (c >= 'a' && c <= 'z') digit = (size_t)(c - 'a') + 11;
|
||||||
|
else if (c >= 'A' && c <= 'Z') digit = (size_t)(c - 'A') + 11;
|
||||||
|
else break;
|
||||||
|
tokval = tokval * 37 + digit;
|
||||||
|
toklen++;
|
||||||
|
save_and_next(ls);
|
||||||
|
}
|
||||||
|
if (toklen == 0 || toklen > 12 || ls->current == '_')
|
||||||
|
lexerror(ls, "invalid token literal", TK_TOKEN);
|
||||||
|
/* Pad to fixed width of 12 digits. */
|
||||||
|
for (int i = toklen; i < 12; i++) tokval *= 37;
|
||||||
|
seminfo->p = (void *)tokval;
|
||||||
|
return TK_TOKEN;
|
||||||
|
}
|
||||||
default: {
|
default: {
|
||||||
if (lislalpha(ls->current)) { /* identifier or reserved word? */
|
if (lislalpha(ls->current)) { /* identifier or reserved word? */
|
||||||
TString *ts;
|
TString *ts;
|
||||||
|
|||||||
@@ -27,7 +27,7 @@ enum RESERVED {
|
|||||||
TK_RETURN, TK_THEN, TK_TRUE, TK_UNTIL, TK_WHILE,
|
TK_RETURN, TK_THEN, TK_TRUE, TK_UNTIL, TK_WHILE,
|
||||||
/* other terminal symbols */
|
/* other terminal symbols */
|
||||||
TK_CONCAT, TK_DOTS, TK_EQ, TK_GE, TK_LE, TK_NE, TK_DBCOLON, TK_EOS,
|
TK_CONCAT, TK_DOTS, TK_EQ, TK_GE, TK_LE, TK_NE, TK_DBCOLON, TK_EOS,
|
||||||
TK_NUMBER, TK_NAME, TK_STRING
|
TK_NUMBER, TK_NAME, TK_STRING, TK_TOKEN
|
||||||
};
|
};
|
||||||
|
|
||||||
/* number of reserved words */
|
/* number of reserved words */
|
||||||
@@ -37,6 +37,7 @@ enum RESERVED {
|
|||||||
typedef union {
|
typedef union {
|
||||||
lua_Number r;
|
lua_Number r;
|
||||||
TString *ts;
|
TString *ts;
|
||||||
|
void *p; /* for token (lightuserdata) literals */
|
||||||
} SemInfo; /* semantics information */
|
} SemInfo; /* semantics information */
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -947,6 +947,10 @@ static void simpleexp (LexState *ls, expdesc *v) {
|
|||||||
codestring(ls, v, ls->t.seminfo.ts);
|
codestring(ls, v, ls->t.seminfo.ts);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
case TK_TOKEN: {
|
||||||
|
init_exp(v, VK, luaK_lightuserdataK(ls->fs, ls->t.seminfo.p));
|
||||||
|
break;
|
||||||
|
}
|
||||||
case TK_NIL: {
|
case TK_NIL: {
|
||||||
init_exp(v, VNIL, 0);
|
init_exp(v, VNIL, 0);
|
||||||
break;
|
break;
|
||||||
|
|||||||
Reference in New Issue
Block a user