Add token literals to the lua parser
This commit is contained in:
@@ -248,3 +248,18 @@ GC Finalizers and weak tables both introduce nondeterminism into Lua execution.
|
||||
Update 1: I'm using GC finalizers in some cases to clean up userdata objects. I think it's safe as long as the only thing the finalizer does is free memory. (NOTE: WHERE?)
|
||||
|
||||
Update 2: I don't remember using userdata objects at all, so I am no longer sure that Update 1 is accurate.
|
||||
|
||||
## Token Literal Syntax Patch
|
||||
|
||||
Tokens are lightuserdata values encoding short alphanumeric strings as base37 numbers (see `Tokens-A-New-Lua-Type.md`). Previously, tokens could only be created in C++ and inserted into the Lua environment via `LuaTokenConstant`. This patch adds a literal syntax to the Lua parser so that tokens can be written directly in Lua source code using the `@` prefix:
|
||||
|
||||
```lua
|
||||
local x = @null
|
||||
local y = @found
|
||||
```
|
||||
|
||||
The lexer (llex.c) recognizes `@` followed by one or more alphanumeric characters (a-z, 0-9, case insensitive, max 12 characters). It encodes the string as a base37 number using the same encoding as `LuaToken::parse()` in luastack.hpp and produces a `TK_TOKEN` token. The parser (lparser.c) handles `TK_TOKEN` in `simpleexp()` by storing it as a lightuserdata constant in the function's constant table via `luaK_lightuserdataK()` in lcode.c.
|
||||
|
||||
Underscores are not valid in token literals. Writing `@foo_bar` produces a lexer error rather than silently splitting into token `@foo` and identifier `_bar`.
|
||||
|
||||
This patch is live and functioning.
|
||||
|
||||
@@ -413,6 +413,10 @@ private:
|
||||
// and the result is: CH0*37^11 + CH1*37^10 + ... + CH11*37^0.
|
||||
// This fixed-width encoding ensures that numeric ordering matches
|
||||
// lexicographic ordering of the original strings.
|
||||
//
|
||||
// WARNING: The Lua lexer in llex.c contains a duplicate of this
|
||||
// encoding logic (in the '@' token literal case). If you change
|
||||
// the encoding here, you must update llex.c to match.
|
||||
//
|
||||
// Returns zero if the string is empty, too long, or contains
|
||||
// invalid characters.
|
||||
//
|
||||
|
||||
@@ -64,7 +64,7 @@ public:
|
||||
}
|
||||
case LUA_TLIGHTUSERDATA: {
|
||||
LuaToken token = LS_.cktoken(val);
|
||||
(*output_) << "[" << token.str() << "]";
|
||||
(*output_) << "@" << token.str();
|
||||
return;
|
||||
}
|
||||
case LUA_TT_GENERAL: {
|
||||
|
||||
@@ -342,6 +342,13 @@ int luaK_numberK (FuncState *fs, lua_Number r) {
|
||||
}
|
||||
|
||||
|
||||
/*
** Wrap pointer 'p' in a temporary TValue (via setpvalue) and pass it to
** addk(), using that same value for both of addk()'s TValue arguments.
** The int returned by addk() is returned unchanged; simpleexp() uses it
** as the info field of a VK expression when it parses a TK_TOKEN literal.
*/
int luaK_lightuserdataK (FuncState *fs, void *p) {
|
||||
TValue o;
|
||||
setpvalue(&o, p);
|
||||
return addk(fs, &o, &o);
|
||||
}
|
||||
|
||||
|
||||
static int boolK (FuncState *fs, int b) {
|
||||
TValue o;
|
||||
setbvalue(&o, b);
|
||||
|
||||
@@ -53,6 +53,7 @@ LUAI_FUNC void luaK_reserveregs (FuncState *fs, int n);
|
||||
LUAI_FUNC void luaK_checkstack (FuncState *fs, int n);
|
||||
LUAI_FUNC int luaK_stringK (FuncState *fs, TString *s);
|
||||
LUAI_FUNC int luaK_numberK (FuncState *fs, lua_Number r);
|
||||
LUAI_FUNC int luaK_lightuserdataK (FuncState *fs, void *p);
|
||||
LUAI_FUNC void luaK_dischargevars (FuncState *fs, expdesc *e);
|
||||
LUAI_FUNC int luaK_exp2anyreg (FuncState *fs, expdesc *e);
|
||||
LUAI_FUNC void luaK_exp2anyregup (FuncState *fs, expdesc *e);
|
||||
|
||||
@@ -39,7 +39,7 @@ static const char *const luaX_tokens [] = {
|
||||
"in", "local", "nil", "not", "or", "repeat",
|
||||
"return", "then", "true", "until", "while",
|
||||
"..", "...", "==", ">=", "<=", "~=", "::", "<eof>",
|
||||
"<number>", "<name>", "<string>"
|
||||
"<number>", "<name>", "<string>", "<token>"
|
||||
};
|
||||
|
||||
|
||||
@@ -93,6 +93,7 @@ static const char *txtToken (LexState *ls, int token) {
|
||||
case TK_NAME:
|
||||
case TK_STRING:
|
||||
case TK_NUMBER:
|
||||
case TK_TOKEN:
|
||||
save(ls, '\0');
|
||||
return luaO_pushfstring(ls->L, LUA_QS, luaZ_buffer(ls->buff));
|
||||
default:
|
||||
@@ -485,6 +486,28 @@ static int llex (LexState *ls, SemInfo *seminfo) {
|
||||
case EOZ: {
|
||||
return TK_EOS;
|
||||
}
|
||||
case '@': { /* token literal */
|
||||
size_t tokval = 0;
|
||||
int toklen = 0;
|
||||
save_and_next(ls);
|
||||
while (1) {
|
||||
char c = (char)ls->current;
|
||||
size_t digit;
|
||||
if (c >= '0' && c <= '9') digit = (size_t)(c - '0') + 1;
|
||||
else if (c >= 'a' && c <= 'z') digit = (size_t)(c - 'a') + 11;
|
||||
else if (c >= 'A' && c <= 'Z') digit = (size_t)(c - 'A') + 11;
|
||||
else break;
|
||||
tokval = tokval * 37 + digit;
|
||||
toklen++;
|
||||
save_and_next(ls);
|
||||
}
|
||||
if (toklen == 0 || toklen > 12 || ls->current == '_')
|
||||
lexerror(ls, "invalid token literal", TK_TOKEN);
|
||||
/* Pad to fixed width of 12 digits. */
|
||||
for (int i = toklen; i < 12; i++) tokval *= 37;
|
||||
seminfo->p = (void *)tokval;
|
||||
return TK_TOKEN;
|
||||
}
|
||||
default: {
|
||||
if (lislalpha(ls->current)) { /* identifier or reserved word? */
|
||||
TString *ts;
|
||||
|
||||
@@ -27,7 +27,7 @@ enum RESERVED {
|
||||
TK_RETURN, TK_THEN, TK_TRUE, TK_UNTIL, TK_WHILE,
|
||||
/* other terminal symbols */
|
||||
TK_CONCAT, TK_DOTS, TK_EQ, TK_GE, TK_LE, TK_NE, TK_DBCOLON, TK_EOS,
|
||||
TK_NUMBER, TK_NAME, TK_STRING
|
||||
TK_NUMBER, TK_NAME, TK_STRING, TK_TOKEN
|
||||
};
|
||||
|
||||
/* number of reserved words */
|
||||
@@ -37,6 +37,7 @@ enum RESERVED {
|
||||
/*
** Value carried alongside a lexed token. Which member is meaningful
** depends on the token kind returned by the lexer.
*/
typedef union {
|
||||
lua_Number r;  /* numeric value -- presumably for TK_NUMBER; confirm in llex.c */
|
||||
TString *ts;  /* string value; simpleexp() passes it to codestring() */
|
||||
void *p; /* for token (lightuserdata) literals: set by the '@' case in llex(), read by simpleexp() for TK_TOKEN */
|
||||
} SemInfo; /* semantics information */
|
||||
|
||||
|
||||
|
||||
@@ -947,6 +947,10 @@ static void simpleexp (LexState *ls, expdesc *v) {
|
||||
codestring(ls, v, ls->t.seminfo.ts);
|
||||
break;
|
||||
}
|
||||
case TK_TOKEN: {
|
||||
init_exp(v, VK, luaK_lightuserdataK(ls->fs, ls->t.seminfo.p));
|
||||
break;
|
||||
}
|
||||
case TK_NIL: {
|
||||
init_exp(v, VNIL, 0);
|
||||
break;
|
||||
|
||||
Reference in New Issue
Block a user