Add _ to the set of characters allowed in tokens
This commit is contained in:
@@ -101,6 +101,7 @@ Do not use git to make changes (commit, push, branch, etc.). Read-only git comma
|
|||||||
## Workflow
|
## Workflow
|
||||||
|
|
||||||
- When the user gives a direct command, execute it. But when proposing changes on your own initiative, describe the plan and get approval before editing files.
|
- When the user gives a direct command, execute it. But when proposing changes on your own initiative, describe the plan and get approval before editing files.
|
||||||
|
- If an instruction ends with an ellipsis (`...`), the user has more to say. Wait for the next message before acting.
|
||||||
|
|
||||||
## Coding Conventions
|
## Coding Conventions
|
||||||
|
|
||||||
|
|||||||
@@ -252,7 +252,7 @@ Update 2: I don't remember using userdata objects at all. I am not sure that Upd
|
|||||||
## Token Literal Syntax Patch
|
## Token Literal Syntax Patch
|
||||||
|
|
||||||
Tokens are lightuserdata values encoding short alphanumeric
|
Tokens are lightuserdata values encoding short alphanumeric (or `_`)
|
||||||
strings as base37 numbers (see `Tokens-A-New-Lua-Type.md`).
|
strings as base38 numbers (see `Tokens-A-New-Lua-Type.md`).
|
||||||
This patch adds a literal syntax to the Lua parser so that
|
This patch adds a literal syntax to the Lua parser so that
|
||||||
tokens can be written directly in Lua source code using the
|
tokens can be written directly in Lua source code using the
|
||||||
`@` prefix:
|
`@` prefix:
|
||||||
|
|||||||
@@ -47,16 +47,13 @@ LuaConstantReg *LuaConstantReg::All;
|
|||||||
|
|
||||||
|
|
||||||
eng::string LuaToken::str() const {
|
eng::string LuaToken::str() const {
|
||||||
|
static const char encoding[] =
|
||||||
|
"\0_0123456789abcdefghijklmnopqrstuvwxyz";
|
||||||
uint64_t n = (uint64_t)value;
|
uint64_t n = (uint64_t)value;
|
||||||
char buffer[13] = {};
|
char buffer[13] = {};
|
||||||
for (int i = 11; i >= 0; i--) {
|
for (int i = 11; i >= 0; i--) {
|
||||||
int d = n % 37;
|
buffer[i] = encoding[n % 38];
|
||||||
n /= 37;
|
n /= 38;
|
||||||
if (d >= 1 && d <= 10) {
|
|
||||||
buffer[i] = '0' + (d - 1);
|
|
||||||
} else if (d >= 11 && d <= 36) {
|
|
||||||
buffer[i] = 'a' + (d - 11);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
return eng::string(buffer);
|
return eng::string(buffer);
|
||||||
}
|
}
|
||||||
@@ -974,6 +971,9 @@ LuaDefine(unittests_token, "", "Unit tests for LuaToken encoding") {
|
|||||||
LuaAssertStrEq(L, LuaToken("a0").str(), "a0");
|
LuaAssertStrEq(L, LuaToken("a0").str(), "a0");
|
||||||
LuaAssertStrEq(L, LuaToken("0a").str(), "0a");
|
LuaAssertStrEq(L, LuaToken("0a").str(), "0a");
|
||||||
LuaAssertStrEq(L, LuaToken("000000000000").str(), "000000000000");
|
LuaAssertStrEq(L, LuaToken("000000000000").str(), "000000000000");
|
||||||
|
LuaAssertStrEq(L, LuaToken("foo_bar").str(), "foo_bar");
|
||||||
|
LuaAssertStrEq(L, LuaToken("_").str(), "_");
|
||||||
|
LuaAssertStrEq(L, LuaToken("a_b").str(), "a_b");
|
||||||
|
|
||||||
// Test that empty/invalid strings produce the empty token.
|
// Test that empty/invalid strings produce the empty token.
|
||||||
LuaAssert(L, LuaToken(std::string_view("")).empty());
|
LuaAssert(L, LuaToken(std::string_view("")).empty());
|
||||||
@@ -990,6 +990,10 @@ LuaDefine(unittests_token, "", "Unit tests for LuaToken encoding") {
|
|||||||
LuaAssert(L, LuaToken("hello").value > LuaToken("hell").value);
|
LuaAssert(L, LuaToken("hello").value > LuaToken("hell").value);
|
||||||
LuaAssert(L, LuaToken("a0").value > LuaToken("a").value);
|
LuaAssert(L, LuaToken("a0").value > LuaToken("a").value);
|
||||||
LuaAssert(L, LuaToken("a").value != LuaToken("a0").value);
|
LuaAssert(L, LuaToken("a").value != LuaToken("a0").value);
|
||||||
|
LuaAssert(L, LuaToken("0").value > LuaToken("_").value);
|
||||||
|
LuaAssert(L, LuaToken("a").value > LuaToken("9").value);
|
||||||
|
LuaAssert(L, LuaToken("a_b").value > LuaToken("a_a").value);
|
||||||
|
LuaAssert(L, LuaToken("foo_bar").value != LuaToken("foobar").value);
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -261,9 +261,10 @@
|
|||||||
// we have a json null.
|
// we have a json null.
|
||||||
//
|
//
|
||||||
// So that finally brings me to what a "token" is. A token is a lightuserdata
|
// So that finally brings me to what a "token" is. A token is a lightuserdata
|
||||||
// containing a short string encoded as a fixed-width base37 number. Tokens
|
// containing a short string encoded as a fixed-width base38 number. Tokens
|
||||||
// may only contain the characters a-z and 0-9, and can be up to 12 characters
|
// may only contain the characters a-z, 0-9, and underscore, and can be up to
|
||||||
// long (since 37^12 fits in 64 bits). In effect, it's a short string, but it's
|
// 12 characters long (since 38^12 fits in 64 bits). In effect, it's a short
|
||||||
|
// string, but it's
|
||||||
// a string that's distinguishable from a normal lua string. It doesn't have
|
// a string that's distinguishable from a normal lua string. It doesn't have
|
||||||
// the same type as a lua string (it shows up as a lightuserdata).
|
// the same type as a lua string (it shows up as a lightuserdata).
|
||||||
// The purpose of tokens is to represent special unique values, like json null.
|
// The purpose of tokens is to represent special unique values, like json null.
|
||||||
@@ -271,7 +272,7 @@
|
|||||||
// To make working with tokens easy, I've created a C++ struct 'LuaToken'.
|
// To make working with tokens easy, I've created a C++ struct 'LuaToken'.
|
||||||
// It stores an int64. You can construct a LuaToken in two different ways:
|
// It stores an int64. You can construct a LuaToken in two different ways:
|
||||||
//
|
//
|
||||||
// LuaToken(0x3D5E30BCAF2EF663)
|
// LuaToken(0x559D0F68151CB900)
|
||||||
// LuaToken("null")
|
// LuaToken("null")
|
||||||
//
|
//
|
||||||
// Those are equivalent. The second form is just as fast as the first,
|
// Those are equivalent. The second form is just as fast as the first,
|
||||||
@@ -408,12 +409,14 @@ enum LuaTableType {
|
|||||||
|
|
||||||
struct LuaToken {
|
struct LuaToken {
|
||||||
private:
|
private:
|
||||||
// Encode a token string as a fixed-width base37 number.
|
// Encode a token string as a fixed-width base38 number.
|
||||||
// Each character is mapped to a digit 1-36 (0 means "no character"),
|
// Each character is mapped to a digit 1-37 (0 means "no character"),
|
||||||
// and the result is: CH0*37^11 + CH1*37^10 + ... + CH11*37^0.
|
// and the result is: CH0*38^11 + CH1*38^10 + ... + CH11*38^0.
|
||||||
// This fixed-width encoding ensures that numeric ordering matches
|
// This fixed-width encoding ensures that numeric ordering matches
|
||||||
// lexicographic ordering of the original strings.
|
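// lexicographic ordering of the original strings, where characters
// are ranked by digit value (_ < 0-9 < a-z, with A-Z folded onto
// a-z) rather than by ASCII code.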
|
||||||
//
|
//
|
||||||
|
// Digit mapping: _ → 1, 0-9 → 2-11, a-z → 12-37.
|
||||||
|
//
|
||||||
// WARNING: The Lua lexer in llex.c contains a duplicate of this
|
// WARNING: The Lua lexer in llex.c contains a duplicate of this
|
||||||
// encoding logic (in the '@' token literal case). If you change
|
// encoding logic (in the '@' token literal case). If you change
|
||||||
// the encoding here, you must update llex.c to match.
|
// the encoding here, you must update llex.c to match.
|
||||||
@@ -428,20 +431,22 @@ private:
|
|||||||
for (int i = 0; i < int(str.size()); i++) {
|
for (int i = 0; i < int(str.size()); i++) {
|
||||||
char c = str[i];
|
char c = str[i];
|
||||||
uint64_t digit = 0;
|
uint64_t digit = 0;
|
||||||
if ((c >= '0') && (c <= '9')) {
|
if (c == '_') {
|
||||||
digit = uint64_t(c - '0') + 1;
|
digit = 1;
|
||||||
|
} else if ((c >= '0') && (c <= '9')) {
|
||||||
|
digit = uint64_t(c - '0') + 2;
|
||||||
} else if ((c >= 'a') && (c <= 'z')) {
|
} else if ((c >= 'a') && (c <= 'z')) {
|
||||||
digit = uint64_t(c - 'a') + 11;
|
digit = uint64_t(c - 'a') + 12;
|
||||||
} else if ((c >= 'A') && (c <= 'Z')) {
|
} else if ((c >= 'A') && (c <= 'Z')) {
|
||||||
digit = uint64_t(c - 'A') + 11;
|
digit = uint64_t(c - 'A') + 12;
|
||||||
} else {
|
} else {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
result = result * 37 + digit;
|
result = result * 38 + digit;
|
||||||
}
|
}
|
||||||
// Pad remaining positions with zeros (no character).
|
// Pad remaining positions with zeros (no character).
|
||||||
for (int i = int(str.size()); i < 12; i++) {
|
for (int i = int(str.size()); i < 12; i++) {
|
||||||
result = result * 37;
|
result = result * 38;
|
||||||
}
|
}
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -493,18 +493,19 @@ static int llex (LexState *ls, SemInfo *seminfo) {
|
|||||||
while (1) {
|
while (1) {
|
||||||
char c = (char)ls->current;
|
char c = (char)ls->current;
|
||||||
size_t digit;
|
size_t digit;
|
||||||
if (c >= '0' && c <= '9') digit = (size_t)(c - '0') + 1;
|
if (c == '_') digit = 1;
|
||||||
else if (c >= 'a' && c <= 'z') digit = (size_t)(c - 'a') + 11;
|
else if (c >= '0' && c <= '9') digit = (size_t)(c - '0') + 2;
|
||||||
else if (c >= 'A' && c <= 'Z') digit = (size_t)(c - 'A') + 11;
|
else if (c >= 'a' && c <= 'z') digit = (size_t)(c - 'a') + 12;
|
||||||
|
else if (c >= 'A' && c <= 'Z') digit = (size_t)(c - 'A') + 12;
|
||||||
else break;
|
else break;
|
||||||
tokval = tokval * 37 + digit;
|
tokval = tokval * 38 + digit;
|
||||||
toklen++;
|
toklen++;
|
||||||
save_and_next(ls);
|
save_and_next(ls);
|
||||||
}
|
}
|
||||||
if (toklen == 0 || toklen > 12 || ls->current == '_')
|
if (toklen == 0 || toklen > 12)
|
||||||
lexerror(ls, "invalid token literal", TK_TOKEN);
|
lexerror(ls, "invalid token literal", TK_TOKEN);
|
||||||
/* Pad to fixed width of 12 digits. */
|
/* Pad to fixed width of 12 digits. */
|
||||||
for (int i = toklen; i < 12; i++) tokval *= 37;
|
for (int i = toklen; i < 12; i++) tokval *= 38;
|
||||||
seminfo->p = (void *)tokval;
|
seminfo->p = (void *)tokval;
|
||||||
return TK_TOKEN;
|
return TK_TOKEN;
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user