Add _ to the set of characters allowed in tokens
This commit is contained in:
@@ -47,16 +47,13 @@ LuaConstantReg *LuaConstantReg::All;
|
||||
|
||||
|
||||
// Decode this token back into its textual form.
//
// The token value is treated as a fixed-width, 12-digit base38 number.
// Each digit selects one entry of the table below: digit 0 means
// "no character" (NUL), digit 1 is '_', digits 2-11 are '0'-'9', and
// digits 12-37 are 'a'-'z'.
//
// Digits are extracted least-significant first, so the buffer is filled
// from the back. Trailing "no character" digits become NUL bytes, which
// terminate the C string handed to eng::string; an all-zero value
// therefore yields an empty string.
eng::string LuaToken::str() const {
    static const char encoding[] =
        "\0_0123456789abcdefghijklmnopqrstuvwxyz";
    // 38 digit characters plus the literal's implicit terminator.
    static_assert(sizeof(encoding) == 38 + 1,
                  "token encoding table must have exactly 38 entries");

    uint64_t n = static_cast<uint64_t>(value);

    // 12 characters plus a guaranteed trailing NUL (zero-initialised).
    char buffer[13] = {};
    for (int i = 11; i >= 0; i--) {
        // n % 38 is always a valid index into the 38-entry table.
        buffer[i] = encoding[n % 38];
        n /= 38;
    }
    return eng::string(buffer);
}
|
||||
@@ -974,6 +971,9 @@ LuaDefine(unittests_token, "", "Unit tests for LuaToken encoding") {
|
||||
LuaAssertStrEq(L, LuaToken("a0").str(), "a0");
|
||||
LuaAssertStrEq(L, LuaToken("0a").str(), "0a");
|
||||
LuaAssertStrEq(L, LuaToken("000000000000").str(), "000000000000");
|
||||
LuaAssertStrEq(L, LuaToken("foo_bar").str(), "foo_bar");
|
||||
LuaAssertStrEq(L, LuaToken("_").str(), "_");
|
||||
LuaAssertStrEq(L, LuaToken("a_b").str(), "a_b");
|
||||
|
||||
// Test that empty/invalid strings produce the empty token.
|
||||
LuaAssert(L, LuaToken(std::string_view("")).empty());
|
||||
@@ -990,6 +990,10 @@ LuaDefine(unittests_token, "", "Unit tests for LuaToken encoding") {
|
||||
LuaAssert(L, LuaToken("hello").value > LuaToken("hell").value);
|
||||
LuaAssert(L, LuaToken("a0").value > LuaToken("a").value);
|
||||
LuaAssert(L, LuaToken("a").value != LuaToken("a0").value);
|
||||
LuaAssert(L, LuaToken("0").value > LuaToken("_").value);
|
||||
LuaAssert(L, LuaToken("a").value > LuaToken("9").value);
|
||||
LuaAssert(L, LuaToken("a_b").value > LuaToken("a_a").value);
|
||||
LuaAssert(L, LuaToken("foo_bar").value != LuaToken("foobar").value);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -261,9 +261,10 @@
|
||||
// we have a json null.
|
||||
//
|
||||
// So that finally brings me to what a "token" is. A token is a lightuserdata
|
||||
// containing a short string encoded as a fixed-width base38 number. Tokens
// may only contain the characters a-z, 0-9, and underscore, and can be up to
// 12 characters long (since 38^12 fits in 64 bits). In effect, it's a short
// string, but it's
|
||||
// a string that's distinguishable from a normal lua string. It doesn't have
|
||||
// the same type as a lua string (it shows up as a lightuserdata).
|
||||
// The purpose of tokens is to represent special unique values, like json null.
|
||||
@@ -271,7 +272,7 @@
|
||||
// To make working with tokens easy, I've created a C++ struct 'LuaToken'.
|
||||
// It stores an int64. You can construct a LuaToken in two different ways:
|
||||
//
|
||||
// LuaToken(0x559D0F68151CB900)
|
||||
// LuaToken("null")
|
||||
//
|
||||
// Those are equivalent. The second form is just as fast as the first,
|
||||
@@ -408,12 +409,14 @@ enum LuaTableType {
|
||||
|
||||
struct LuaToken {
|
||||
private:
|
||||
// Encode a token string as a fixed-width base38 number.
// Each character is mapped to a digit 1-37 (0 means "no character"),
// and the result is: CH0*38^11 + CH1*38^10 + ... + CH11*38^0.
|
||||
// This fixed-width encoding ensures that numeric ordering matches
|
||||
// lexicographic ordering of the original strings.
|
||||
//
|
||||
// Digit mapping: _ → 1, 0-9 → 2-11, a-z → 12-37.
|
||||
//
|
||||
// WARNING: The Lua lexer in llex.c contains a duplicate of this
|
||||
// encoding logic (in the '@' token literal case). If you change
|
||||
// the encoding here, you must update llex.c to match.
|
||||
@@ -428,20 +431,22 @@ private:
|
||||
for (int i = 0; i < int(str.size()); i++) {
|
||||
char c = str[i];
|
||||
uint64_t digit = 0;
|
||||
if ((c >= '0') && (c <= '9')) {
|
||||
digit = uint64_t(c - '0') + 1;
|
||||
if (c == '_') {
|
||||
digit = 1;
|
||||
} else if ((c >= '0') && (c <= '9')) {
|
||||
digit = uint64_t(c - '0') + 2;
|
||||
} else if ((c >= 'a') && (c <= 'z')) {
|
||||
digit = uint64_t(c - 'a') + 11;
|
||||
digit = uint64_t(c - 'a') + 12;
|
||||
} else if ((c >= 'A') && (c <= 'Z')) {
|
||||
digit = uint64_t(c - 'A') + 11;
|
||||
digit = uint64_t(c - 'A') + 12;
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
result = result * 37 + digit;
|
||||
result = result * 38 + digit;
|
||||
}
|
||||
// Pad remaining positions with zeros (no character).
|
||||
for (int i = int(str.size()); i < 12; i++) {
|
||||
result = result * 37;
|
||||
result = result * 38;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
@@ -493,18 +493,19 @@ static int llex (LexState *ls, SemInfo *seminfo) {
|
||||
while (1) {
|
||||
char c = (char)ls->current;
|
||||
size_t digit;
|
||||
if (c >= '0' && c <= '9') digit = (size_t)(c - '0') + 1;
|
||||
else if (c >= 'a' && c <= 'z') digit = (size_t)(c - 'a') + 11;
|
||||
else if (c >= 'A' && c <= 'Z') digit = (size_t)(c - 'A') + 11;
|
||||
if (c == '_') digit = 1;
|
||||
else if (c >= '0' && c <= '9') digit = (size_t)(c - '0') + 2;
|
||||
else if (c >= 'a' && c <= 'z') digit = (size_t)(c - 'a') + 12;
|
||||
else if (c >= 'A' && c <= 'Z') digit = (size_t)(c - 'A') + 12;
|
||||
else break;
|
||||
tokval = tokval * 37 + digit;
|
||||
tokval = tokval * 38 + digit;
|
||||
toklen++;
|
||||
save_and_next(ls);
|
||||
}
|
||||
if (toklen == 0 || toklen > 12 || ls->current == '_')
|
||||
if (toklen == 0 || toklen > 12)
|
||||
lexerror(ls, "invalid token literal", TK_TOKEN);
|
||||
/* Pad to fixed width of 12 digits. */
|
||||
for (int i = toklen; i < 12; i++) tokval *= 37;
|
||||
for (int i = toklen; i < 12; i++) tokval *= 38;
|
||||
seminfo->p = (void *)tokval;
|
||||
return TK_TOKEN;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user