Better support for serialization and sorting of the Token data type
This commit is contained in:
@@ -48,19 +48,17 @@ LuaConstantReg *LuaConstantReg::All;
|
||||
|
||||
// Decode the token back into its textual form.
//
// The value is read as a fixed-width, 12-digit base-37 number, most
// significant digit first. Digits 1-10 decode to '0'-'9' and digits 11-36
// decode to 'a'-'z'. Digit 0 means "no character": that position keeps the
// NUL it was initialised with, which is where the returned string ends.
eng::string LuaToken::str() const {
    uint64_t n = static_cast<uint64_t>(value);

    // 12 characters plus a terminating NUL. Zero-initialised so that any
    // position holding digit 0 terminates the C string.
    char buffer[13] = {};

    // Peel digits off from the least significant end, which is the last
    // character position.
    for (int i = 11; i >= 0; i--) {
        const int d = static_cast<int>(n % 37);
        n /= 37;
        if (d >= 1 && d <= 10) {
            buffer[i] = static_cast<char>('0' + (d - 1));
        } else if (d >= 11 && d <= 36) {
            buffer[i] = static_cast<char>('a' + (d - 11));
        }
    }
    return eng::string(buffer);
}
|
||||
|
||||
static int panicf(lua_State *L) {
|
||||
@@ -612,7 +610,7 @@ bool LuaCoreStack::tangetclass(LuaSlot classobj, LuaSlot tab) {
|
||||
|
||||
// Report whether the value in slot `s` has a type that may be used as a
// sortable key: boolean, number, string, or light userdata.
bool LuaCoreStack::issortablekey(LuaSlot s) const {
    switch (lua_type(L_, s)) {
        case LUA_TBOOLEAN:
        case LUA_TNUMBER:
        case LUA_TSTRING:
        case LUA_TLIGHTUSERDATA:
            return true;
        default:
            return false;
    }
}
|
||||
|
||||
void LuaCoreStack::movesortablekey(LuaSlot key, LuaCoreStack &otherstack, LuaSlot otherslot) {
|
||||
@@ -633,6 +631,10 @@ void LuaCoreStack::movesortablekey(LuaSlot key, LuaCoreStack &otherstack, LuaSlo
|
||||
lua_replace(otherstack.L_, otherslot);
|
||||
break;
|
||||
}
|
||||
case LUA_TLIGHTUSERDATA:
|
||||
lua_pushlightuserdata(otherstack.L_, lua_touserdata(L_, key));
|
||||
lua_replace(otherstack.L_, otherslot);
|
||||
break;
|
||||
default:
|
||||
assert(false && "movesortablekey: not a sortable key");
|
||||
}
|
||||
@@ -960,3 +962,35 @@ LuaDefine(keywords_finalcheckthrow, "table", kwdoc) {
|
||||
return LS.result();
|
||||
}
|
||||
|
||||
// Unit tests for the LuaToken string <-> integer encoding: round-tripping
// through str(), rejection of invalid input, and agreement between numeric
// ordering of the encoded values and lexicographic ordering of the strings.
LuaDefine(unittests_token, "", "Unit tests for LuaToken encoding") {
    // Test round-trip encoding for various strings.
    LuaAssertStrEq(L, LuaToken("a").str(), "a");
    LuaAssertStrEq(L, LuaToken("z").str(), "z");
    LuaAssertStrEq(L, LuaToken("0").str(), "0");
    LuaAssertStrEq(L, LuaToken("9").str(), "9");
    LuaAssertStrEq(L, LuaToken("null").str(), "null");
    LuaAssertStrEq(L, LuaToken("hello").str(), "hello");
    LuaAssertStrEq(L, LuaToken("zzzzzzzzzzzz").str(), "zzzzzzzzzzzz");
    LuaAssertStrEq(L, LuaToken("a0").str(), "a0");
    LuaAssertStrEq(L, LuaToken("0a").str(), "0a");
    LuaAssertStrEq(L, LuaToken("000000000000").str(), "000000000000");

    // Uppercase letters are accepted on input and map to the same digits
    // as lowercase, so they decode as lowercase.
    LuaAssertStrEq(L, LuaToken(std::string_view("NULL")).str(), "null");
    LuaAssert(L, LuaToken(std::string_view("NULL")).value == LuaToken("null").value);

    // Test that empty/invalid strings produce the empty token.
    LuaAssert(L, LuaToken(std::string_view("")).empty());
    LuaAssert(L, LuaToken(std::string_view("hello world")).empty());
    LuaAssert(L, LuaToken(std::string_view("aaaaaaaaaaaaa")).empty()); // 13 chars

    // Test that numeric ordering matches lexicographic ordering.
    LuaAssert(L, LuaToken("a").value > LuaToken("0").value);
    LuaAssert(L, LuaToken("b").value > LuaToken("a").value);
    LuaAssert(L, LuaToken("aa").value > LuaToken("a").value);
    LuaAssert(L, LuaToken("ab").value > LuaToken("aa").value);
    LuaAssert(L, LuaToken("b").value > LuaToken("az").value);
    LuaAssert(L, LuaToken("ba").value > LuaToken("az").value);
    LuaAssert(L, LuaToken("hello").value > LuaToken("hell").value);
    LuaAssert(L, LuaToken("a0").value > LuaToken("a").value);
    LuaAssert(L, LuaToken("a").value != LuaToken("a0").value);

    // Digits sort before letters in non-leading positions too.
    LuaAssert(L, LuaToken("a9").value > LuaToken("a0").value);
    LuaAssert(L, LuaToken("aa").value > LuaToken("a9").value);

    return 0;
}
|
||||
|
||||
|
||||
@@ -261,9 +261,9 @@
|
||||
// we have a json null.
|
||||
//
|
||||
// So that finally brings me to what a "token" is. A token is a lightuserdata
|
||||
// containing a short string encoded as a fixed-width base37 number. Tokens
// may only contain the characters a-z and 0-9, and can be up to 12 characters
// long (since 37^12 fits in 64 bits). In effect, it's a short string, but it's
|
||||
// a string that's distinguishable from a normal lua string. It doesn't have
|
||||
// the same type as a lua string (it shows up as a lightuserdata).
|
||||
// The purpose of tokens is to represent special unique values, like json null.
|
||||
@@ -271,7 +271,7 @@
|
||||
// To make working with tokens easy, I've created a C++ struct 'LuaToken'.
|
||||
// It stores an int64. You can construct a LuaToken in two different ways:
|
||||
//
|
||||
//     LuaToken(0x3D5E30BCAF2EF663)
|
||||
// LuaToken("null")
|
||||
//
|
||||
// Those are equivalent. The second form is just as fast as the first,
|
||||
@@ -408,34 +408,36 @@ enum LuaTableType {
|
||||
|
||||
struct LuaToken {
|
||||
private:
|
||||
// Encode a token string as a fixed-width base37 number.
//
// Each character is mapped to a digit 1-36 ('0'-'9' -> 1-10, 'a'-'z' ->
// 11-36, with uppercase letters treated the same as lowercase); digit 0
// means "no character". The result is:
//
//     CH0*37^11 + CH1*37^10 + ... + CH11*37^0
//
// with positions past the end of the string contributing digit 0. This
// fixed-width encoding ensures that numeric ordering of the results
// matches lexicographic ordering of the original strings.
//
// Returns zero if the string is empty, longer than 12 characters, or
// contains a character outside 0-9 / a-z / A-Z. No overflow checks are
// needed: the largest possible result is 37^12 - 1, which fits in 64 bits.
//
static constexpr uint64_t parse(std::string_view str) {
    constexpr std::string_view::size_type kMaxChars = 12;
    constexpr uint64_t kBase = 37;

    if (str.empty() || str.size() > kMaxChars) return 0;

    uint64_t result = 0;
    for (const char c : str) {
        uint64_t digit = 0;
        if ((c >= '0') && (c <= '9')) {
            digit = uint64_t(c - '0') + 1;
        } else if ((c >= 'a') && (c <= 'z')) {
            digit = uint64_t(c - 'a') + 11;
        } else if ((c >= 'A') && (c <= 'Z')) {
            digit = uint64_t(c - 'A') + 11;
        } else {
            return 0;
        }
        result = result * kBase + digit;
    }

    // Pad remaining positions with zeros (no character) so that the first
    // character always lands on the 37^11 place.
    for (auto i = str.size(); i < kMaxChars; i++) {
        result *= kBase;
    }
    return result;
}
|
||||
@@ -450,7 +452,7 @@ public:
|
||||
|
||||
// Construct a token from a string.
|
||||
//
|
||||
// If the string is not a valid token, then this
|
||||
// initializes the token to the empty token (zero)
|
||||
//
|
||||
LuaToken(std::string_view s) : value(parse(s)) {}
|
||||
|
||||
@@ -794,15 +794,15 @@ LuaDefine(genlt, "obj1,obj2",
|
||||
"|* Numbers are compared in the obvious numeric manner."
|
||||
"|* Strings are compared alphabetically."
|
||||
"|* Booleans are compared with false being less than true."
|
||||
"|* Tokens are compared alphabetically."
|
||||
"|* Tables are all considered equal to other tables."
|
||||
"|* Functions are all considered equal to other functions."
|
||||
"|* Coroutines are all considered equal to other coroutines."
|
||||
"|"
|
||||
"|* Numbers are less than strings."
|
||||
"|* Strings are less than booleans."
|
||||
"|* Booleans are less than functions."
|
||||
"|* Functions are less than coroutines."
|
||||
"|* Coroutines are less than tables."
|
||||
"|Values of different types are printed in this order:"
|
||||
"|"
|
||||
"| Numbers, Strings, Booleans, Tokens,"
|
||||
"| Functions, Threads, Tables, and NIL"
|
||||
"|") {
|
||||
LuaArg o1,o2;
|
||||
LuaRet lt;
|
||||
|
||||
@@ -161,6 +161,7 @@ static void transmit_value_debug_string(StreamBuffer *sb, eng::ostringstream &os
|
||||
case LUA_TLIGHTUSERDATA: {
|
||||
LuaToken token(sb->read_uint64());
|
||||
oss << "[" << token.str() << "]";
|
||||
return;
|
||||
}
|
||||
case LUA_TT_GENERAL: {
|
||||
oss << "table " << sb->read_int32();
|
||||
|
||||
Reference in New Issue
Block a user