Better support for serialization and sorting of the Token data type

This commit is contained in:
2026-02-18 23:23:59 -05:00
parent 0de2a50843
commit 1fd06f0628
6 changed files with 98 additions and 44 deletions

View File

@@ -261,9 +261,9 @@
// we have a json null.
//
// So that finally brings me to what a "token" is. A token is a lightuserdata
// containing a short string encoded as a fixed-width base37 number. Tokens
// may only contain the characters a-z and 0-9, and can be up to 12 characters
// long (since 37^12 fits in 64 bits). In effect, it's a short string, but it's
// a string that's distinguishable from a normal lua string. It doesn't have
// the same type as a lua string (it shows up as a lightuserdata).
// The purpose of tokens is to represent special unique values, like json null.
@@ -271,7 +271,7 @@
// To make working with tokens easy, I've created a C++ struct 'LuaToken'.
// It stores an int64. You can construct a LuaToken in two different ways:
//
// LuaToken(0x3D5E30BCAF2EF663)
// LuaToken("null")
//
// Those are equivalent. The second form is just as fast as the first,
@@ -408,34 +408,36 @@ enum LuaTableType {
struct LuaToken {
private:
// Encode a token string as a fixed-width base37 number.
//
// Each character is mapped to a digit 1-36: '0'-'9' become 1-10 and
// 'a'-'z' become 11-36 (upper-case letters are folded onto the same
// digits as their lower-case forms). Digit 0 means "no character".
// The string is padded on the right with zero digits to exactly 12
// positions, so the result is:
//
//     CH0*37^11 + CH1*37^10 + ... + CH11*37^0
//
// This fixed-width encoding ensures that numeric ordering matches
// lexicographic ordering of the original strings. The largest value
// that can be produced is 37^12 - 1, which fits in 64 bits, so no
// overflow checks are needed while accumulating.
//
// Returns zero if the string is empty, too long, or contains
// invalid characters.
//
static constexpr uint64_t parse(std::string_view str) {
    constexpr std::size_t kMaxChars = 12;
    constexpr uint64_t kBase = 37;

    if (str.empty() || str.size() > kMaxChars) return 0;

    uint64_t result = 0;
    for (char c : str) {
        uint64_t digit = 0;
        if ((c >= '0') && (c <= '9')) {
            digit = uint64_t(c - '0') + 1;
        } else if ((c >= 'a') && (c <= 'z')) {
            digit = uint64_t(c - 'a') + 11;
        } else if ((c >= 'A') && (c <= 'Z')) {
            digit = uint64_t(c - 'A') + 11;
        } else {
            // Not a legal token character: the whole string is invalid.
            return 0;
        }
        result = result * kBase + digit;
    }

    // Pad remaining positions with zeros (no character) so that every
    // token occupies the same 12 base37 positions.
    for (std::size_t i = str.size(); i < kMaxChars; i++) {
        result *= kBase;
    }
    return result;
}
@@ -450,7 +452,7 @@ public:
// Construct a token from a string.
//
// The string is encoded with parse(). If the string is not a valid
// token, then this initializes the token to the empty token (zero).
//
LuaToken(std::string_view s) : value(parse(s)) {}