Better support for serialization and sorting of the Token data type

This commit is contained in:
2026-02-18 23:23:59 -05:00
parent 0de2a50843
commit 1fd06f0628
6 changed files with 98 additions and 44 deletions

View File

@@ -69,3 +69,12 @@ Tokens can get passed to Unreal in a variety of ways. For example, in animation
## Usage
Tokens are mainly intended as sentinels and special reserved values. The JSON null example above is the motivating case, but tokens can represent any short reserved constant the engine needs.
## Serialization and Difference Transmission
As far as I can tell, tokens are serialized and difference-transmitted correctly:
* The serialize_lua function handles tokens explicitly.
* The difference transmitter has code for tokens.
* eris has always saved lightuserdata as 64-bit numbers.

View File

@@ -48,19 +48,17 @@ LuaConstantReg *LuaConstantReg::All;
eng::string LuaToken::str() const {
uint64_t n = (uint64_t)value;
char buffer[20];
int pos = 19;
buffer[pos] = 0;
while (n > 0) {
uint64_t digit = (n % 36);
n /= 36;
if (digit < 10) {
buffer[--pos] = '0' + digit;
} else {
buffer[--pos] = 'a' + (digit - 10);
char buffer[13] = {};
for (int i = 11; i >= 0; i--) {
int d = n % 37;
n /= 37;
if (d >= 1 && d <= 10) {
buffer[i] = '0' + (d - 1);
} else if (d >= 11 && d <= 36) {
buffer[i] = 'a' + (d - 11);
}
}
return eng::string(buffer + pos, 19 - pos);
return eng::string(buffer);
}
static int panicf(lua_State *L) {
@@ -612,7 +610,7 @@ bool LuaCoreStack::tangetclass(LuaSlot classobj, LuaSlot tab) {
bool LuaCoreStack::issortablekey(LuaSlot s) const {
int type = lua_type(L_, s);
return (type == LUA_TBOOLEAN) || (type == LUA_TNUMBER) || (type == LUA_TSTRING);
return (type == LUA_TBOOLEAN) || (type == LUA_TNUMBER) || (type == LUA_TSTRING) || (type == LUA_TLIGHTUSERDATA);
}
void LuaCoreStack::movesortablekey(LuaSlot key, LuaCoreStack &otherstack, LuaSlot otherslot) {
@@ -633,6 +631,10 @@ void LuaCoreStack::movesortablekey(LuaSlot key, LuaCoreStack &otherstack, LuaSlo
lua_replace(otherstack.L_, otherslot);
break;
}
case LUA_TLIGHTUSERDATA:
lua_pushlightuserdata(otherstack.L_, lua_touserdata(L_, key));
lua_replace(otherstack.L_, otherslot);
break;
default:
assert(false && "movesortablekey: not a sortable key");
}
@@ -960,3 +962,35 @@ LuaDefine(keywords_finalcheckthrow, "table", kwdoc) {
return LS.result();
}
// Self-test for LuaToken. Checks three things: that a token built from a
// string converts back to the same string via str(), that unusable input
// yields the empty token, and that comparing the encoded 'value' fields
// orders tokens the same way the strings themselves would be ordered.
LuaDefine(unittests_token, "", "Unit tests for LuaToken encoding") {
// Test round-trip encoding for various strings.
// Covers the first and last character of each allowed range (a/z, 0/9),
// mixed letter/digit tokens in both orders, and 12-character tokens,
// which is the maximum length a token can have.
LuaAssertStrEq(L, LuaToken("a").str(), "a");
LuaAssertStrEq(L, LuaToken("z").str(), "z");
LuaAssertStrEq(L, LuaToken("0").str(), "0");
LuaAssertStrEq(L, LuaToken("9").str(), "9");
LuaAssertStrEq(L, LuaToken("null").str(), "null");
LuaAssertStrEq(L, LuaToken("hello").str(), "hello");
LuaAssertStrEq(L, LuaToken("zzzzzzzzzzzz").str(), "zzzzzzzzzzzz");
LuaAssertStrEq(L, LuaToken("a0").str(), "a0");
LuaAssertStrEq(L, LuaToken("0a").str(), "0a");
// All-'0' and leading-'0' tokens must survive the round trip too.
LuaAssertStrEq(L, LuaToken("000000000000").str(), "000000000000");
// Test that empty/invalid strings produce the empty token.
// The three cases are: no characters at all, a character outside
// a-z/0-9 (the space), and a string longer than 12 characters.
// These go through the std::string_view constructor explicitly.
LuaAssert(L, LuaToken(std::string_view("")).empty());
LuaAssert(L, LuaToken(std::string_view("hello world")).empty());
LuaAssert(L, LuaToken(std::string_view("aaaaaaaaaaaaa")).empty()); // 13 chars
// Test that numeric ordering matches lexicographic ordering.
// Digits sort before letters, a string sorts before any longer string
// it is a prefix of, and an earlier differing character decides the
// result regardless of what follows ("b" > "az").
LuaAssert(L, LuaToken("a").value > LuaToken("0").value);
LuaAssert(L, LuaToken("b").value > LuaToken("a").value);
LuaAssert(L, LuaToken("aa").value > LuaToken("a").value);
LuaAssert(L, LuaToken("ab").value > LuaToken("aa").value);
LuaAssert(L, LuaToken("b").value > LuaToken("az").value);
LuaAssert(L, LuaToken("ba").value > LuaToken("az").value);
LuaAssert(L, LuaToken("hello").value > LuaToken("hell").value);
// A trailing '0' character must not be confused with "no character":
// "a0" has to be a different, larger value than "a".
LuaAssert(L, LuaToken("a0").value > LuaToken("a").value);
LuaAssert(L, LuaToken("a").value != LuaToken("a0").value);
return 0;
}

View File

@@ -261,9 +261,9 @@
// we have a json null.
//
// So that finally brings me to what a "token" is. A token is a lightuserdata
// containing a short string encoded as a base36 number. Tokens may only
// contain the characters a-z and 0-9, and can be up to 12 characters long
// (since 36^12 fits in 64 bits). In effect, it's a short string, but it's
// containing a short string encoded as a fixed-width base37 number. Tokens
// may only contain the characters a-z and 0-9, and can be up to 12 characters
// long (since 37^12 fits in 64 bits). In effect, it's a short string, but it's
// a string that's distinguishable from a normal lua string. It doesn't have
// the same type as a lua string (it shows up as a lightuserdata).
// The purpose of tokens is to represent special unique values, like json null.
@@ -271,7 +271,7 @@
// To make working with tokens easy, I've created a C++ struct 'LuaToken'.
// It stores an int64. You can construct a LuaToken in two different ways:
//
// LuaToken(0x10FAA9)
// LuaToken(0x3D5E30BCAF2EF663)
// LuaToken("null")
//
// Those are equivalent. The second form is just as fast as the first,
@@ -408,34 +408,36 @@ enum LuaTableType {
struct LuaToken {
private:
// Convert a base36 number into a token. If the base36 number is
// not valid, or if it exceeds 64 bits, then return zero.
// Encode a token string as a fixed-width base37 number.
// Each character is mapped to a digit 1-36 (0 means "no character"),
// and the result is: CH0*37^11 + CH1*37^10 + ... + CH11*37^0.
// This fixed-width encoding ensures that numeric ordering matches
// lexicographic ordering of the original strings.
// Returns zero if the string is empty, too long, or contains
// invalid characters.
//
static constexpr uint64_t parse(std::string_view str) {
if (str.size() > 12) return 0;
if (str.empty()) return 0;
uint64_t result = 0;
uint64_t maxint = uint64_t(-1);
// Leading zeros are not allowed.
if ((!str.empty()) && (str[0]=='0')) return 0;
for (int i = 0; i < int(str.size()); i++) {
char c = str[i];
uint64_t digit = 0;
if ((c >= '0') && (c <= '9')) {
digit = uint64_t(c - '0');
digit = uint64_t(c - '0') + 1;
} else if ((c >= 'a') && (c <= 'z')) {
digit = uint64_t(c - 'a' + 10);
digit = uint64_t(c - 'a') + 11;
} else if ((c >= 'A') && (c <= 'Z')) {
digit = uint64_t(c - 'A' + 10);
digit = uint64_t(c - 'A') + 11;
} else {
return maxint;
return 0;
}
// Multiply existing number by 36, then add the digit.
// We have two checks to prevent integer overflow.
if (result > (maxint / 36)) return 0;
result *= 36;
if (digit > (maxint - result)) return 0;
result += digit;
result = result * 37 + digit;
}
// Pad remaining positions with zeros (no character).
for (int i = int(str.size()); i < 12; i++) {
result = result * 37;
}
return result;
}
@@ -450,7 +452,7 @@ public:
// Construct a token from a string.
//
// If the string is not a valid base36 number, then this
// If the string is not a valid token, then this
// initializes the token to the empty token (zero)
//
LuaToken(std::string_view s) : value(parse(s)) {}

View File

@@ -794,15 +794,15 @@ LuaDefine(genlt, "obj1,obj2",
"|* Numbers are compared in the obvious numeric manner."
"|* Strings are compared alphabetically."
"|* Booleans are compared with false being less than true."
"|* Tokens are compared alphabetically."
"|* Tables are all considered equal to other tables."
"|* Functions are all considered equal to other functions."
"|* Coroutines are all considered equal to other coroutines."
"|"
"|* Numbers are less than strings."
"|* Strings are less than booleans."
"|* Booleans are less than functions."
"|* Functions are less than coroutines."
"|* Coroutines are less than tables."
"|Values of different types are printed in this order:"
"|"
"| Numbers, Strings, Booleans, Tokens,"
"| Functions, Threads, Tables, and NIL"
"|") {
LuaArg o1,o2;
LuaRet lt;

View File

@@ -161,6 +161,7 @@ static void transmit_value_debug_string(StreamBuffer *sb, eng::ostringstream &os
case LUA_TLIGHTUSERDATA: {
LuaToken token(sb->read_uint64());
oss << "[" << token.str() << "]";
return;
}
case LUA_TT_GENERAL: {
oss << "table " << sb->read_int32();

View File

@@ -329,13 +329,19 @@ LUA_API void lua_arith (lua_State *L, int op) {
lua_unlock(L);
}
/* Numbers are first, to help out pretty-printers that want
* to print the keys of a table in order. */
static int sortorder(int t) {
switch (t) {
case LUA_TNIL: return 0;
case LUA_TNUMBER: return 1;
case LUA_TSTRING: return 2;
case LUA_TBOOLEAN: return 3;
case LUA_TTABLE: return 1000000;
case LUA_TNUMBER: return 0;
case LUA_TSTRING: return 1;
case LUA_TBOOLEAN: return 2;
case LUA_TLIGHTUSERDATA: return 3;
case LUA_TFUNCTION: return 4;
case LUA_TTHREAD: return 5;
case LUA_TUSERDATA: return 6;
case LUA_TTABLE: return 7;
case LUA_TNIL: return 1000000;
default: return t+1000;
}
}
@@ -356,6 +362,8 @@ LUA_API int lua_genlt (lua_State *L, int index1, int index2) {
i = luaV_lessthan(L, o1, o2);
} else if (t1 == LUA_TBOOLEAN) {
i = bvalue(o1) < bvalue(o2);
} else if (t1 == LUA_TLIGHTUSERDATA) {
i = pvalue(o1) < pvalue(o2);
} else {
return 0;
}