diff --git a/luprex/core/Makefile b/luprex/core/Makefile index ba3cc7db..035e3cd9 100644 --- a/luprex/core/Makefile +++ b/luprex/core/Makefile @@ -78,6 +78,7 @@ CORE_OBJ_FILES=\ obj/globaldb.o\ obj/sched.o\ obj/http.o\ + obj/json.o\ obj/table.o\ obj/gui.o\ obj/luasnap.o\ diff --git a/luprex/core/cpp/idalloc.cpp b/luprex/core/cpp/idalloc.cpp index c4a908d4..b77c7baa 100644 --- a/luprex/core/cpp/idalloc.cpp +++ b/luprex/core/cpp/idalloc.cpp @@ -87,12 +87,12 @@ void IdGlobalPool::deserialize(StreamBuffer *sb) { eng::string IdGlobalPool::debug_string() const { eng::ostringstream oss; - oss << "next_batch:" << util::hex64() << next_batch_ << " "; - oss << "next_id:" << util::hex64() << next_id_ << " "; - oss << "next_seqno: " << util::hex64() << next_seqno_ << " "; + oss << "next_batch:" << util::hex64.val(next_batch_) << " "; + oss << "next_id:" << util::hex64.val(next_id_) << " "; + oss << "next_seqno: " << util::hex64.val(next_seqno_) << " "; oss << "salvaged:"; for (const int64_t val : salvaged_) { - oss << " " << util::hex64() << val; + oss << " " << util::hex64.val(val); } return oss.str(); } @@ -253,9 +253,9 @@ eng::string IdPlayerPool::debug_string() const { oss << "cap:" << fifo_capacity_ << " ids:"; for (int i = 0; i < int(ranges_.size()); i++) { if (i > 0) oss << ","; - oss << util::hex64() << ranges_[i]; + oss << util::hex64.val(ranges_[i]); } - oss << " seqno:" << util::hex64() << next_seqno_; + oss << " seqno:" << util::hex64.val(next_seqno_); return oss.str(); } diff --git a/luprex/core/cpp/json.cpp b/luprex/core/cpp/json.cpp new file mode 100644 index 00000000..b3c85037 --- /dev/null +++ b/luprex/core/cpp/json.cpp @@ -0,0 +1,724 @@ +#include "json.hpp" +#include "luastack.hpp" +#include "util.hpp" +#include +#include +#include +#include +#include +#include + + +#define NOINDENT_LEVEL 1000 + +LuaTokenConstant(json_null, "null", ""); +LuaTokenConstant(json_object, "object", ""); + +static void indent(eng::ostringstream &oss, int level) { + if (level < 
NOINDENT_LEVEL) { + oss << std::endl; + for (int i = 0; i < level; i++) { + oss << " "; + } + } +} + +static bool length_exceeded(eng::ostringstream &oss, int maxlen) { + return oss.tellp() > maxlen; +} + +template +inline void store_error(eng::ostringstream &oss, const ARGS & ... args) { + oss.str(""); + util::send_to_stream(oss, args...); +} + +static void store_length_error(eng::ostringstream &oss, int maxlen) { + store_error(oss, "maximum json length exceeded: ", maxlen); +} + +static bool use_array_representation(lua_State *L) { + int top = lua_gettop(L); + int nfound = 0; + while (true) { + lua_rawgeti(L, top, nfound + 1); + bool null = lua_isnil(L, -1); + lua_settop(L, top); + if (null) break; + nfound += 1; + } + return (nfound == lua_nkeys(L, top)); +} + +static bool encode_key(lua_State *L, eng::ostringstream &oss); +static bool encode_value(lua_State *L, eng::ostringstream &oss, int level, int maxlen); + + +// The goal here is to emit a double in such a way that +// when we read it back in, we get the *exact* same number. +// +// In the worst case, you can accomplish this by using 17 +// digits of precision - that's enough to uniquely identify +// all double values (see the following URL). However, 17 +// digits tends to produce unnecessary repeating decimals. +// So we try 16 digits first, which tends to remove those +// repeating decimals, but sometimes produces losses. +// If that doesn't work, we fall back to 17 digits. 
+// +// https://randomascii.wordpress.com/2012/03/08/float-precisionfrom-zero-to-100-digits-2/ +// +static void encode_double_lossless(double value, eng::ostringstream &oss) { + char buffer[80]; + snprintf(buffer, sizeof(buffer), "%.16g", value); + if (strtod(buffer, nullptr) != value) { + snprintf(buffer, sizeof(buffer), "%.17g", value); + assert(strtod(buffer, nullptr) == value); + } + oss << buffer; +} + +static bool encode_nil(lua_State *L, eng::ostringstream &oss) { + oss << "null"; + return true; +} + +static bool encode_token(lua_State *L, eng::ostringstream &oss) { + LuaToken token(lua_touserdata(L, -1)); + if (token == LuaToken("null")) { + oss << "null"; + return true; + } else { + store_error(oss, "cannot encode token: [", token.str(), "]"); + return false; + } +} + +static bool encode_number(lua_State *L, eng::ostringstream &oss) { + lua_Number value = lua_tonumber(L, -1); + if (std::isnan(value) || std::isinf(value)) { + store_error(oss, "cannot encode infinity or NAN"); + return false; + } + int64_t ivalue = int64_t(value); + if (double(ivalue) == value) { + oss << ivalue; + } else { + encode_double_lossless(value, oss); + } + return true; +} + +static bool encode_number_key(lua_State *L, eng::ostringstream &oss) { + lua_Number value = lua_tonumber(L, -1); + int64_t ivalue = int64_t(value); + if (double(ivalue) != value) { + store_error(oss, "cannot encode floating point numbers in table keys"); + return false; + } + if (ivalue >= 0) { + oss << "\"\\uE000+" << ivalue << '"'; + } else { + oss << "\"\\uE000-" << -ivalue << '"'; + } + return true; +} + +static bool encode_boolean(lua_State *L, eng::ostringstream &oss) { + int flag = lua_toboolean(L, -1); + oss << (flag ? 
"true" : "false"); + return true; +} + +static bool encode_string(lua_State *L, eng::ostringstream &oss) { + size_t len; + const char *s = lua_tolstring(L, -1, &len); + std::string_view str(s, len); + oss << '"'; + if (sv::valid_utf8(str) && !sv::has_prefix(str, "\xEE\x80\x80")) { + // Output the string in the straightforward way, + // using traditional json escaping. + for (char c : str) { + switch (c) { + case '\\': oss << "\\\\"; break; + case '"' : oss << "\\\""; break; + case '\b': oss << "\\b"; break; + case '\f': oss << "\\f"; break; + case '\r': oss << "\\r"; break; + case '\n': oss << "\\n"; break; + case '\t': oss << "\\t"; break; + default: { + if (static_cast<unsigned char>(c) < 32) { + oss << "\\u" << util::hex16.val(c); + } else { + oss << c; + } + } + } + } + } else { + // Output as a base64-encoded string. + oss << "\\uE000="; + util::base64_encode(str, &oss); + } + oss << '"'; + return true; +} + +static bool encode_array(lua_State *L, eng::ostringstream &oss, int level, int maxlen) { + lua_checkstack(L, 20); + int top = lua_gettop(L); + oss << "["; + level ++; + int i = 1; + while (true) { + lua_rawgeti(L, top, i); + if (lua_isnil(L, -1)) break; + if (i > 1) oss << ","; + indent(oss, level); + bool ok = encode_value(L, oss, level, maxlen); + lua_settop(L, top); + if (!ok) return false; + if (length_exceeded(oss, maxlen)) { + store_length_error(oss, maxlen); + return false; + } + i += 1; + } + lua_settop(L, top); + level --; + indent(oss, level); + oss << "]"; + return true; +} + +static bool encode_object(lua_State *L, eng::ostringstream &oss, int level, int maxlen) { + lua_checkstack(L, 20); + int top = lua_gettop(L); + oss << "{"; + level ++; + lua_pushnil(L); + int i = 1; + while (lua_next(L, top) != 0) { + // Check for [json.object]=true, if so skip. 
+ if (lua_islightuserdata(L, -2) && + lua_isboolean(L, -1) && + (LuaToken(lua_touserdata(L, -2)) == LuaToken("object")) && + (lua_toboolean(L, -1) == 1)) { + lua_pop(L, 1); + continue; + } + + lua_pushvalue(L, -2); + // Stack now has key, value, key + assert(lua_gettop(L) == top + 3); + if (i > 1) oss << ","; + indent(oss, level); + bool ok = encode_key(L, oss); + if (!ok) { + lua_settop(L, top); + return false; + } + if (length_exceeded(oss, maxlen)) { + store_length_error(oss, maxlen); + lua_settop(L, top); + return false; + } + lua_pop(L, 1); + // Stack now has key, value + assert(lua_gettop(L) == top + 2); + oss << ((level < NOINDENT_LEVEL) ? " : " : ":"); + ok = encode_value(L, oss, level, maxlen); + assert(lua_gettop(L) == top + 2); + if (!ok) { + lua_settop(L, top); + return false; + } + if (length_exceeded(oss, maxlen)) { + store_length_error(oss, maxlen); + lua_settop(L, top); + return false; + } + lua_pop(L, 1); + // Stack now just has key. + assert(lua_gettop(L) == top + 1); + i += 1; + } + // Stack should be back to where we started. 
+ assert(lua_gettop(L) == top); + level --; + indent(oss, level); + oss << "}"; + return true; +} + +static bool encode_key(lua_State *L, eng::ostringstream &oss) { + int type = lua_type(L, -1); + switch (type) { + case LUA_TSTRING: return encode_string(L, oss); + case LUA_TNUMBER: return encode_number_key(L, oss); + case LUA_TBOOLEAN: + case LUA_TTABLE: { + store_error(oss, "cannot encode '", lua_typename(L, type), "' in table keys"); + return false; + } + default: { + store_error(oss, "cannot encode '", lua_typename(L, type), "'"); + return false; + } + } +} + +static bool encode_value(lua_State *L, eng::ostringstream &oss, int level, int maxlen) { + int type = lua_type(L, -1); + switch (type) { + case LUA_TNIL: return encode_nil(L, oss); + case LUA_TNUMBER: return encode_number(L, oss); + case LUA_TBOOLEAN: return encode_boolean(L, oss); + case LUA_TSTRING: return encode_string(L, oss); + case LUA_TLIGHTUSERDATA: return encode_token(L, oss); + case LUA_TTABLE: { + if (use_array_representation(L)) { + return encode_array(L, oss, level, maxlen); + } else { + return encode_object(L, oss, level, maxlen); + } + } + default: { + store_error(oss, "cannot encode '", lua_typename(L, type), "'"); + return false; + } + } +} + +static bool decode_value(lua_State *L, std::string_view &v); + +static bool decode_id(lua_State *L, std::string_view &v) { + std::string_view id = sv::read_ascii_identifier(v); + if (id == "null") lua_pushlightuserdata(L, LuaToken("null").voidvalue()); + else if (id == "true") lua_pushboolean(L, 1); + else if (id == "false") lua_pushboolean(L, 0); + else return false; + return true; +} + +static bool decode_number(lua_State *L, std::string_view &v) { + std::string_view n = sv::read_number(v, true, true, true, true); + if (n.empty()) return false; + + // If it's an integer, make sure it fits in a lua double + // losslessly. If it's a double, some loss in precision + // is OK. 
+ if (sv::valid_number(n, true, true, false, false)) { + int64_t i = sv::to_int64(n); + if (!LuaStack::int64_storable(i)) return false; + lua_pushnumber(L, double(i)); + return true; + } else { + double d = sv::to_double(n); + if (std::isnan(d) || std::isinf(d)) return false; + lua_pushnumber(L, d); + return true; + } +} + +static bool decode_base64_string(lua_State *L, std::string_view &v) { + // We've already read the starting quote and the E000 + // escape sequence at this point. + + // Skip the equal sign. + if (!sv::read_prefix(v, "=")) return false; + + // Find the end of the quoted string. + const char *p = v.data(); + const char *l = p + v.size(); + while (true) { + if (p == l) return false; + if (*p < 32) return false; + if (*p == '"') break; + p++; + } + std::string_view b64 = v.substr(0, p - v.data()); + v.remove_prefix(b64.size() + 1); + eng::ostringstream oss; + if (!util::base64_decode(b64, &oss)) return false; + eng::string str = oss.str(); + lua_pushlstring(L, str.c_str(), str.size()); + return true; +} + +static bool decode_int_string(lua_State *L, std::string_view &v) { + // We've already read the starting quote and the E000 + // escape sequence at this point. + + // Parse the number and the closing quote. + std::string_view n = sv::read_number(v, true, true, false, false); + if (n.empty()) return false; + if (!sv::read_prefix(v, "\"")) { + return false; + } + + // Make sure the number fits in a lua double, + // and push it on the stack. + int64_t i = sv::to_int64(n); + if (!LuaStack::int64_storable(i)) { + return false; + } + lua_pushnumber(L, double(i)); + return true; +} + +static bool decode_standard_string(lua_State *L, std::string_view &v) { + // We've already read the starting quote at this point. + eng::ostringstream oss; + while (true) { + // Get the next codepoint. + int32_t c = sv::read_codepoint_utf8(v); + + // If it's a control character or invalid codepoint, reject. 
+ if (c < 32) return false; + + // If it is an unescaped quote, that's end of string. + if (c == '"') break; + + // If it's a backslash, then deal with the escape sequence. + if (c == '\\') { + char next = sv::read_ascii_char(v); + switch (next) { + case '"': oss << '"'; break; + case '\\': oss << '\\'; break; + case '/': oss << '/'; break; + case 'r': oss << '\r'; break; + case 'n': oss <<'\n'; break; + case 'b': oss << '\b'; break; + case 'f': oss << '\f'; break; + case 't': oss << '\t'; break; + case 'u': { + std::string_view hexdigits = sv::read_nbytes(v, 4); + if (hexdigits.size() != 4) return false; + uint64_t codepoint = sv::to_hex64(hexdigits, 0x10000); + if (codepoint >= 0x10000) return false; + if (!util::write_codepoint_utf8(codepoint, &oss)) return false; + break; + } + default: return false; + } + continue; + } + + // Any other codepoint should be echoed into stream. + util::write_codepoint_utf8(c, &oss); + } + eng::string result = oss.str(); + lua_pushlstring(L, result.c_str(), result.size()); + return true; +} + +static bool decode_string(lua_State *L, std::string_view &v) { + if (!sv::read_prefix(v, "\"")) return false; + + // Check for codepoint E000, the escape sequence. 
+ if (sv::read_prefix(v, "") || + sv::read_prefix(v, "\\uE000") || + sv::read_prefix(v, "\\ue000")) { + char c = sv::zfront(v); + if (c == '=') return decode_base64_string(L, v); + else if ((c=='-') || (c=='+')) return decode_int_string(L, v); + else return false; + } else { + return decode_standard_string(L, v); + } +} + +static bool decode_array(lua_State *L, std::string_view &v) { + if (!sv::read_prefix(v, "[")) return false; + lua_newtable(L); + int tabpos = lua_gettop(L); + int next = 1; + while (true) { + v = sv::ltrim(v); + if (sv::zfront(v) == ']') { + v.remove_prefix(1); + return true; + } + if (!decode_value(L, v)) { + return false; + } + v = sv::ltrim(v); + if (sv::zfront(v) == ',') { + v.remove_prefix(1); + } + lua_rawseti(L, tabpos, next++); + } +} + +static bool decode_object(lua_State *L, std::string_view &v) { + if (!sv::read_prefix(v, "{")) return false; + lua_newtable(L); + int tabpos = lua_gettop(L); + while (true) { + v = sv::ltrim(v); + if (sv::zfront(v) == '}') { + v.remove_prefix(1); + return true; + } + if (!decode_string(L, v)) { + return false; + } + v = sv::ltrim(v); + if (!sv::read_prefix(v, ":")) { + return false; + } + if (!decode_value(L, v)) { + return false; + } + v = sv::ltrim(v); + if (sv::zfront(v) == ',') { + v.remove_prefix(1); + } + lua_rawset(L, tabpos); + } +} + +// Decode a single value. +// +// On success, pushes the value on the stack and returns true. +// On failure, pushes NIL on the stack and returns false. +// +static bool decode_value(lua_State *L, std::string_view &v) { + lua_checkstack(L, 20); + int top = lua_gettop(L); + + // Skip blanks. + v = sv::ltrim(v); + + // Try to read something. 
+ char c = sv::zfront(v); + bool result; + if (c == '"') result = decode_string(L, v); + else if (c == '[') result = decode_array(L, v); + else if (c == '{') result = decode_object(L, v); + else if (sv::ascii_isalpha(c)) result = decode_id(L, v); + else result = decode_number(L, v); + + // On failure, the decode routines may leave junk + // on the stack, in which case it's our job to clean up. + if (result == false) { + lua_settop(L, top); + lua_pushnil(L); + } + + // Now there should be exactly one new value on the stack. + assert(lua_gettop(L) == top + 1); + return result; +} + +namespace json { + +eng::string encode(LuaStack &LS, LuaSlot in, eng::string &out, bool indent, int maxlen) { + eng::ostringstream oss; + + // Call the recursive encoder. Clean up any crap on the lua stack afterward. + int top = lua_gettop(LS.state()); + lua_pushvalue(LS.state(), in.index()); + bool ok = encode_value(LS.state(), oss, indent ? 0 : NOINDENT_LEVEL, maxlen); + lua_settop(LS.state(), top); + + // One last check for overruns. + if (ok && length_exceeded(oss, maxlen)) { + store_length_error(oss, maxlen); + ok = false; + } + + // Produce the return value. + if (ok) { + out = oss.str(); + return ""; + } else { + out = ""; + return oss.str(); + } +} + +bool decode(LuaStack &LS, LuaSlot out, std::string_view v) { + lua_State *L = LS.state(); + + // Try to read a single value from the view. + bool ok = decode_value(L, v); + lua_replace(L, out.index()); + if (!ok) return false; + + // Nothing but trailing whitespace may remain in the input text. + if (sv::ltrim(v).size() > 0) { + lua_pushnil(L); + lua_replace(L, out.index()); + return false; + } + + // Special case: if the top-level result is jsonnull, + // then change it to nil. 
+ if (LS.istoken(out)) { + LS.set(out, LuaNil); + } + + return true; +} + +} // namespace json + + +LuaDefine(json_encode, "data, indent, maxlen", + "|Encode a lua data structure returning a json string." + "|" + "|Data is the value being encoded. 
Indent is a flag," + "|if it's true, then the json is indented nicely," + "|otherwise, it is packed tightly. Maxlen is the maximum" + "|length in bytes of the encoded json string." + "|" + "|Usually, Lua data translates straightforwardly to json." + "|However, there are a number of special cases to be" + "|aware of:" + "|" + "|- Closures and threads cannot be encoded. These will" + "| cause the encoder to abort." + "|" + "|- The numbers infinity and NAN cannot be encoded." + "| Both of these will cause the encoder to abort." + "|" + "|- You must specify a size-limit to the encoded" + "| string. Exceeding the size limit causes the" + "| encoder to abort." + "|" + "|- Recursive data structures will cause the encoder to" + "| loop infinitely until the size-limit is exceeded," + "| causing the encoder to abort." + "|" + "|- There is no way to represent math.huge or math.nan in" + "| json. Encoding either math.nan or math.huge (positive" + "| or negative) will cause the encoder to abort with the" + "| error 'cannot encode infinity or NAN'." + "|" + "|- Lua tables cannot contain 'nil', but json objects and" + "| arrays can contain null. If you want the encoder to" + "| emit a json object or array containing null, you must" + "| use token json.null to represent null." + "|" + "|- Json objects, like lua tables, are key-value stores." + "| However, json objects can only have string keys. Our" + "| encoder uses a workaround to transparently" + "| allow mixing string and integer keys in json tables." + "| See 'encoding difficult data' below." + "|" + "|- Json strings are required to be valid utf-8. Our encoder" + "| uses a workaround to transparently allow the use of" + "| arbitrary 8-bit-clean strings. See 'encoding difficult" + "| data' below." + "|" + "|- Lua tables containing contiguous integer keys from 1-n are" + "| autodetected to be json arrays. Empty tables are also" + "| emitted as json arrays. All other tables are emitted" + "| as json objects." 
+ "|" + "|- You can force a table to be emitted as a json object" + "| by putting the key-value pair table[json.object]=true" + "| into the table. This special key is not emitted, but" + "| it triggers json object mode. This is the only way" + "| to emit an empty json object (a truly empty table is" + "| emitted as a json array.)" + "|" + "|Encoding Difficult Data:" + "|" + "|Normally, json doesn't allow integer table keys, and it" + "|doesn't allow strings that aren't valid utf-8. Our" + "|json encoder and decoder, on the other hand, can" + "|encode and decode integer table keys and 8-bit-clean" + "|strings transparently. This is accomplished without" + "|violating the json specification, by encoding such" + "|values as utf-8 strings:" + "|" + "| '123' (encoded integer 123)" + "| '=aGVsbG8=' (binary string encoded as base64)" + "|" + "|Those encodings start with utf-8 codepoint E000." + "|This codepoint probably shows up in your text editor" + "|as a little rectangle. When the decoder sees codepoint" + "|E000 at the beginning of a string, it automatically" + "|decodes the string back into its original form." + "|" + "|The one price for this behavior is that the encoder" + "|cannot literally emit strings that start" + "|with codepoint E000. If the encoder detects such a" + "|string, it will emit it as a base64-encoded string." + "|This should be uncommon, since codepoint E000 is" + "|reserved." + "|" + "|Note that integers are only encoded when they are" + "|used as table keys. Otherwise, numbers are emitted" + "|straightforwardly." 
+ "|") { + LuaArg data, indent, maxlen; + LuaRet encoded; + LuaStack LS(L, data, indent, maxlen, encoded); + eng::string out; + eng::string error = json::encode(LS, data, out, LS.ckboolean(indent), LS.ckint(maxlen)); + if (!error.empty()) { + luaL_error(L, "%s", error.c_str()); + LS.set(encoded, LuaNil); + return LS.result(); + } else { + LS.set(encoded, out); + return LS.result(); + } +} + +LuaDefine(json_decode, "data", + "|Decode a json expression into a lua data structure." + "|" + "|Data that was generated by our own encoder is almost" + "|8-bit clean. That includes difficult cases, like" + "|binary strings, floating point numbers, and tables" + "|with mixed string and integer keys. The exception" + "|are the kinds of data that can't be encoded at all:" + "|See doc(json.encode) for details about what" + "|can and cannot be encoded." + "|" + "|Some json may contain 'null' inside objects and" + "|arrays. Lua tables can't store nil, so instead, we" + "|store the token json.null. If that's not what you" + "|want, you can use json.stripnulls to strip out" + "|the json.null values from a data structure and" + "|replace them with nil." 
+ "|" + "|") { + LuaArg encoded; + LuaRet data; + LuaStack LS(L, encoded, data); + std::string_view v = LS.ckstringview(encoded); + bool ok = json::decode(LS, data, v); + if (!ok) { + luaL_error(L, "invalid json string."); + } + return LS.result(); +} + +// LuaDefine(base64_encode, "data", "") { +// LuaArg str; +// LuaRet ret; +// LuaStack LS(L, str, ret); +// eng::string cstr = LS.ckstring(str); +// eng::ostringstream oss; +// util::base64_encode(cstr, &oss); +// LS.set(ret, oss.str()); +// return LS.result(); +// } + +// LuaDefine(base64_decode, "data", "") { +// LuaArg str; +// LuaRet ret; +// LuaStack LS(L, str, ret); +// eng::string cstr = LS.ckstring(str); +// eng::ostringstream oss; +// util::base64_decode(cstr, &oss); +// LS.set(ret, oss.str()); +// return LS.result(); +// } + diff --git a/luprex/core/cpp/json.hpp b/luprex/core/cpp/json.hpp new file mode 100644 index 00000000..a1dcd018 --- /dev/null +++ b/luprex/core/cpp/json.hpp @@ -0,0 +1,33 @@ +// Encode lua data structures into json, and decode them again. +// +// See doc(json.encode) to read about limitations of the encoder. +// +#ifndef JSON_HPP +#define JSON_HPP + +#include "luastack.hpp" +#include "wrap-string.hpp" +#include + +namespace json { + // Encode json. + // + // See doc(json.encode) for a lot more information. + // + // Returns an error message. If the error message is an + // empty string, then the encoding was successful. + // + eng::string encode(LuaStack &LS, LuaSlot in, eng::string &out, bool indent, int maxlen); + + // Decode json. + // + // See doc(json.decode) for a lot more information. + // + // The only error condition is syntactically invalid json. + // In that case, we return false. 
+ // + bool decode(LuaStack &LS, LuaSlot out, std::string_view in); +} + +#endif // JSON_HPP + diff --git a/luprex/core/cpp/luastack.cpp b/luprex/core/cpp/luastack.cpp index 75904bbb..c938fa4b 100644 --- a/luprex/core/cpp/luastack.cpp +++ b/luprex/core/cpp/luastack.cpp @@ -2,6 +2,7 @@ #include #include #include +#include LuaSpecial LuaRegistry(LUA_REGISTRYINDEX); LuaNilMarker LuaNil; @@ -17,6 +18,15 @@ LuaFunctionReg::LuaFunctionReg(const char *n, const char *a, const char *d, bool All = this; } +LuaConstantReg::LuaConstantReg(const char *n, const char *d, LuaToken tokenvalue, lua_Number numbervalue) { + name_ = n; + docs_ = d; + tokenvalue_ = tokenvalue; + numbervalue_ = numbervalue; + next_ = All; + All = this; +} + const LuaFunctionReg *LuaFunctionReg::lookup(lua_CFunction fn) { for (const LuaFunctionReg *r = All; r != 0; r = r->next_) { if (r->func_ == fn) { @@ -27,6 +37,20 @@ const LuaFunctionReg *LuaFunctionReg::lookup(lua_CFunction fn) { } LuaFunctionReg *LuaFunctionReg::All; +LuaConstantReg *LuaConstantReg::All; + + +eng::string LuaToken::str() const { + uint64_t token = (uint64_t)value; + char buffer[9]; + for (int i = 0; i < 8; i++) { + unsigned char c = token; + buffer[7-i] = c; + token >>= 8; + } + buffer[8] = 0; + return eng::string(buffer); +} static int panicf(lua_State *L) { const char *p = lua_tostring(L, -1); @@ -107,11 +131,23 @@ eng::string LuaStack::ckstring(LuaSlot s) const { return eng::string(str, len); } +std::string_view LuaStack::ckstringview(LuaSlot s) const { + luaL_checktype(L_, s, LUA_TSTRING); + size_t len; + const char *str = lua_tolstring(L_, s, &len); + return std::string_view(str, len); +} + lua_State *LuaStack::ckthread(LuaSlot s) const { luaL_checktype(L_, s, LUA_TTHREAD); return lua_tothread(L_, s); } +LuaToken LuaStack::cktoken(LuaSlot s) const { + luaL_checktype(L_, s, LUA_TLIGHTUSERDATA); + return LuaToken(lua_touserdata(L_, s)); +} + void LuaStack::count_slots_finalize(int narg, int nvar, int nret) { narg_ = narg; nret_ = 
nret; diff --git a/luprex/core/cpp/luastack.hpp b/luprex/core/cpp/luastack.hpp index 2d427984..4c5d852c 100644 --- a/luprex/core/cpp/luastack.hpp +++ b/luprex/core/cpp/luastack.hpp @@ -226,6 +226,40 @@ int LuaTypeTagValue(lua_State *L) { return 0; } #define LUA_TT_GLOBALDB 22 #define LUA_TT_CLASS 23 +// We use lightuserdata to store 'tokens': short +// strings of 8 characters or less. These tokens +// are useful as unique markers. The 8 characters +// are packed into a uint64. + +struct LuaToken { +private: + static constexpr uint64_t literal_to_token(const char *str) { + uint64_t result = 0; + for (int i = 0; i < 8; i++) { + unsigned char c = *str; + result = (result << 8) + c; + if (*str) str++; + } + return result; + } +public: + uint64_t value; + + template + LuaToken(T arg) = delete; + + constexpr LuaToken(const char *str) : value(literal_to_token(str)) {} + LuaToken(uint64_t v) : value(v) {} + LuaToken(void *v) : value((uint64_t)v) {} + LuaToken() : value(0) {} + + bool empty() const { return value == 0; } + bool operator ==(const LuaToken &other) const { return value == other.value; } + void *voidvalue() const { return (void*)value; } + + eng::string str() const; +}; + class LuaStack : public eng::nevernew { private: int narg_; @@ -302,6 +336,7 @@ private: void push_any_value(lua_Integer s) const { lua_pushinteger(L_, s); } void push_any_value(lua_CFunction s) const { lua_pushcfunction(L_, s); } void push_any_value(bool b) const { lua_pushboolean(L_, b ? 1:0); } + void push_any_value(LuaToken token) const { lua_pushlightuserdata(L_, (void*)(token.value)); } // Push multiple values on the stack, in order, by type. 
template @@ -354,6 +389,7 @@ public: bool isboolean(LuaSlot s) const { return lua_type(L_, s) == LUA_TBOOLEAN; } bool isnil(LuaSlot s) const { return lua_type(L_, s) == LUA_TNIL; } bool iscfunction(LuaSlot s) const { return lua_iscfunction(L_, s) != 0; } + bool istoken(LuaSlot s) const { return lua_islightuserdata(L_, s) != 0; } void checktable(LuaSlot index) const { checktype(index, LUA_TTABLE); } void checkstring(LuaSlot index) const { checktype(index, LUA_TSTRING); } @@ -362,13 +398,16 @@ public: void checkfunction(LuaSlot index) const { checktype(index, LUA_TFUNCTION); } void checkboolean(LuaSlot index) const { checktype(index, LUA_TBOOLEAN); } void checknil(LuaSlot index) const { checktype(index, LUA_TNIL); } + void checktoken(LuaSlot index) const { checktype(index, LUA_TLIGHTUSERDATA); } bool ckboolean(LuaSlot s) const; lua_Integer ckinteger(LuaSlot s) const; int ckint(LuaSlot s) const; lua_Number cknumber(LuaSlot s) const; eng::string ckstring(LuaSlot s) const; + std::string_view ckstringview(LuaSlot s) const; lua_State *ckthread(LuaSlot s) const; + LuaToken cktoken(LuaSlot s) const; void clearmetatable(LuaSlot tab) const; void setmetatable(LuaSlot tab, LuaSlot mt) const; @@ -472,8 +511,29 @@ public: // Lua flagbits manipulation: visited bit. bool getvisited(LuaSlot tab) const; void setvisited(LuaSlot tab, bool visited) const; + + // Return true if the int64 value can be stored as a lua number. 
+ static bool int64_storable(int64_t v) { return (v <= MAXINT) && (v >= -MAXINT); } }; +class LuaConstantReg : public eng::nevernew { +private: + const char *name_; + const char *docs_; + LuaToken tokenvalue_; + lua_Number numbervalue_; + LuaConstantReg *next_; + +public: + static LuaConstantReg *All; + LuaConstantReg(const char *name, const char *docs, LuaToken tokenvalue, lua_Number numbervalue); + + const char *get_name() const { return name_; } + const char *get_docs() const { return docs_; } + LuaToken get_tokenvalue() const { return tokenvalue_; } + lua_Number get_numbervalue() const { return numbervalue_; } + LuaConstantReg *next() const { return next_; } +}; class LuaFunctionReg : public eng::nevernew { private: @@ -498,6 +558,11 @@ public: void set_func(lua_CFunction fn) { func_ = fn; } }; +#define LuaTokenConstant(name, tvalue, docs) \ + LuaConstantReg reg_##name(#name, docs, LuaToken(tvalue), 0); + +#define LuaNumberConstant(name, nvalue, docs) \ + LuaConstantReg reg_##name(#name, docs, LuaToken(), nvalue); #define LuaDefine(name, args, docs) \ int lfn_##name(lua_State *L); \ @@ -518,4 +583,5 @@ public: #define LuaStringify(x) #x #define LuaAssert(L, x) if (!(x)) { luaL_error((L), "Assert failed: %s (file %s line %d)", LuaStringify(x), __FILE__, __LINE__); } #define LuaAssertStrEq(L, x, y) { eng::string _s1_(x); eng::string _s2_(y); if (_s1_ != _s2_) luaL_error((L), "Assert failed: value=%s (file %s line %d)", _s1_.c_str(), __FILE__, __LINE__); } + #endif // LUASTACK_HPP diff --git a/luprex/core/cpp/pprint.cpp b/luprex/core/cpp/pprint.cpp index eada8c22..61a2c918 100644 --- a/luprex/core/cpp/pprint.cpp +++ b/luprex/core/cpp/pprint.cpp @@ -5,6 +5,7 @@ #include "table.hpp" #include +#include void atomic_print(LuaStack &LS, LuaSlot val, bool quote, std::ostream *os) { @@ -23,11 +24,15 @@ void atomic_print(LuaStack &LS, LuaSlot val, bool quote, std::ostream *os) { return; case LUA_TNUMBER: { double value = LS.cknumber(val); - int64_t ivalue = int64_t(value); 
- if (double(ivalue) == value) { - (*os) << ivalue; + if (std::isnan(value)) { + (*os) << "nan"; } else { - (*os) << value; + int64_t ivalue = int64_t(value); + if (double(ivalue) == value) { + (*os) << ivalue; + } else { + (*os) << value; + } } return; } @@ -38,6 +43,11 @@ void atomic_print(LuaStack &LS, LuaSlot val, bool quote, std::ostream *os) { (*os) << ""; return; } + case LUA_TLIGHTUSERDATA: { + LuaToken token = LS.cktoken(val); + (*os) << "[" << token.str() << "]"; + return; + } default: (*os) << "<" << lua_typename(LS.state(), tt) << ">"; return; diff --git a/luprex/core/cpp/source.cpp b/luprex/core/cpp/source.cpp index 9ef21c4a..ccb9142e 100644 --- a/luprex/core/cpp/source.cpp +++ b/luprex/core/cpp/source.cpp @@ -56,8 +56,7 @@ LuaDefine(classname, "classtable", "get the class name from a class table") { return LS.result(); } -static void get_reg_name(const LuaFunctionReg *reg, std::string_view &classname, std::string_view &funcname) { - std::string_view name(reg->get_name()); +static void get_reg_name(std::string_view name, std::string_view &classname, std::string_view &funcname) { size_t upos = name.find('_'); if (upos == std::string_view::npos) { funcname = name; @@ -280,7 +279,7 @@ static void source_load_cfunctions(lua_State *L) { if ((func != nullptr) && (!r->get_sandbox())) { std::string_view classname; std::string_view funcname; - get_reg_name(r, classname, funcname); + get_reg_name(r->get_name(), classname, funcname); if (classname.empty()) { LS.getglobaltable(classobj); LS.rawset(classobj, funcname, func); @@ -293,6 +292,31 @@ static void source_load_cfunctions(lua_State *L) { LS.result(); } +// Load all the 'LuaConstant' constants into the lua state. 
+// +static void source_load_cconstants(lua_State *L) { + LuaVar classobj, value; + LuaStack LS(L, classobj, value); + for (auto r = LuaConstantReg::All; r != nullptr; r=r->next()) { + if (r->get_tokenvalue().empty()) { + LS.set(value, r->get_numbervalue()); + } else { + LS.set(value, r->get_tokenvalue()); + } + std::string_view classname; + std::string_view funcname; + get_reg_name(r->get_name(), classname, funcname); + if (classname.empty()) { + LS.getglobaltable(classobj); + LS.rawset(classobj, funcname, value); + } else { + LS.makeclass(classobj, classname); + LS.rawset(classobj, funcname, value); + } + } + LS.result(); +} + // Run all the closures from the source database. // static eng::string source_load_lfunctions(lua_State *L) { @@ -341,18 +365,12 @@ static eng::string source_load_lfunctions(lua_State *L) { eng::string SourceDB::rebuild() { lua_State *L = lua_state_; - LuaVar mathclass; - LuaStack LS(L, mathclass); + LuaVar mathclass, httpclass, jsonnull; + LuaStack LS(L, mathclass, httpclass, jsonnull); source_clear_globals(L); source_load_cfunctions(L); + source_load_cconstants(L); eng::string errs = source_load_lfunctions(L); - - // A few builtin constants. These are hardwired. 
- LS.makeclass(mathclass, "math"); - LS.rawset(mathclass, "pi", M_PI); - LS.rawset(mathclass, "huge", HUGE_VAL); - LS.rawset(mathclass, "maxint", LuaStack::MAXINT); - LS.result(); return errs; } @@ -466,7 +484,7 @@ void SourceDB::register_lua_builtins() { for (auto reg = LuaFunctionReg::All; reg != nullptr; reg=reg->next()) { std::string_view funcname; std::string_view classname; - get_reg_name(reg, classname, funcname); + get_reg_name(reg->get_name(), classname, funcname); if (classname.empty()) { LS.getglobaltable(classtab); } else { @@ -524,7 +542,7 @@ eng::string SourceDB::function_docs(const LuaStack &LS0, LuaSlot fn) { } std::string_view classname; std::string_view funcname; - get_reg_name(reg, classname, funcname); + get_reg_name(reg->get_name(), classname, funcname); eng::ostringstream oss; util::StringVec docs = util::split_docstring(reg->get_docs()); oss << "function "; @@ -747,6 +765,11 @@ LuaDefineBuiltin(math_sqrt, "x", "return the square root of x"); LuaDefineBuiltin(math_tan, "x", "return the tangent of x in radians"); LuaDefineBuiltin(math_tanh, "x", "return the hyperbolic tangent of x in radians"); LuaSandboxBuiltin(math_log10, "", ""); +LuaNumberConstant(math_pi, M_PI, ""); +LuaNumberConstant(math_huge, HUGE_VAL, ""); +LuaNumberConstant(math_nan, NAN, ""); +LuaNumberConstant(math_maxint, LuaStack::MAXINT, ""); + // math.random and math.randomseed are in world-accessor.cpp, because // generating random numbers must manipulate global state which is // stored in the world model. 
diff --git a/luprex/core/cpp/util.cpp b/luprex/core/cpp/util.cpp index dd3c2ecd..0f22c9cc 100644 --- a/luprex/core/cpp/util.cpp +++ b/luprex/core/cpp/util.cpp @@ -65,8 +65,10 @@ bool valid_double(string_view value) { int64_t to_int64(string_view value, int64_t errval) { int64_t result; - const char *last = value.data() + value.size(); - auto r = std::from_chars(value.data(), last, result, 10); + const char *p = value.data(); + const char *last = p + value.size(); + if ((p < last) && (*p == '+')) p++; + auto r = std::from_chars(p, last, result, 10); if (r.ec != std::errc()) return errval; if (r.ptr != last) return errval; return result; @@ -74,6 +76,7 @@ int64_t to_int64(string_view value, int64_t errval) { uint64_t to_hex64(string_view value, uint64_t errval) { uint64_t result; + if (sv::zfront(value) == '-') return errval; const char *last = value.data() + value.size(); auto r = std::from_chars(value.data(), last, result, 16); if (r.ec != std::errc()) return errval; @@ -204,6 +207,15 @@ string_view read_to_line(string_view &source) { return result; } +bool read_prefix(string_view &source, string_view prefix) { + if (0 == source.compare(0, prefix.size(), prefix)) { + source.remove_prefix(prefix.size()); + return true; + } else { + return false; + } +} + string_view read_to_space(string_view &source) { size_t pos1 = 0; while ((pos1 < source.size()) && (!ascii_isspace(source[pos1]))) { @@ -243,57 +255,119 @@ string_view read_ascii_identifier(string_view &source) { return result; } +std::string_view read_number(string_view &source, bool plus, bool minus, bool dec, bool exp) { + const char *p = source.data(); + const char *l = p + source.size(); + if (p == l) return source.substr(0, 0); + char sign = *p; + if (sign == '+') { + if (!plus) return source.substr(0, 0); + p++; + } + if (sign == '-') { + if (!minus) return source.substr(0, 0); + p++; + } + if (p == l) return source.substr(0, 0); + bool have_digits = false; + while ((p < l) && (ascii_isdigit(*p))) { + 
have_digits = true; + p++; + } + if ((p < l) && dec && (*p == '.')) { + p++; + while ((p < l) && (ascii_isdigit(*p))) { + have_digits = true; + p++; + } + } + if (!have_digits) return source.substr(0, 0); + if ((p < l) && exp && ((*p == 'e')||(*p == 'E'))) { + p++; + if ((p < l) && ((*p == '+') || (*p == '-'))) { + p++; + } + bool have_exp = false; + while ((p < l) && (ascii_isdigit(*p))) { + have_exp = true; + p++; + } + if (!have_exp) return source.substr(0, 0); + } + string_view result = source.substr(0, p - source.data()); + source.remove_prefix(result.size()); + return result; +} + +int32_t read_ascii_char(string_view &source) { + if (source.empty()) return -1; + int32_t result = source.front(); + source.remove_prefix(1); + return result; +} + +int32_t read_codepoint_utf8(string_view &source) { + size_t size = source.size(); + if (size == 0) return -1; + const unsigned char *bytes = (const unsigned char *)source.data(); + int codepoint; + size_t seqlen; + if ((bytes[0] & 0x80) == 0x00) { + // U+0000 to U+007F + codepoint = (bytes[0] & 0x7F); + seqlen = 1; + } else if ((bytes[0] & 0xE0) == 0xC0) { + // U+0080 to U+07FF + codepoint = (bytes[0] & 0x1F); + seqlen = 2; + } else if ((bytes[0] & 0xF0) == 0xE0) { + // U+0800 to U+FFFF + codepoint = (bytes[0] & 0x0F); + seqlen = 3; + } else if ((bytes[0] & 0xF8) == 0xF0) { + // U+10000 to U+10FFFF + codepoint = (bytes[0] & 0x07); + seqlen = 4; + } else { + return -1; + } + + if (seqlen > size) { + return -1; + } + + for (size_t i = 1; i < seqlen; ++i) { + if ((bytes[i] & 0xC0) != 0x80) return -1; + codepoint = (codepoint << 6) | (bytes[i] & 0x3F); + } + + if ((codepoint > 0x10FFFF) || + ((codepoint >= 0xD800) && (codepoint <= 0xDFFF)) || + ((codepoint <= 0x007F) && (seqlen != 1)) || + ((codepoint >= 0x0080) && (codepoint <= 0x07FF) && (seqlen != 2)) || + ((codepoint >= 0x0800) && (codepoint <= 0xFFFF) && (seqlen != 3)) || + ((codepoint >= 0x10000) && (codepoint <= 0x1FFFFF) && (seqlen != 4))) { + return -1; + } + + 
source.remove_prefix(seqlen); + return codepoint; +} + bool valid_utf8(string_view s) { - const unsigned char *bytes = (const unsigned char *)s.data(); - const unsigned char *tail = bytes + s.size(); - unsigned int codepoint; - int seqlen; - - while (bytes < tail) { - if ((bytes[0] & 0x80) == 0x00) { - // U+0000 to U+007F - codepoint = (bytes[0] & 0x7F); - seqlen = 1; - } else if ((bytes[0] & 0xE0) == 0xC0) { - // U+0080 to U+07FF - codepoint = (bytes[0] & 0x1F); - seqlen = 2; - } else if ((bytes[0] & 0xF0) == 0xE0) { - // U+0800 to U+FFFF - codepoint = (bytes[0] & 0x0F); - seqlen = 3; - } else if ((bytes[0] & 0xF8) == 0xF0) { - // U+10000 to U+10FFFF - codepoint = (bytes[0] & 0x07); - seqlen = 4; - } else { - return false; - } - - if (bytes + seqlen > tail) { - return false; - } - - for (int i = 1; i < seqlen; ++i) { - if ((bytes[i] & 0xC0) != 0x80) return false; - codepoint = (codepoint << 6) | (bytes[i] & 0x3F); - } - - if ((codepoint > 0x10FFFF) || - ((codepoint >= 0xD800) && (codepoint <= 0xDFFF)) || - ((codepoint <= 0x007F) && (seqlen != 1)) || - ((codepoint >= 0x0080) && (codepoint <= 0x07FF) && (seqlen != 2)) || - ((codepoint >= 0x0800) && (codepoint <= 0xFFFF) && (seqlen != 3)) || - ((codepoint >= 0x10000) && (codepoint <= 0x1FFFFF) && (seqlen != 4))) { - return false; - } - - bytes += seqlen; + while (!s.empty()) { + int32_t codepoint = read_codepoint_utf8(s); + if (codepoint < 0) return false; } return true; } +bool valid_number(string_view s, bool plus, bool minus, bool dec, bool exp) { + read_number(s, plus, minus, dec, exp); + return s.empty(); +} + } // namespace sv @@ -334,6 +408,8 @@ void quote_string(const eng::string &s, std::ostream *os) { (*os) << (usesinglequote ? "\"" : "\\\""); } else if (c == '\'') { (*os) << (usesinglequote ? 
"\\'" : "'"); + } else if (c == '\\') { + (*os) << "\\\\"; } else { (*os) << c; } @@ -344,7 +420,7 @@ void quote_string(const eng::string &s, std::ostream *os) { case '\t': (*os) << "\\t"; break; case '\r': (*os) << "\\r"; break; default: - (*os) << "\\" << std::setfill('0') << std::setw(3) << value; + (*os) << "\\" << dec.width(3).fill('0').val(value); break; } } @@ -352,6 +428,52 @@ void quote_string(const eng::string &s, std::ostream *os) { (*os) << (usesinglequote ? '\'' : '"'); } +void base64_encode(std::string_view str, std::ostream *oss) { + const char *encode_tab = + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; + const char *s = str.data(); + size_t size = str.size(); + for (size_t i = 0; i < size; i += 3) { + uint32_t block = ((unsigned char)(s[i])) << 16; + if (i + 1 < size) block |= ((unsigned char)(s[i + 1])) << 8; + if (i + 2 < size) block |= ((unsigned char)(s[i + 2])); + (*oss) << encode_tab[(block>>18)&0x3F]; + (*oss) << encode_tab[(block>>12)&0x3F]; + (*oss) << ((i + 1 < size) ? encode_tab[(block>>6)&0x3F] : '='); + (*oss) << ((i + 2 < size) ? 
encode_tab[(block>>0)&0x3F] : '='); + } +} + +bool base64_decode(std::string_view str, std::ostream *oss) { + uint32_t chunk = 0; + int fill = 0; + int skip = 0; + bool clean = true; + for (int i = 0; i < int(str.size()); i++) { + char c = str[i]; + uint32_t value; + + if ((c >= 'A') && (c <= 'Z')) value = c - 'A'; + else if ((c >= 'a') && (c <= 'z')) value = c - 'a' + 26; + else if ((c >= '0') && (c <= '9')) value = c - '0' + 52; + else if (c == '+') value = 62; + else if (c == '/') value = 63; + else if (c == '=') { value = 0; skip ++; } + else { clean=false; continue; } + + chunk = (chunk << 6) | value; + fill ++; + if (fill == 4) { + oss->put((chunk>>16) & 0xFF); + if (skip < 2) oss->put((chunk>>8) & 0xFF); + if (skip < 1) oss->put(chunk & 0xFF); + chunk = 0; fill = 0; skip = 0; + } + } + if (fill != 0) clean = false; + return clean; +} + IdVector id_vector_create(int64_t id1, int64_t id2, int64_t id3, int64_t id4) { IdVector result; if (id1 >= 0) result.push_back(id1); @@ -406,8 +528,7 @@ HashValue hash_id_vector(const IdVector &idv) { eng::string hash_to_hex(const HashValue &hv) { eng::ostringstream oss; - oss << std::hex << std::setw(16) << std::setfill('0') << hv.first; - oss << std::hex << std::setw(16) << std::setfill('0') << hv.second; + oss << hex64.val(hv.first) << hex64.val(hv.second); return oss.str(); } static inline uint64_t Rot64(uint64_t x, int k) @@ -530,6 +651,52 @@ eng::string toupper(eng::string input) { return input; } +static void buffer_codepoint_utf8(int32_t scp, char *buffer) { + uint32_t cp = (uint32_t)scp; + unsigned char *c = (unsigned char *)buffer; + if (cp <= 0x7F) { + c[0] = cp; + c[1] = 0; + } + else if (cp <= 0x7FF) { + c[0] = (cp>>6)+192; + c[1] = (cp&63)+128; + c[2] = 0; + } + else if (cp <= 0xFFFF) { + if (0xd800 <= cp && cp <= 0xdfff) { + c[0] = 0; + } else { + c[0] = (cp>>12)+224; + c[1] = ((cp>>6)&63)+128; + c[2] = (cp&63)+128; + c[3] = 0; + } + } + else if (cp <= 0x10FFFF) { + c[0] = (cp>>18)+240; + c[1] = 
((cp>>12)&63)+128; + c[2] = ((cp>>6)&63)+128; + c[3] = (cp&63)+128; + c[4] = 0; + } else { + c[0] = 0; + } +} + +eng::string get_codepoint_utf8(uint32_t cp) { + char buffer[5]; + buffer_codepoint_utf8(cp, buffer); + return eng::string(buffer); +} + +bool write_codepoint_utf8(int32_t cp, std::ostream *s) { + char buffer[5]; + buffer_codepoint_utf8(cp, buffer); + (*s) << buffer; + return buffer[0] != 0; +} + double distance_squared(double x1, double y1, double x2, double y2) { double dx = x1 - x2; double dy = y1 - y2; @@ -549,35 +716,20 @@ eng::string XYZ::debug_string() const { return oss.str(); } - } // namespace util -std::ostream &operator<<(std::ostream &oss, const util::hex64 &v) { - oss << "0x" << std::setw(16) << std::setfill('0') << std::hex; - return oss; -} -std::ostream &operator<<(std::ostream &oss, const util::hex32 &v) { - oss << "0x" << std::setw(8) << std::setfill('0') << std::hex; - return oss; -} - -std::ostream &operator<<(std::ostream &oss, const util::hex16 &v) { - oss << "0x" << std::setw(4) << std::setfill('0') << std::hex; - return oss; -} - -std::ostream &operator<<(std::ostream &oss, const util::hex8 &v) { - oss << "0x" << std::setw(2) << std::setfill('0') << std::hex; - return oss; +static std::string_view read_number_x(const char *p, bool plus, bool minus, bool dec, bool exp) { + std::string_view source = p; + return sv::read_number(source, plus, minus, dec, exp); } LuaDefine(unittests_util, "", "some unit tests") { // test str_to_int64, str_to_double LuaAssert(L, sv::to_int64("123") == 123); - LuaAssert(L, sv::to_int64("123.4") == INT64_MIN); - LuaAssert(L, sv::to_int64("12ab") == INT64_MIN); - LuaAssert(L, sv::to_int64("") == INT64_MIN); + LuaAssert(L, sv::to_int64("123.4") == INT64_MAX); + LuaAssert(L, sv::to_int64("12ab") == INT64_MAX); + LuaAssert(L, sv::to_int64("") == INT64_MAX); LuaAssert(L, sv::to_double("123.5") == 123.5); LuaAssert(L, std::isnan(sv::to_double("12ab"))); LuaAssert(L, std::isnan(sv::to_double(""))); @@ -689,6 
+841,20 @@ LuaDefine(unittests_util, "", "some unit tests") { LuaAssert(L, util::hash_to_double(0x1000000000000000) == 1.0/16.0); LuaAssert(L, util::hash_to_double(0x7000000000000000) == 7.0/16.0); LuaAssert(L, util::hash_to_double(0xF000000000000000) == 15.0/16.0); + + // Test read_number allowing everything. + LuaAssert(L, read_number_x("123x", true, true, true, true) == "123"); + LuaAssert(L, read_number_x("123.3x", true, true, true, true) == "123.3"); + LuaAssert(L, read_number_x("123.x", true, true, true, true) == "123."); + LuaAssert(L, read_number_x("123..x", true, true, true, true) == "123."); + LuaAssert(L, read_number_x("-123x", true, true, true, true) == "-123"); + LuaAssert(L, read_number_x("+123x", true, true, true, true) == "+123"); + LuaAssert(L, read_number_x("+-123x", true, true, true, true) == ""); + LuaAssert(L, read_number_x("-123.02e05x", true, true, true, true) == "-123.02e05"); + LuaAssert(L, read_number_x("-123e-5x", true, true, true, true) == "-123e-5"); + LuaAssert(L, read_number_x("-123e+5x", true, true, true, true) == "-123e+5"); + LuaAssert(L, read_number_x("-123e+x", true, true, true, true) == ""); + return 0; } diff --git a/luprex/core/cpp/util.hpp b/luprex/core/cpp/util.hpp index 45bbcf17..43ec4ada 100644 --- a/luprex/core/cpp/util.hpp +++ b/luprex/core/cpp/util.hpp @@ -26,6 +26,9 @@ #include #include #include +#include +#include +#include #include "luastack.hpp" #include "spookyv2.hpp" @@ -59,9 +62,17 @@ bool valid_double(string_view v); bool valid_int64(string_view v); bool valid_hex64(string_view v); -// Parse numbers as int32, int64, or double. Returns errval on failure. +// Convert strings to numbers. Returns errval on failure. +// +// The integer parser accepts a sequence of digits, +// with or without a + or - sign. The hex parser +// does not allow a + or - sign. For both the int64 +// and hex64 parser, it is a failure if the number +// does not fit in 64 bits. The double parser does +// not accept the strings 'nan' or 'inf'. 
+// double to_double(string_view v, double errval = std::numeric_limits::quiet_NaN()); -int64_t to_int64(string_view v, int64_t errval = std::numeric_limits::min()); +int64_t to_int64(string_view v, int64_t errval = std::numeric_limits::max()); uint64_t to_hex64(string_view v, uint64_t errval = std::numeric_limits::max()); // Trim whitspace from a string_view. @@ -115,6 +126,13 @@ string_view read_to_sep(string_view &source, char sep); // string_view read_to_line(string_view &source); +// Read a prefix string from a string_view. +// +// Returns false if the string view doesn't start with +// the specified prefix. +// +bool read_prefix(string_view &source, string_view prefix); + // Read from a string_view until whitespace is reached. // // If there's any whitespace in the source, returns the text @@ -136,9 +154,46 @@ string_view read_nbytes(string_view &source, int nbytes); // string_view read_ascii_identifier(string_view &source); +// Read a number from a string view +// +// This is basically a regex pattern matching routine +// hardwired with the regex for numbers. You must +// specify which of the following parts of the regex +// are allowed or not: +// +// * plus sign +// * minus sign +// * decimal point +// * scientific notation exponents +// +// Returns the number as a string_view. There is +// no guarantee that the number is small enough to +// fit into any particular number of bits. This +// always uses base 10. +// +std::string_view read_number(string_view &source, bool plus, bool minus, bool dec, bool exp); + +// Read an ascii character from a string. +// +// Returns -1 if the string is empty. +// +int32_t read_ascii_char(string_view &source); + +// Read a UTF8 codepoint from a string_view. +// +// If the next thing in the string_view isn't a valid +// codepoint, returns -1 and doesn't update the view. +// +int32_t read_codepoint_utf8(string_view &source); + // Return true if the string is valid utf-8. 
bool valid_utf8(string_view s); +// Return true if the number conforms to the spec. +// See read_number for more information. +// +bool valid_number(string_view v, bool plus, bool minus, bool dec, bool exp); + } // namespace sv namespace util { @@ -175,6 +230,16 @@ double profiling_clock(); // Output a string to a stream using Lua string escaping and quoting. void quote_string(const eng::string &str, std::ostream *os); +// base64 encode. +void base64_encode(std::string_view v, std::ostream *oss); + +// base64 decode. +// +// Returns true if the base64 was 'clean' base64, as +// opposed to base64 with extraneous characters. +// +bool base64_decode(std::string_view v, std::ostream *oss); + // ID vector quick create. IdVector id_vector_create(int64_t id1=-1, int64_t id2=-1, int64_t id3=-1, int64_t id4=-1); @@ -219,6 +284,14 @@ eng::string repeat_string(const eng::string &a, int n); eng::string tolower(eng::string input); eng::string toupper(eng::string input); +// Convert a codepoint number into a utf8 string. +// If the codepoint is invalid, returns empty string. +eng::string get_codepoint_utf8(int32_t cp); + +// Write a codepoint in utf8 to a stream. +// If the codepoint is invalid, writes nothing and returns false. +bool write_codepoint_utf8(int32_t cp, std::ostream *out); + // Calculate distance between two points double distance_squared(double x1, double y1, double x2, double y2); @@ -249,12 +322,6 @@ struct XYZ { eng::string debug_string() const; }; -// These are formatting directives that can be sent to a std::ostream. -class hex64 {}; -class hex32 {}; -class hex16 {}; -class hex8 {}; - class NullStreamBuffer : public std::streambuf { public: @@ -264,24 +331,62 @@ public: // send_to_stream: send all arguments to the specified stream. inline void send_to_stream(std::ostream &os) {} template -inline void send_to_stream(std::ostream &os, ARG arg, REST & ... rest) { +inline void send_to_stream(std::ostream &os, const ARG &arg, const REST & ... 
rest) { os << arg; send_to_stream(os, rest...); } // ss: convert all arguments to a string by sending them to a stringstream. template -inline eng::string ss(ARGS & ... args) { +inline eng::string ss(const ARGS & ... args) { eng::ostringstream oss; send_to_stream(oss, args...); return oss.str(); } +// This is a better way to do std::setfill, std::hex, std::setprecision +// +// Usage examples: +// std::cout << util::hex.width(5).fill('0').val(123) +// std::cout << util::dec.fill('$').precision(val(123) +// +template +class FormattedNumber { +public: + VALUE value_; + bool hex_; + int width_; + char fill_; + int precision_; + + constexpr FormattedNumber(VALUE v, bool h, int w, char f, int p) + : value_(v), hex_(h), width_(w), fill_(f), precision_(p) {} + + constexpr FormattedNumber width(int w) const { return FormattedNumber(value_, hex_, w, fill_, precision_); } + constexpr FormattedNumber fill(char f) const { return FormattedNumber(value_, hex_, width_, f, precision_); } + constexpr FormattedNumber precision(int p) const { return FormattedNumber(value_, hex_, width_, fill_, p); } + + template + constexpr FormattedNumber val(NVALUE v) const { return FormattedNumber(v, hex_, width_, fill_, precision_); } +}; + +constexpr auto hex = FormattedNumber(0, true, 0, '0', 6); +constexpr auto hex8 = FormattedNumber(0, true, 2, '0', 6); +constexpr auto hex16 = FormattedNumber(0, true, 4, '0', 6); +constexpr auto hex32 = FormattedNumber(0, true, 8, '0', 6); +constexpr auto hex64 = FormattedNumber(0, true, 16, '0', 6); +constexpr auto dec = FormattedNumber(0, false, 0, ' ', 6); + } // namespace util -std::ostream &operator<<(std::ostream &oss, const util::hex64 &v); -std::ostream &operator<<(std::ostream &oss, const util::hex32 &v); -std::ostream &operator<<(std::ostream &oss, const util::hex16 &v); -std::ostream &operator<<(std::ostream &oss, const util::hex8 &v); +template +inline std::ostream &operator<<(std::ostream &oss, util::FormattedNumber n) { + if (n.hex_) oss << 
std::hex; + else oss << std::dec; + oss << std::setprecision(n.precision_) << std::setfill(n.fill_) << std::setw(n.width_) << n.value_; + oss << std::dec << std::setfill(' ') << std::setprecision(6); + return oss; +} + #endif // UTIL_HPP diff --git a/luprex/core/cpp/world-difftab.cpp b/luprex/core/cpp/world-difftab.cpp index 1dad1037..59f0c10f 100644 --- a/luprex/core/cpp/world-difftab.cpp +++ b/luprex/core/cpp/world-difftab.cpp @@ -56,6 +56,10 @@ static bool equivalent_values(LuaStack &MLS, LuaSlot mval, LuaSlot mtnmap, if (SLS.type(sval) != LUA_TSTRING) return false; return MLS.ckstring(mval) == SLS.ckstring(sval); } + case LUA_TLIGHTUSERDATA: { + if (SLS.type(sval) != LUA_TLIGHTUSERDATA) return false; + return MLS.cktoken(mval) == SLS.cktoken(sval); + } case LUA_TFUNCTION: { // Cannot really compare. Just return true if the types match. return SLS.type(sval) == MLS.type(mval); @@ -105,6 +109,11 @@ static void transmit_value(LuaStack &MLS, LuaSlot mval, LuaSlot mtnmap, StreamBu sb->write_string(MLS.ckstring(mval)); return; } + case LUA_TLIGHTUSERDATA: { + sb->write_uint8(LUA_TLIGHTUSERDATA); + sb->write_uint64(MLS.cktoken(mval).value); + return; + } case LUA_TT_GENERAL: { int midx = get_table_number(MLS, mval, mtnmap); if (midx == 0) { @@ -151,6 +160,10 @@ static void transmit_value_debug_string(StreamBuffer *sb, eng::ostringstream &os oss << sb->read_string(); return; } + case LUA_TLIGHTUSERDATA: { + LuaToken token(sb->read_uint64()); + oss << "[" << token.str() << "]"; + } case LUA_TT_GENERAL: { oss << "table " << sb->read_int32(); return; @@ -270,6 +283,12 @@ static void set_transmitted_value(LuaStack &LS, LuaSlot tangibles, LuaSlot ntmap LS.set(target, value); return; } + case LUA_TLIGHTUSERDATA: { + LuaToken value(sb->read_uint64()); + DebugLine(dbc) << dbinfo << "[" << value.str() << "]"; + LS.set(target, value); + return; + } case LUA_TT_GENERAL: { int index = sb->read_int32(); DebugLine(dbc) << dbinfo << "table " << index;