#include "json.hpp"
#include "luastack.hpp"
#include "util.hpp"

// Standard headers for assert, std::isnan/std::isinf, int64_t,
// snprintf, strtod and std::string_view, all of which are used below.
#include <cassert>
#include <cmath>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <string_view>

// Indentation levels at or above this value suppress all newlines and
// indentation. json::encode starts at this level when 'indent' is false.
#define NOINDENT_LEVEL 1000

LuaTokenConstant(json_null, "null", "");
LuaTokenConstant(json_object, "object", "");
LuaTokenConstant(json_error, "error", "");

// UTF-8 encoding of codepoint U+E000. A json string that begins with
// this codepoint carries an escaped integer key or a base64 payload.
// It is spelled with hex escapes so that the bytes cannot be lost by
// editors or tools that drop private-use characters.
static const char ESCAPE_PREFIX_UTF8[] = "\xEE\x80\x80";

// Start a new line and indent it by 'level' units, unless indentation
// is disabled (level >= NOINDENT_LEVEL).
static void indent(eng::ostringstream &oss, int level) {
    if (level >= NOINDENT_LEVEL) return;
    oss << '\n';
    for (int i = 0; i < level; i++) {
        oss << " ";
    }
}

// True if the stream already holds more than 'maxlen' bytes.
static bool length_exceeded(eng::ostringstream &oss, int maxlen) {
    return oss.tellp() > maxlen;
}

// Discard everything written so far and replace it with an error
// message built from 'args'. After a failed encode, the stream
// contains the error text instead of json.
template <typename... ARGS>
inline void store_error(eng::ostringstream &oss, const ARGS &... args) {
    oss.str("");
    util::send_to_stream(oss, args...);
}

// Replace the stream contents with the standard "too long" error.
static void store_length_error(eng::ostringstream &oss, int maxlen) {
    store_error(oss, "maximum json length exceeded: ", maxlen);
}

// Decide whether the table on top of the lua stack should be emitted
// as a json array: true when the keys 1..n (n counted up to the first
// nil) account for every key in the table. An empty table qualifies.
static bool use_array_representation(lua_State *L) {
    int top = lua_gettop(L);
    int nfound = 0;
    while (true) {
        lua_rawgeti(L, top, nfound + 1);
        bool null = lua_isnil(L, -1);
        lua_settop(L, top);
        if (null) break;
        nfound += 1;
    }
    return (nfound == lua_nkeys(L, top));
}

// True if 'value' lies in [-2^63, 2^63), i.e. converting it to int64_t
// is well defined. Both bounds are exactly representable as doubles.
// NaN fails both comparisons and is therefore rejected.
static bool double_fits_int64(double value) {
    return (value >= -9223372036854775808.0) && (value < 9223372036854775808.0);
}

static bool encode_key(lua_State *L, eng::ostringstream &oss);
static bool encode_value(lua_State *L, eng::ostringstream &oss, int level, int maxlen);

// The goal here is to emit a double in such a way that
// when we read it back in, we get the *exact* same number.
//
// In the worst case, you can accomplish this by using 17
// digits of precision - that's enough to uniquely identify
// all double values (see the following URL). However, 17
// digits tends to produce unnecessary repeating decimals.
// So we try 16 digits first, which tends to remove those
// repeating decimals, but sometimes produces losses.
// If that doesn't work, we fall back to 17 digits.
//
// https://randomascii.wordpress.com/2012/03/08/float-precisionfrom-zero-to-100-digits-2/
//
static void encode_double_lossless(double value, eng::ostringstream &oss) {
    char buffer[80];
    snprintf(buffer, sizeof(buffer), "%.16g", value);
    if (strtod(buffer, nullptr) != value) {
        snprintf(buffer, sizeof(buffer), "%.17g", value);
        assert(strtod(buffer, nullptr) == value);
    }
    oss << buffer;
}

// Emit json null for a lua nil.
static bool encode_nil(lua_State *L, eng::ostringstream &oss) {
    oss << "null";
    return true;
}

// Encode the light userdata on top of the stack. Only the null token
// is encodable; any other token is an error.
static bool encode_token(lua_State *L, eng::ostringstream &oss) {
    LuaToken token(lua_touserdata(L, -1));
    // The decoder produces LuaToken("null"), which is also the token
    // declared by LuaTokenConstant(json_null, ...) above, so that is
    // what must be accepted here. The older spelling "jsonnull" is
    // still accepted so existing callers keep working.
    if ((token == LuaToken("null")) || (token == LuaToken("jsonnull"))) {
        oss << "null";
        return true;
    }
    store_error(oss, "cannot encode token: [", token.str(), "]");
    return false;
}

// Encode the number on top of the stack. Integral values are printed
// as integers, everything else as a round-trippable double. Infinity
// and NaN are rejected.
static bool encode_number(lua_State *L, eng::ostringstream &oss) {
    lua_Number value = lua_tonumber(L, -1);
    if (std::isnan(value) || std::isinf(value)) {
        store_error(oss, "cannot encode infinity or NAN");
        return false;
    }
    // Converting an out-of-range double to int64_t is undefined
    // behavior, so only attempt the integer form when it fits.
    if (double_fits_int64(value)) {
        int64_t ivalue = int64_t(value);
        if (double(ivalue) == value) {
            oss << ivalue;
            return true;
        }
    }
    encode_double_lossless(value, oss);
    return true;
}

// Encode the numeric table key on top of the stack as an escaped
// string of the form "\uE000+123" or "\uE000-123". Only integral
// values that fit in an int64_t are accepted.
static bool encode_number_key(lua_State *L, eng::ostringstream &oss) {
    lua_Number value = lua_tonumber(L, -1);
    // The range check comes first: casting an out-of-range value
    // (or infinity) to int64_t is undefined behavior.
    if (!double_fits_int64(value) || (double(int64_t(value)) != value)) {
        store_error(oss, "cannot encode floating point numbers in table keys");
        return false;
    }
    int64_t ivalue = int64_t(value);
    // Negative values print their own '-' sign; this avoids negating
    // INT64_MIN, which would overflow.
    oss << "\"\\uE000" << ((ivalue >= 0) ? "+" : "") << ivalue << '"';
    return true;
}

// Emit json true/false for a lua boolean.
static bool encode_boolean(lua_State *L, eng::ostringstream &oss) {
    int flag = lua_toboolean(L, -1);
    oss << (flag ? "true" : "false");
    return true;
}

// Encode the string on top of the stack as a quoted json string.
//
// Valid utf-8 that does not begin with codepoint E000 is written
// out with the usual json escapes. Anything else (invalid utf-8, or
// text that begins with the reserved codepoint) is written as
// "\uE000=" followed by the base64 encoding of the raw bytes.
static bool encode_string(lua_State *L, eng::ostringstream &oss) {
    size_t len;
    const char *s = lua_tolstring(L, -1, &len);
    std::string_view str(s, len);
    oss << '"';
    if (sv::valid_utf8(str) && !sv::has_prefix(str, ESCAPE_PREFIX_UTF8)) {
        while (!str.empty()) {
            int32_t cp = sv::read_codepoint_utf8(str);
            assert(cp >= 0);
            switch (cp) {
            case '\\': oss << "\\\\"; break;
            case '"' : oss << "\\\""; break;
            case '\b': oss << "\\b"; break;
            case '\f': oss << "\\f"; break;
            case '\r': oss << "\\r"; break;
            case '\n': oss << "\\n"; break;
            case '\t': oss << "\\t"; break;
            default: {
                if (cp < 32) {
                    // Remaining control characters need a \u escape.
                    oss << "\\u" << util::hex16.val(cp);
                } else {
                    bool ok = util::write_codepoint_utf8(cp, &oss);
                    assert(ok);
                    (void)ok;
                }
            }
            }
        }
    } else {
        // Output as a base64-encoded string.
        oss << "\\uE000=";
        util::base64_encode(str, &oss);
    }
    oss << '"';
    return true;
}

// Encode the table on top of the stack as a json array, visiting
// indices 1, 2, ... up to the first nil. Returns false with an error
// message stored in 'oss' on failure. The lua stack is restored to
// its height on entry.
static bool encode_array(lua_State *L, eng::ostringstream &oss, int level, int maxlen) {
    // lua_checkstack reports failure when the stack cannot grow. Deep
    // or self-referential nesting ends up here, so fail cleanly
    // instead of pushing past the end of the lua stack.
    if (!lua_checkstack(L, 20)) {
        store_error(oss, "cannot encode: tables are nested too deeply");
        return false;
    }
    int top = lua_gettop(L);
    oss << "[";
    level++;
    for (int i = 1; ; i++) {
        lua_rawgeti(L, top, i);
        if (lua_isnil(L, -1)) break;
        if (i > 1) oss << ",";
        indent(oss, level);
        bool ok = encode_value(L, oss, level, maxlen);
        lua_settop(L, top);
        if (!ok) return false;
        if (length_exceeded(oss, maxlen)) {
            store_length_error(oss, maxlen);
            return false;
        }
    }
    // Drop the nil that terminated the loop.
    lua_settop(L, top);
    level--;
    indent(oss, level);
    oss << "]";
    return true;
}

// Encode the table on top of the stack as a json object. The marker
// entry [json.object]=true is skipped. Returns false with an error
// message stored in 'oss' on failure. The lua stack is restored to
// its height on entry.
static bool encode_object(lua_State *L, eng::ostringstream &oss, int level, int maxlen) {
    // See encode_array: a failed lua_checkstack must stop the descent.
    if (!lua_checkstack(L, 20)) {
        store_error(oss, "cannot encode: tables are nested too deeply");
        return false;
    }
    int top = lua_gettop(L);
    oss << "{";
    level++;
    lua_pushnil(L);
    int i = 1;
    while (lua_next(L, top) != 0) {
        // Check for [json.object]=true, if so skip.
        if (lua_islightuserdata(L, -2) && lua_isboolean(L, -1) &&
            (LuaToken(lua_touserdata(L, -2)) == LuaToken("object")) &&
            (lua_toboolean(L, -1) == 1)) {
            lua_pop(L, 1);
            continue;
        }

        // Encode a copy of the key, leaving the original in place
        // for lua_next.
        lua_pushvalue(L, -2);
        // Stack now has key, value, key
        assert(lua_gettop(L) == top + 3);
        if (i > 1) oss << ",";
        indent(oss, level);
        bool ok = encode_key(L, oss);
        if (!ok) {
            lua_settop(L, top);
            return false;
        }
        if (length_exceeded(oss, maxlen)) {
            store_length_error(oss, maxlen);
            lua_settop(L, top);
            return false;
        }
        lua_pop(L, 1);
        // Stack now has key, value
        assert(lua_gettop(L) == top + 2);

        oss << ((level < NOINDENT_LEVEL) ? " : " : ":");
        ok = encode_value(L, oss, level, maxlen);
        assert(lua_gettop(L) == top + 2);
        if (!ok) {
            lua_settop(L, top);
            return false;
        }
        if (length_exceeded(oss, maxlen)) {
            store_length_error(oss, maxlen);
            lua_settop(L, top);
            return false;
        }
        lua_pop(L, 1);
        // Stack now just has key.
        assert(lua_gettop(L) == top + 1);
        i += 1;
    }
    // Stack should be back to where we started.
    assert(lua_gettop(L) == top);
    level--;
    indent(oss, level);
    oss << "}";
    return true;
}

// Encode the value on top of the stack as an object key. Strings and
// integral numbers are accepted; everything else is an error.
static bool encode_key(lua_State *L, eng::ostringstream &oss) {
    int type = lua_type(L, -1);
    switch (type) {
    case LUA_TSTRING:
        return encode_string(L, oss);
    case LUA_TNUMBER:
        return encode_number_key(L, oss);
    case LUA_TBOOLEAN:
    case LUA_TTABLE: {
        store_error(oss, "cannot encode '", lua_typename(L, type), "' in table keys");
        return false;
    }
    default: {
        store_error(oss, "cannot encode '", lua_typename(L, type), "'");
        return false;
    }
    }
}

// Encode the value on top of the stack, dispatching on its lua type.
// On failure, returns false with an error message stored in 'oss'.
static bool encode_value(lua_State *L, eng::ostringstream &oss, int level, int maxlen) {
    int type = lua_type(L, -1);
    switch (type) {
    case LUA_TNIL: return encode_nil(L, oss);
    case LUA_TNUMBER: return encode_number(L, oss);
    case LUA_TBOOLEAN: return encode_boolean(L, oss);
    case LUA_TSTRING: return encode_string(L, oss);
    case LUA_TLIGHTUSERDATA: return encode_token(L, oss);
    case LUA_TTABLE: {
        // The callers only check the length after a nested value has
        // been fully encoded. A self-referential table never gets
        // that far, so check before descending. Each level writes at
        // least one byte, which makes the limit bound the depth.
        if (length_exceeded(oss, maxlen)) {
            store_length_error(oss, maxlen);
            return false;
        }
        if (use_array_representation(L)) {
            return encode_array(L, oss, level, maxlen);
        } else {
            return encode_object(L, oss, level, maxlen);
        }
    }
    default: {
        store_error(oss, "cannot encode '", lua_typename(L, type), "'");
        return false;
    }
    }
}

static bool decode_value(lua_State *L, std::string_view &v);

// Decode one of the bare identifiers null, true or false. Null is
// pushed as the token "null", because lua tables cannot hold nil.
static bool decode_id(lua_State *L, std::string_view &v) {
    std::string_view id = sv::read_ascii_identifier(v);
    if (id == "null") {
        lua_pushlightuserdata(L, LuaToken("null").voidvalue());
        return true;
    }
    if (id == "true") {
        lua_pushboolean(L, 1);
        return true;
    }
    if (id == "false") {
        lua_pushboolean(L, 0);
        return true;
    }
    return false;
}

// Decode a json number and push it as a lua number.
static bool decode_number(lua_State *L, std::string_view &v) {
    std::string_view n = sv::read_number(v, true, true, true, true);
    if (n.empty()) return false;
    // If it's an integer, make sure it fits in a lua double
    // losslessly. If it's a double, some loss in precision
    // is OK.
    if (sv::valid_number(n, true, true, false, false)) {
        int64_t i = sv::to_int64(n);
        if (!LuaCoreStack::int64_storable(i)) return false;
        lua_pushnumber(L, double(i));
        return true;
    } else {
        double d = sv::to_double(n);
        if (std::isnan(d) || std::isinf(d)) return false;
        lua_pushnumber(L, d);
        return true;
    }
}

// Decode the remainder of a base64-escaped string and push the
// decoded bytes as a lua string.
static bool decode_base64_string(lua_State *L, std::string_view &v) {
    // We've already read the starting quote and the E000
    // escape sequence at this point.

    // Skip the equal sign.
    if (!sv::read_prefix(v, "=")) return false;

    // Find the end of the quoted string.
    const char *p = v.data();
    const char *l = p + v.size();
    while (true) {
        if (p == l) return false;
        if (*p < 32) return false;
        if (*p == '"') break;
        p++;
    }
    std::string_view b64 = v.substr(0, p - v.data());
    // Consume the payload and the closing quote.
    v.remove_prefix(b64.size() + 1);

    eng::ostringstream oss;
    if (!util::base64_decode(b64, &oss)) return false;
    eng::string str = oss.str();
    lua_pushlstring(L, str.c_str(), str.size());
    return true;
}

// Decode the remainder of an escaped integer key and push it as a
// lua number.
static bool decode_int_string(lua_State *L, std::string_view &v) {
    // We've already read the starting quote and the E000
    // escape sequence at this point.

    // Parse the number and the closing quote.
    std::string_view n = sv::read_number(v, true, true, false, false);
    if (n.empty()) return false;
    if (!sv::read_prefix(v, "\"")) {
        return false;
    }

    // Make sure the number fits in a lua double,
    // and push it on the stack.
    int64_t i = sv::to_int64(n);
    if (!LuaCoreStack::int64_storable(i)) {
        return false;
    }
    lua_pushnumber(L, double(i));
    return true;
}

// Decode the remainder of an ordinary json string, processing escape
// sequences, and push the result as a lua string.
static bool decode_standard_string(lua_State *L, std::string_view &v) {
    // We've already read the starting quote at this point.
    eng::ostringstream oss;
    while (true) {
        // Get the next codepoint.
        int32_t c = sv::read_codepoint_utf8(v);

        // If it's a control character or invalid codepoint, reject.
        if (c < 32) return false;

        // If it is an unescaped quote, that's end of string.
        if (c == '"') break;

        // If it's a backslash, then deal with the escape sequence.
        if (c == '\\') {
            char next = sv::read_ascii_char(v);
            switch (next) {
            case '"': oss << '"'; break;
            case '\\': oss << '\\'; break;
            case '/': oss << '/'; break;
            case 'r': oss << '\r'; break;
            case 'n': oss << '\n'; break;
            case 'b': oss << '\b'; break;
            case 'f': oss << '\f'; break;
            case 't': oss << '\t'; break;
            case 'u': {
                std::string_view hexdigits = sv::read_nbytes(v, 4);
                if (hexdigits.size() != 4) return false;
                uint64_t codepoint = sv::to_hex64(hexdigits, 0x10000);
                if (codepoint >= 0x10000) return false;
                if (!util::write_codepoint_utf8(codepoint, &oss)) return false;
                break;
            }
            default: return false;
            }
            continue;
        }

        // Any other codepoint should be echoed into stream.
        util::write_codepoint_utf8(c, &oss);
    }
    eng::string result = oss.str();
    lua_pushlstring(L, result.c_str(), result.size());
    return true;
}

// Decode a quoted json string. Strings that begin with codepoint E000
// (either as raw utf-8 or as a \uE000 escape) are decoded back into
// the integer or binary string they stand for.
static bool decode_string(lua_State *L, std::string_view &v) {
    if (!sv::read_prefix(v, "\"")) return false;

    // Check for codepoint E000, the escape sequence.
    if (sv::read_prefix(v, ESCAPE_PREFIX_UTF8) ||
        sv::read_prefix(v, "\\uE000") ||
        sv::read_prefix(v, "\\ue000")) {
        char c = sv::zfront(v);
        if (c == '=') return decode_base64_string(L, v);
        else if ((c == '-') || (c == '+')) return decode_int_string(L, v);
        else return false;
    } else {
        return decode_standard_string(L, v);
    }
}

// Decode a json array into a new lua table with keys 1..n.
//
// The separator handling is lenient: a comma after an element is
// consumed when present but is not required, and a trailing comma
// before ']' is accepted.
//
// On failure this may leave values on the lua stack; decode_value
// cleans them up.
static bool decode_array(lua_State *L, std::string_view &v) {
    if (!sv::read_prefix(v, "[")) return false;
    lua_newtable(L);
    int tabpos = lua_gettop(L);
    int next = 1;
    while (true) {
        v = sv::ltrim(v);
        if (sv::zfront(v) == ']') {
            v.remove_prefix(1);
            return true;
        }
        if (!decode_value(L, v)) {
            return false;
        }
        v = sv::ltrim(v);
        if (sv::zfront(v) == ',') {
            v.remove_prefix(1);
        }
        lua_rawseti(L, tabpos, next++);
    }
}

// Decode a json object into a new lua table.
//
// As in decode_array, commas between members are consumed when
// present but are not required.
//
// On failure this may leave values on the lua stack; decode_value
// cleans them up.
static bool decode_object(lua_State *L, std::string_view &v) {
    if (!sv::read_prefix(v, "{")) return false;
    lua_newtable(L);
    int tabpos = lua_gettop(L);
    while (true) {
        v = sv::ltrim(v);
        if (sv::zfront(v) == '}') {
            v.remove_prefix(1);
            return true;
        }
        if (!decode_string(L, v)) {
            return false;
        }
        v = sv::ltrim(v);
        if (!sv::read_prefix(v, ":")) {
            return false;
        }
        if (!decode_value(L, v)) {
            return false;
        }
        v = sv::ltrim(v);
        if (sv::zfront(v) == ',') {
            v.remove_prefix(1);
        }
        lua_rawset(L, tabpos);
    }
}

// Decode a single value.
//
// On success, pushes the value on the stack and returns true.
// On failure, pushes NIL on the stack and returns false.
//
static bool decode_value(lua_State *L, std::string_view &v) {
    // The input is untrusted and may nest arbitrarily deep. When the
    // lua stack cannot grow any further, fail instead of overrunning
    // it. The slots reserved by the enclosing level are still
    // available for the nil pushed here.
    if (!lua_checkstack(L, 20)) {
        lua_pushnil(L);
        return false;
    }
    int top = lua_gettop(L);

    // Skip blanks.
    v = sv::ltrim(v);

    // Try to read something.
    char c = sv::zfront(v);
    bool result;
    if (c == '"') result = decode_string(L, v);
    else if (c == '[') result = decode_array(L, v);
    else if (c == '{') result = decode_object(L, v);
    else if (sv::ascii_isalpha(c)) result = decode_id(L, v);
    else result = decode_number(L, v);

    // On failure, the decode routines may leave junk
    // on the stack, in which case it's our job to clean up.
    if (result == false) {
        lua_settop(L, top);
        lua_pushnil(L);
    }

    // Now there should be exactly one new value on the stack.
    assert(lua_gettop(L) == top + 1);
    return result;
}

namespace json {

// Encode the lua value in slot 'in' as json.
//
// On success, stores the json text in 'out' and returns an empty
// string. On failure, clears 'out' and returns an error message.
// 'indent' selects indented output; 'maxlen' is the maximum length
// of the encoded text in bytes.
eng::string encode(LuaCoreStack &LS, LuaSlot in, eng::string &out, bool indent, int maxlen) {
    eng::ostringstream oss;

    // Call the recursive encoder. Clean up any crap on the lua stack afterward.
    int top = lua_gettop(LS.state());
    lua_pushvalue(LS.state(), in.index());
    bool ok = encode_value(LS.state(), oss, indent ? 0 : NOINDENT_LEVEL, maxlen);
    lua_settop(LS.state(), top);

    // One last check for overruns.
    if (ok && length_exceeded(oss, maxlen)) {
        store_length_error(oss, maxlen);
        ok = false;
    }

    // Produce the return value. After a failure the stream holds
    // the error message rather than json.
    if (ok) {
        out = oss.str();
        return "";
    } else {
        out = "";
        return oss.str();
    }
}

// Decode the json text 'v' into slot 'out'.
//
// Returns true on success. On failure, stores the token "error" in
// 'out' and returns false. A top-level json null is stored as nil.
bool decode(LuaCoreStack &LS, LuaSlot out, std::string_view v) {
    lua_State *L = LS.state();

    // Try to read a single value from the view.
    int top = lua_gettop(L);
    bool ok = decode_value(L, v);
    lua_replace(L, out.index());
    lua_settop(L, top);
    if (!ok) {
        LS.set(out, LuaToken("error"));
        return false;
    }

    // Special case: if the top level value is 'null', change
    // it to 'nil.'
    if (LS.istoken(out)) {
        LS.set(out, LuaNil);
    }

    // There should be nothing left of the input text, apart from
    // trailing whitespace (for example a final newline), which json
    // permits after the value.
    v = sv::ltrim(v);
    if (v.size() > 0) {
        LS.set(out, LuaToken("error"));
        return false;
    }

    return true;
}

} // namespace json

LuaDefine(json_encode, "data, indent, maxlen",
          "|Encode a lua data structure returning a json string."
          "|"
          "|Data is the value being encoded. Indent is a flag,"
          "|if it's true, then the json is indented nicely,"
          "|otherwise, it is packed tightly. Maxlen is the maximum"
          "|length in bytes of the encoded json string."
          "|"
          "|Usually, Lua data translates straightforwardly to json."
          "|However, there are a number of special cases to be"
          "|aware of:"
          "|"
          "|- Closures and threads cannot be encoded. These will"
          "| cause the encoder to abort."
          "|"
          "|- The numbers infinity and NAN cannot be encoded."
          "| Both of these will cause the encoder to abort."
          "|"
          "|- You must specify a size-limit to the encoded"
          "| string. Exceeding the size limit causes the"
          "| encoder to abort."
          "|"
          "|- Recursive data structures will cause the encoder to"
          "| loop infinitely until the size-limit is exceeded,"
          "| causing the encoder to abort."
          "|"
          "|- There is no way to represent math.huge or math.nan in"
          "| json. Encoding either of them will cause the encoder"
          "| to abort."
          "|"
          "|- Lua tables cannot contain 'nil', but json objects and"
          "| arrays can contain null. If you want the encoder to"
          "| emit a json object or array containing null, you must"
          "| use token json.null to represent null."
          "|"
          "|- Json objects, like lua tables, are key-value stores."
          "| However, json objects can only have string keys. Our"
          "| encoder uses a workaround to transparently"
          "| allow mixing string and integer keys in json tables."
          "| See 'encoding difficult data' below."
          "|"
          "|- Json strings are required to be valid utf-8. Our encoder"
          "| uses a workaround to transparently allow the use of"
          "| arbitrary 8-bit-clean strings. See 'encoding difficult"
          "| data' below."
          "|"
          "|- Lua tables containing contiguous integer keys from 1-n are"
          "| autodetected to be json arrays. Empty tables are also"
          "| emitted as json arrays. All other tables are emitted"
          "| as json objects."
          "|"
          "|- You can force a table to be emitted as a json object"
          "| by putting the key-value pair table[json.object]=true"
          "| into the table. This special key is not emitted, but"
          "| it triggers json object mode. This is the only way"
          "| to emit an empty json object (a truly empty table is"
          "| emitted as a json array.)"
          "|"
          "|Encoding Difficult Data:"
          "|"
          "|Normally, json doesn't allow integer table keys, and it"
          "|doesn't allow strings that aren't valid utf-8. Our"
          "|json encoder and decoder, on the other hand, can"
          "|encode and decode integer table keys and 8-bit-clean"
          "|strings transparently. This is accomplished without"
          "|violating the json specification, by encoding such"
          "|values as utf-8 strings:"
          "|"
          "| '\xEE\x80\x80+123' (encoded integer 123)"
          "| '\xEE\x80\x80=aGVsbG8=' (binary string encoded as base64)"
          "|"
          "|Those encodings start with utf-8 codepoint E000."
          "|This codepoint probably shows up in your text editor"
          "|as a little rectangle. When the decoder sees codepoint"
          "|E000 at the beginning of a string, it automatically"
          "|decodes the string back into its original form."
          "|"
          "|The one price for this behavior is that the encoder"
          "|cannot literally emit strings that start"
          "|with codepoint E000. If the encoder detects such a"
          "|string, it will emit it as a base64-encoded string."
          "|This should be uncommon, since codepoint E000 is"
          "|reserved."
          "|"
          "|Note that integers are only encoded when they are"
          "|used as table keys. Otherwise, numbers are emitted"
          "|straightforwardly."
          "|") {
    LuaArg data, indent, maxlen;
    LuaRet encoded;
    LuaDefStack LS(L, data, indent, maxlen, encoded);
    eng::string out;
    eng::string error = json::encode(LS, data, out, LS.ckboolean(indent), LS.ckint(maxlen));
    if (!error.empty()) {
        // NOTE(review): luaL_error raises a lua error and does not
        // return, so the two statements after it are not expected to
        // run. If lua unwinds with longjmp rather than C++
        // exceptions, 'out' and 'error' are not destroyed on this
        // path - confirm against the lua build in use.
        luaL_error(L, "%s", error.c_str());
        LS.set(encoded, LuaNil);
        return LS.result();
    } else {
        LS.set(encoded, out);
        return LS.result();
    }
}

LuaDefine(json_decode, "data",
          "|Decode a json expression into a lua data structure."
          "|"
          "|Data that was generated by our own encoder is almost"
          "|8-bit clean. That includes difficult cases, like"
          "|binary strings, floating point numbers, and tables"
          "|with mixed string and integer keys. The exception"
          "|are the kinds of data that can't be encoded at all:"
          "|See doc(json.encode) for details about what"
          "|can and cannot be encoded."
          "|"
          "|Some json may contain 'null' inside objects and"
          "|arrays. Lua tables can't store nil, so instead, we"
          "|store the token json.null. If that's not what you"
          "|want, you can use json.stripnulls to strip out"
          "|the json.null values from a data structure and"
          "|replace them with nil."
          "|"
          "|") {
    LuaArg encoded;
    LuaRet data;
    LuaDefStack LS(L, encoded, data);
    std::string_view v = LS.ckstringview(encoded);
    bool ok = json::decode(LS, data, v);
    if (!ok) {
        luaL_error(L, "invalid json string.");
    }
    return LS.result();
}

// LuaDefine(base64_encode, "data", "") {
//     LuaArg str;
//     LuaRet ret;
//     LuaDefStack LS(L, str, ret);
//     eng::string cstr = LS.ckstring(str);
//     eng::ostringstream oss;
//     util::base64_encode(cstr, &oss);
//     LS.set(ret, oss.str());
//     return LS.result();
// }

// LuaDefine(base64_decode, "data", "") {
//     LuaArg str;
//     LuaRet ret;
//     LuaDefStack LS(L, str, ret);
//     eng::string cstr = LS.ckstring(str);
//     eng::ostringstream oss;
//     util::base64_decode(cstr, &oss);
//     LS.set(ret, oss.str());
//     return LS.result();
// }