Files
integration/luprex/cpp/core/json.cpp

730 lines
23 KiB
C++

#include "json.hpp"
#include "luastack.hpp"
#include "util.hpp"
#include <string_view>
#include <ostream>
#include <cmath>
#include <iomanip>
#include <cstdio>
#include <cstdlib>
// Indentation level at or above which indent() emits nothing. json::encode
// starts at this level when compact (non-indented) output is requested.
#define NOINDENT_LEVEL 1000
// Tokens used by the json module.
// NOTE(review): presumably each line defines a constant named ltoken_<name>
// (encode_token compares against ltoken_json_null) — confirm against the
// LuaTokenConstant macro.
LuaTokenConstant(json_null, "null", "");
LuaTokenConstant(json_object, "object", "");
LuaTokenConstant(json_error, "error", "");
// Start a new output line indented to the given nesting level.
//
// Writes a newline followed by one space per level. Writes nothing at all
// when level is NOINDENT_LEVEL or higher, which is how compact output is
// produced.
static void indent(eng::ostringstream &oss, int level) {
    if (level >= NOINDENT_LEVEL) return;
    // A plain '\n' is enough: std::endl would additionally flush the
    // stream on every line, which buys nothing here.
    oss << '\n';
    for (int i = 0; i < level; i++) {
        oss << " ";
    }
}
// Report whether the stream's current write position is beyond maxlen.
static bool length_exceeded(eng::ostringstream &oss, int maxlen) {
    const bool too_long = (oss.tellp() > maxlen);
    return too_long;
}
// Replace the stream's contents with an error message.
//
// Whatever was written so far is discarded, then all of args are handed
// to util::send_to_stream to produce the message text.
template <class... ARGS>
inline void store_error(eng::ostringstream &oss, const ARGS & ... args) {
    // Drop the partially encoded output first.
    oss.str("");
    util::send_to_stream(oss, args...);
}
// Replace the stream's contents with the "maximum length exceeded" error,
// which includes the limit that was in force.
static void store_length_error(eng::ostringstream &oss, int maxlen) {
    const char *const message = "maximum json length exceeded: ";
    store_error(oss, message, maxlen);
}
// Decide whether the table on top of the lua stack is emitted as an array.
//
// Counts the consecutive non-nil entries at integer keys 1, 2, 3, ... and
// returns true when that count equals lua_nkeys() for the table. An empty
// table therefore counts as an array. The stack is left as it was found.
static bool use_array_representation(lua_State *L) {
    const int table_index = lua_gettop(L);
    int count = 0;
    for (;;) {
        lua_rawgeti(L, table_index, count + 1);
        const bool missing = lua_isnil(L, -1);
        lua_pop(L, 1);
        if (missing) break;
        ++count;
    }
    return lua_nkeys(L, table_index) == count;
}
// Forward declarations: encode_array and encode_object call these before
// their definitions appear further down.
static bool encode_key(lua_State *L, eng::ostringstream &oss);
static bool encode_value(lua_State *L, eng::ostringstream &oss, int level, int maxlen);
// The goal here is to emit a double in such a way that
// when we read it back in, we get the *exact* same number.
//
// In the worst case, you can accomplish this by using 17
// digits of precision - that's enough to uniquely identify
// all double values (see the following URL). However, 17
// digits tends to produce unnecessary repeating decimals.
// So we try 16 digits first, which tends to remove those
// repeating decimals, but sometimes produces losses.
// If that doesn't work, we fall back to 17 digits.
//
// https://randomascii.wordpress.com/2012/03/08/float-precisionfrom-zero-to-100-digits-2/
//
// The formatted text is appended to oss. Callers are expected to have
// rejected NAN and infinity already (encode_number does).
//
static void encode_double_lossless(double value, eng::ostringstream &oss) {
    char buffer[80];
    // snprintf rather than sprintf so the write is bounded by the buffer.
    snprintf(buffer, sizeof(buffer), "%.16g", value);
    if (strtod(buffer, nullptr) != value) {
        // 16 digits did not round-trip; 17 always does.
        snprintf(buffer, sizeof(buffer), "%.17g", value);
        assert(strtod(buffer, nullptr) == value);
    }
    oss << buffer;
}
// Emit json null for a lua nil. The lua state is not consulted.
// Always succeeds.
static bool encode_nil(lua_State *L, eng::ostringstream &oss) {
    static_cast<void>(L);
    oss << "null";
    return true;
}
// Encode the light userdata on top of the lua stack, interpreted as a token.
//
// Only the json null token can be encoded (as "null"). Any other token
// replaces the output with an error message and returns false.
static bool encode_token(lua_State *L, eng::ostringstream &oss) {
    LuaToken token(lua_touserdata(L, -1));
    if (token == ltoken_json_null) {
        oss << "null";
        return true;
    }
    store_error(oss, "cannot encode token: [", token.str(), "]");
    return false;
}
// Encode the number on top of the lua stack.
//
// NAN and infinity cannot be represented: the output is replaced with an
// error message and false is returned. Whole numbers that fit in an
// int64_t are written as integers; everything else is written as a double
// that reads back exactly.
static bool encode_number(lua_State *L, eng::ostringstream &oss) {
    const lua_Number value = lua_tonumber(L, -1);
    if (std::isnan(value) || std::isinf(value)) {
        store_error(oss, "cannot encode infinity or NAN");
        return false;
    }
    // Converting a double outside the int64_t range is undefined behaviour,
    // so check the range before attempting the integer fast path.
    // The limit is 2^63; the valid range is [-2^63, 2^63).
    const double int64_limit = 9223372036854775808.0;
    if ((value >= -int64_limit) && (value < int64_limit)) {
        const int64_t ivalue = static_cast<int64_t>(value);
        if (static_cast<double>(ivalue) == value) {
            oss << ivalue;
            return true;
        }
    }
    encode_double_lossless(value, oss);
    return true;
}
// Encode the number on top of the lua stack as a json object key.
//
// Json keys must be strings, so an integer key N is written as the string
// "\uE000+N" or "\uE000-N". Keys that are not whole numbers representable
// as int64_t (including infinities) replace the output with an error
// message and return false.
static bool encode_number_key(lua_State *L, eng::ostringstream &oss) {
    const lua_Number value = lua_tonumber(L, -1);
    // Converting a double outside the int64_t range is undefined behaviour,
    // so reject those before casting. The limit is 2^63.
    const double int64_limit = 9223372036854775808.0;
    const bool in_range = (value >= -int64_limit) && (value < int64_limit);
    if (!in_range) {
        store_error(oss, "cannot encode floating point numbers in table keys");
        return false;
    }
    const int64_t ivalue = static_cast<int64_t>(value);
    if (static_cast<double>(ivalue) != value) {
        store_error(oss, "cannot encode floating point numbers in table keys");
        return false;
    }
    oss << "\"\\uE000";
    // Negative values print their own '-'. Negating them by hand would
    // overflow for the most negative int64_t.
    if (ivalue >= 0) {
        oss << '+';
    }
    oss << ivalue << '"';
    return true;
}
// Encode the boolean on top of the lua stack as "true" or "false".
// Always succeeds.
static bool encode_boolean(lua_State *L, eng::ostringstream &oss) {
    if (lua_toboolean(L, -1)) {
        oss << "true";
    } else {
        oss << "false";
    }
    return true;
}
// Encode the string on top of the lua stack as a quoted json string.
//
// A string that is valid utf-8 and does not begin with codepoint U+E000 is
// written literally, using json escapes for quote, backslash and control
// characters. Any other string is written as \uE000, an equal sign, and
// the base64 encoding of its raw bytes. Always returns true.
static bool encode_string(lua_State *L, eng::ostringstream &oss) {
    size_t len;
    const char *s = lua_tolstring(L, -1, &len);
    std::string_view str(s, len);
    oss << '"';
    // U+E000 is spelled as explicit utf-8 bytes (EE 80 80) so the marker
    // cannot be silently lost by an editor or an encoding conversion.
    // decode_string treats a leading U+E000 as the escape marker, so a
    // literal string starting with it must take the base64 path.
    if (sv::valid_utf8(str) && !sv::has_prefix(str, "\xEE\x80\x80")) {
        while (!str.empty()) {
            int32_t cp = sv::read_codepoint_utf8(str);
            assert(cp >= 0);
            switch (cp) {
                case '\\': oss << "\\\\"; break;
                case '"' : oss << "\\\""; break;
                case '\b': oss << "\\b"; break;
                case '\f': oss << "\\f"; break;
                case '\r': oss << "\\r"; break;
                case '\n': oss << "\\n"; break;
                case '\t': oss << "\\t"; break;
                default: {
                    if (cp < 32) {
                        // Remaining control characters get a \u escape.
                        oss << "\\u" << util::hex16.val(cp);
                    } else {
                        bool ok = util::write_codepoint_utf8(cp, &oss);
                        assert(ok);
                    }
                }
            }
        }
    } else {
        // Output as a base64-encoded string.
        oss << "\\uE000=";
        util::base64_encode(str, &oss);
    }
    oss << '"';
    return true;
}
// Encode the table on top of the lua stack as a json array.
//
// Elements are read from integer keys 1, 2, 3, ... until the first nil.
// level is the current indentation level and maxlen the output size limit.
// On failure the output is replaced with an error message and false is
// returned. The lua stack is restored to its height on entry either way.
static bool encode_array(lua_State *L, eng::ostringstream &oss, int level, int maxlen) {
    // Nested tables recurse through here, so a failure to grow the lua
    // stack must stop the encoder instead of being ignored.
    if (!lua_checkstack(L, 20)) {
        store_error(oss, "cannot encode: lua stack exhausted");
        return false;
    }
    const int top = lua_gettop(L);
    oss << "[";
    level++;
    for (int i = 1; ; i++) {
        lua_rawgeti(L, top, i);
        if (lua_isnil(L, -1)) break;
        if (i > 1) oss << ",";
        indent(oss, level);
        const bool ok = encode_value(L, oss, level, maxlen);
        lua_settop(L, top);
        if (!ok) return false;
        if (length_exceeded(oss, maxlen)) {
            store_length_error(oss, maxlen);
            return false;
        }
    }
    // Drop the nil that ended the loop.
    lua_settop(L, top);
    level--;
    indent(oss, level);
    oss << "]";
    return true;
}
// Encode the table on top of the lua stack as a json object.
//
// Every key/value pair is emitted except the marker pair
// [json.object]=true, which only exists to force object mode. level is the
// current indentation level and maxlen the output size limit. On failure
// the output is replaced with an error message and false is returned. The
// lua stack is restored to its height on entry either way.
static bool encode_object(lua_State *L, eng::ostringstream &oss, int level, int maxlen) {
    // Nested tables recurse through here, so a failure to grow the lua
    // stack must stop the encoder instead of being ignored.
    if (!lua_checkstack(L, 20)) {
        store_error(oss, "cannot encode: lua stack exhausted");
        return false;
    }
    const int top = lua_gettop(L);
    oss << "{";
    level++;
    int emitted = 0;
    lua_pushnil(L);
    while (lua_next(L, top) != 0) {
        // Stack now has key, value.
        // Check for [json.object]=true, if so skip.
        const bool is_marker =
            lua_islightuserdata(L, -2) &&
            lua_isboolean(L, -1) &&
            (LuaToken(lua_touserdata(L, -2)) == ltoken_json_object) &&
            (lua_toboolean(L, -1) == 1);
        if (is_marker) {
            lua_pop(L, 1);
            continue;
        }
        // Encode a copy of the key, so the original stays untouched for
        // the next lua_next call.
        lua_pushvalue(L, -2);
        // Stack now has key, value, key
        assert(lua_gettop(L) == top + 3);
        if (emitted > 0) oss << ",";
        indent(oss, level);
        if (!encode_key(L, oss)) {
            lua_settop(L, top);
            return false;
        }
        if (length_exceeded(oss, maxlen)) {
            store_length_error(oss, maxlen);
            lua_settop(L, top);
            return false;
        }
        lua_pop(L, 1);
        // Stack now has key, value
        assert(lua_gettop(L) == top + 2);
        oss << ((level < NOINDENT_LEVEL) ? " : " : ":");
        const bool ok = encode_value(L, oss, level, maxlen);
        assert(lua_gettop(L) == top + 2);
        if (!ok) {
            lua_settop(L, top);
            return false;
        }
        if (length_exceeded(oss, maxlen)) {
            store_length_error(oss, maxlen);
            lua_settop(L, top);
            return false;
        }
        lua_pop(L, 1);
        // Stack now just has key.
        assert(lua_gettop(L) == top + 1);
        emitted++;
    }
    // Stack should be back to where we started.
    assert(lua_gettop(L) == top);
    level--;
    indent(oss, level);
    oss << "}";
    return true;
}
// Encode the value on top of the lua stack as a json object key.
//
// Strings and numbers are supported. Any other type replaces the output
// with an error message and returns false.
static bool encode_key(lua_State *L, eng::ostringstream &oss) {
    const int type = lua_type(L, -1);
    if (type == LUA_TSTRING) return encode_string(L, oss);
    if (type == LUA_TNUMBER) return encode_number_key(L, oss);
    // Booleans and tables are encodable as values, so their message
    // says the problem is the key position.
    if ((type == LUA_TBOOLEAN) || (type == LUA_TTABLE)) {
        store_error(oss, "cannot encode '", lua_typename(L, type), "' in table keys");
    } else {
        store_error(oss, "cannot encode '", lua_typename(L, type), "'");
    }
    return false;
}
// Encode the value on top of the lua stack, dispatching on its lua type.
//
// Tables become json arrays or objects depending on
// use_array_representation(). Types with no encoder replace the output
// with an error message and return false.
static bool encode_value(lua_State *L, eng::ostringstream &oss, int level, int maxlen) {
    const int type = lua_type(L, -1);
    if (type == LUA_TNIL) return encode_nil(L, oss);
    if (type == LUA_TNUMBER) return encode_number(L, oss);
    if (type == LUA_TBOOLEAN) return encode_boolean(L, oss);
    if (type == LUA_TSTRING) return encode_string(L, oss);
    if (type == LUA_TLIGHTUSERDATA) return encode_token(L, oss);
    if (type == LUA_TTABLE) {
        return use_array_representation(L)
            ? encode_array(L, oss, level, maxlen)
            : encode_object(L, oss, level, maxlen);
    }
    store_error(oss, "cannot encode '", lua_typename(L, type), "'");
    return false;
}
// Forward declaration: decode_array and decode_object call decode_value,
// which is defined after them.
static bool decode_value(lua_State *L, std::string_view &v);
// Decode one of the bare words null, true or false from the front of v.
//
// null is pushed as the "null" token, true and false as lua booleans.
// Any other identifier returns false with nothing pushed.
static bool decode_id(lua_State *L, std::string_view &v) {
    const std::string_view word = sv::read_ascii_identifier(v);
    if (word == "null") {
        lua_pushlightuserdata(L, LuaToken("null").voidvalue());
        return true;
    }
    if (word == "true") {
        lua_pushboolean(L, 1);
        return true;
    }
    if (word == "false") {
        lua_pushboolean(L, 0);
        return true;
    }
    return false;
}
// Decode a number from the front of v and push it as a lua number.
//
// Returns false, pushing nothing, when no number can be read, when an
// integer fails LuaCoreStack::validinteger, or when a double parses to
// NAN or infinity.
static bool decode_number(lua_State *L, std::string_view &v) {
    const std::string_view text = sv::read_number(v, true, true, true, true);
    if (text.empty()) return false;
    const bool is_integer = sv::valid_number(text, true, true, false, false);
    if (!is_integer) {
        // For a double, some loss in precision is OK.
        const double d = sv::to_double(text);
        if (std::isnan(d) || std::isinf(d)) return false;
        lua_pushnumber(L, d);
        return true;
    }
    // An integer must fit in a lua double losslessly.
    const int64_t i = sv::to_int64(text);
    if (!LuaCoreStack::validinteger(i)) return false;
    lua_pushnumber(L, double(i));
    return true;
}
// Decode the remainder of a base64-escaped string and push the raw bytes.
//
// The opening quote and the E000 escape sequence have already been
// consumed by the caller, so v starts at the equal sign. Returns false,
// pushing nothing, if the equal sign or closing quote is missing, if a
// character below 32 appears, or if the base64 text does not decode.
static bool decode_base64_string(lua_State *L, std::string_view &v) {
    // Skip the equal sign.
    if (!sv::read_prefix(v, "=")) return false;
    // Scan forward to the closing quote.
    size_t end = 0;
    for (;; ++end) {
        if (end == v.size()) return false;
        const char ch = v[end];
        if (ch < 32) return false;
        if (ch == '"') break;
    }
    const std::string_view b64 = v.substr(0, end);
    // Consume the payload together with the closing quote.
    v.remove_prefix(end + 1);
    eng::ostringstream decoded;
    if (!util::base64_decode(b64, &decoded)) return false;
    eng::string bytes = decoded.str();
    lua_pushlstring(L, bytes.c_str(), bytes.size());
    return true;
}
// Decode the remainder of an integer-escaped string and push the number.
//
// The opening quote and the E000 escape sequence have already been
// consumed by the caller. Returns false, pushing nothing, if no number
// can be read, the closing quote is missing, or the integer fails
// LuaCoreStack::validinteger.
static bool decode_int_string(lua_State *L, std::string_view &v) {
    // The number must be followed directly by the closing quote.
    const std::string_view digits = sv::read_number(v, true, true, false, false);
    if (digits.empty()) return false;
    const bool closed = sv::read_prefix(v, "\"");
    if (!closed) return false;
    // Make sure the number fits in a lua double before pushing it.
    const int64_t i = sv::to_int64(digits);
    const bool fits = LuaCoreStack::validinteger(i);
    if (!fits) return false;
    lua_pushnumber(L, double(i));
    return true;
}
static bool decode_standard_string(lua_State *L, std::string_view &v) {
// We've already read the starting quote at this point.
eng::ostringstream oss;
while (true) {
// Get the next codepoint.
int32_t c = sv::read_codepoint_utf8(v);
// If it's a control character or invalid codepoint, reject.
if (c < 32) return false;
// If it is an unescaped quote, that's end of string.
if (c == '"') break;
// If it's a backslash, then deal with the escape sequence.
if (c == '\\') {
char next = sv::read_ascii_char(v);
switch (next) {
case '"': oss << '"'; break;
case '\\': oss << '\\'; break;
case '/': oss << '/'; break;
case 'r': oss << '\r'; break;
case 'n': oss <<'\n'; break;
case 'b': oss << '\b'; break;
case 'f': oss << '\f'; break;
case 't': oss << '\t'; break;
case 'u': {
std::string_view hexdigits = sv::read_nbytes(v, 4);
if (hexdigits.size() != 4) return false;
uint64_t codepoint = sv::to_hex64(hexdigits, 0x10000);
if (codepoint >= 0x10000) return false;
if (!util::write_codepoint_utf8(codepoint, &oss)) return false;
break;
}
default: return false;
}
continue;
}
// Any other codepoint should be echoed into stream.
util::write_codepoint_utf8(c, &oss);
}
eng::string result = oss.str();
lua_pushlstring(L, result.c_str(), result.size());
return true;
}
// Decode a quoted json string from the front of v and push the result.
//
// A string that starts with codepoint U+E000 (written literally or as
// the escape \uE000) is one of the encoder's special forms: '=' introduces
// base64 data and '+' or '-' introduces an integer. Every other string is
// decoded as ordinary json text. Returns false on malformed input.
static bool decode_string(lua_State *L, std::string_view &v) {
    if (!sv::read_prefix(v, "\"")) return false;
    // Check for codepoint E000, the escape sequence. The literal form is
    // spelled as explicit utf-8 bytes (EE 80 80) so the marker cannot be
    // silently lost by an editor or an encoding conversion.
    const bool escaped =
        sv::read_prefix(v, "\xEE\x80\x80") ||
        sv::read_prefix(v, "\\uE000") ||
        sv::read_prefix(v, "\\ue000");
    if (!escaped) {
        return decode_standard_string(L, v);
    }
    const char c = sv::zfront(v);
    if (c == '=') return decode_base64_string(L, v);
    if ((c == '-') || (c == '+')) return decode_int_string(L, v);
    return false;
}
// Decode a json array from the front of v into a new lua table.
//
// The table is pushed first and elements are stored at keys 1, 2, 3, ...
// A comma after an element is consumed when present but is not required.
// On failure returns false and may leave extra values on the lua stack;
// decode_value cleans those up.
static bool decode_array(lua_State *L, std::string_view &v) {
    if (!sv::read_prefix(v, "[")) return false;
    lua_newtable(L);
    const int table_index = lua_gettop(L);
    for (int slot = 1; ; ++slot) {
        v = sv::ltrim(v);
        if (sv::zfront(v) == ']') {
            v.remove_prefix(1);
            return true;
        }
        const bool ok = decode_value(L, v);
        if (!ok) return false;
        v = sv::ltrim(v);
        if (sv::zfront(v) == ',') v.remove_prefix(1);
        // Store the decoded element, popping it off the stack.
        lua_rawseti(L, table_index, slot);
    }
}
// Decode a json object from the front of v into a new lua table.
//
// The table is pushed first; each entry is a string key, a colon and a
// value. A comma after an entry is consumed when present but is not
// required. On failure returns false and may leave extra values on the
// lua stack; decode_value cleans those up.
static bool decode_object(lua_State *L, std::string_view &v) {
    if (!sv::read_prefix(v, "{")) return false;
    lua_newtable(L);
    const int table_index = lua_gettop(L);
    for (;;) {
        v = sv::ltrim(v);
        if (sv::zfront(v) == '}') {
            v.remove_prefix(1);
            return true;
        }
        // Key first.
        const bool have_key = decode_string(L, v);
        if (!have_key) return false;
        v = sv::ltrim(v);
        const bool have_colon = sv::read_prefix(v, ":");
        if (!have_colon) return false;
        // Then the value.
        const bool have_value = decode_value(L, v);
        if (!have_value) return false;
        v = sv::ltrim(v);
        if (sv::zfront(v) == ',') v.remove_prefix(1);
        // Store the pair, popping key and value off the stack.
        lua_rawset(L, table_index);
    }
}
// Decode one json value from the front of v.
//
// Leading blanks are skipped, then the first character selects the
// decoder: a quote for strings, '[' for arrays, '{' for objects, a letter
// for null/true/false, and anything else is tried as a number.
//
// On success, pushes the value on the stack and returns true.
// On failure, pushes NIL on the stack and returns false.
//
static bool decode_value(lua_State *L, std::string_view &v) {
    lua_checkstack(L, 20);
    const int top = lua_gettop(L);
    // Skip blanks.
    v = sv::ltrim(v);
    // Pick a decoder from the first character.
    const char c = sv::zfront(v);
    bool ok;
    switch (c) {
        case '"': ok = decode_string(L, v); break;
        case '[': ok = decode_array(L, v); break;
        case '{': ok = decode_object(L, v); break;
        default:
            ok = sv::ascii_isalpha(c) ? decode_id(L, v) : decode_number(L, v);
            break;
    }
    // A failed decoder may leave junk on the stack. Clear it and push
    // the NIL that the contract promises.
    if (!ok) {
        lua_settop(L, top);
        lua_pushnil(L);
    }
    // Now there should be exactly one new value on the stack.
    assert(lua_gettop(L) == top + 1);
    return ok;
}
namespace json {
// Encode the lua value in slot `in` as json text.
//
// indent selects indented output over tightly packed output, and maxlen
// is the maximum allowed output length. On success, `out` receives the
// json and the returned string is empty. On failure, `out` is emptied and
// the returned string is the error message. The lua stack height is
// unchanged on return.
eng::string encode(LuaCoreStack &LS, LuaSlot in, eng::string &out, bool indent, int maxlen) {
    eng::ostringstream oss;
    // Call the recursive encoder. Clean up any crap on the lua stack afterward.
    int top = lua_gettop(LS.state());
    lua_pushvalue(LS.state(), in.index());
    bool ok = encode_value(LS.state(), oss, indent ? 0 : NOINDENT_LEVEL, maxlen);
    lua_settop(LS.state(), top);
    // One last check for overruns.
    if (ok && length_exceeded(oss, maxlen)) {
        store_length_error(oss, maxlen);
        ok = false;
    }
    // Produce the return value. On failure the stream holds the error
    // message rather than json.
    if (ok) {
        out = oss.str();
        return "";
    } else {
        out = "";
        return oss.str();
    }
}
// Decode the json text in v and store the result in slot `out`.
//
// Returns true on success. On failure, returns false and stores the
// "error" token in `out`. A top-level json null is stored as nil. Blanks
// after the value are allowed; any other trailing text is an error.
bool decode(LuaCoreStack &LS, LuaSlot out, std::string_view v) {
    lua_State *L = LS.state();
    // Try to read a single value from the view.
    int top = lua_gettop(L);
    bool ok = decode_value(L, v);
    lua_replace(L, out.index());
    lua_settop(L, top);
    if (!ok) {
        LS.set(out, LuaToken("error"));
        return false;
    }
    // Special case: if the top level value is 'null', change
    // it to 'nil.'
    if (LS.istoken(out)) {
        LS.set(out, LuaNil);
    }
    // Json permits whitespace after the value, so skip it before
    // checking for leftovers.
    v = sv::ltrim(v);
    // There should be nothing left of the input text.
    if (v.size() > 0) {
        LS.set(out, LuaToken("error"));
        return false;
    }
    return true;
}
} // namespace json
// Lua binding for the json encoder. Takes (data, indent, maxlen) and
// returns the encoded string; raises a lua error carrying the encoder's
// message when encoding fails.
LuaDefine(json_encode, "data, indent, maxlen",
"|Encode a lua data structure returning a json string."
"|"
"|Data is the value being encoded. Indent is a flag,"
"|if it's true, then the json is indented nicely,"
"|otherwise, it is packed tightly. Maxlen is the maximum"
"|length in bytes of the encoded json string."
"|"
"|Usually, Lua data translates straightforwardly to json."
"|However, there are a number of special cases to be"
"|aware of:"
"|"
"|- Closures and threads cannot be encoded. These will"
"| cause the encoder to abort."
"|"
"|- The numbers infinity and NAN cannot be encoded."
"| Both of these will cause the encoder to abort."
"|"
"|- You must specify a size-limit to the encoded"
"| string. Exceeding the size limit causes the"
"| encoder to abort."
"|"
"|- Recursive data structures will cause the encoder to"
"| loop infinitely until the size-limit is exceeded,"
"| causing the encoder to abort."
"|"
"|- There is no way to represent math.huge or math.nan in"
"| json. Encoding either of them, or -math.huge, will"
"| cause the encoder to abort."
"|"
"|- Lua tables cannot contain 'nil', but json objects and"
"| arrays can contain null. If you want the encoder to"
"| emit a json object or array containing null, you must"
"| use token json.null to represent null."
"|"
"|- Json objects, like lua tables, are key-value stores."
"| However, json objects can only have string keys. Our"
"| encoder uses a workaround to transparently"
"| allow mixing string and integer keys in json tables."
"| See 'encoding difficult data' below."
"|"
"|- Json strings are required to be valid utf-8. Our encoder"
"| uses a workaround to transparently allow the use of"
"| arbitrary 8-bit-clean strings. See 'encoding difficult"
"| data' below."
"|"
"|- Lua tables containing contiguous integer keys from 1-n are"
"| autodetected to be json arrays. Empty tables are also"
"| emitted as json arrays. All other tables are emitted"
"| as json objects."
"|"
"|- You can force a table to be emitted as a json object"
"| by putting the key-value pair table[json.object]=true"
"| into the table. This special key is not emitted, but"
"| it triggers json object mode. This is the only way"
"| to emit an empty json object (a truly empty table is"
"| emitted as a json array.)"
"|"
"|Encoding Difficult Data:"
"|"
"|Normally, json doesn't allow integer table keys, and it"
"|doesn't allow strings that aren't valid utf-8. Our"
"|json encoder and decoder, on the other hand, can"
"|encode and decode integer table keys and 8-bit-clean"
"|strings transparently. This is accomplished without"
"|violating the json specification, by encoding such"
"|values as utf-8 strings:"
"|"
"| '\\uE000+123' (encoded integer 123)"
"| '\\uE000=aGVsbG8=' (binary string encoded as base64)"
"|"
"|Those encodings start with utf-8 codepoint E000."
"|This codepoint probably shows up in your text editor"
"|as a little rectangle. When the decoder sees codepoint"
"|E000 at the beginning of a string, it automatically"
"|decodes the string back into its original form."
"|"
"|The one price for this behavior is that the encoder"
"|cannot literally emit strings that start"
"|with codepoint E000. If the encoder detects such a"
"|string, it will emit it as a base64-encoded string."
"|This should be uncommon, since codepoint E000 is"
"|reserved."
"|"
"|Note that integers are only encoded when they are"
"|used as table keys. Otherwise, numbers are emitted"
"|straightforwardly."
"|") {
    LuaArg data, indent, maxlen;
    LuaRet encoded;
    LuaDefStack LS(L, data, indent, maxlen, encoded);
    eng::string out;
    eng::string error = json::encode(LS, data, out, LS.ckboolean(indent), LS.ckint(maxlen));
    if (!error.empty()) {
        // luaL_error raises a lua error and does not return, so nothing
        // needs to be stored in the result slot on this path.
        luaL_error(L, "%s", error.c_str());
    }
    LS.set(encoded, out);
    return LS.result();
}
// Lua binding for the json decoder. Takes the json text and returns the
// decoded value; raises a lua error when the text cannot be decoded.
LuaDefine(json_decode, "data",
"|Decode a json expression into a lua data structure."
"|"
"|Data that was generated by our own encoder is almost"
"|8-bit clean. That includes difficult cases, like"
"|binary strings, floating point numbers, and tables"
"|with mixed string and integer keys. The exception"
"|are the kinds of data that can't be encoded at all:"
"|See doc(json.encode) for details about what"
"|can and cannot be encoded."
"|"
"|Some json may contain 'null' inside objects and"
"|arrays. Lua tables can't store nil, so instead, we"
"|store the token json.null. If that's not what you"
"|want, you can use json.stripnulls to strip out"
"|the json.null values from a data structure and"
"|replace them with nil."
"|"
"|") {
    LuaArg encoded;
    LuaRet data;
    LuaDefStack LS(L, encoded, data);
    const std::string_view text = LS.ckstringview(encoded);
    // json::decode reports failure through its return value.
    if (!json::decode(LS, data, text)) {
        luaL_error(L, "invalid json string.");
    }
    return LS.result();
}
// LuaDefine(base64_encode, "data", "") {
// LuaArg str;
// LuaRet ret;
// LuaDefStack LS(L, str, ret);
// eng::string cstr = LS.ckstring(str);
// eng::ostringstream oss;
// util::base64_encode(cstr, &oss);
// LS.set(ret, oss.str());
// return LS.result();
// }
// LuaDefine(base64_decode, "data", "") {
// LuaArg str;
// LuaRet ret;
// LuaDefStack LS(L, str, ret);
// eng::string cstr = LS.ckstring(str);
// eng::ostringstream oss;
// util::base64_decode(cstr, &oss);
// LS.set(ret, oss.str());
// return LS.result();
// }