json.encode and json.decode finished. Also lots of refactoring.

This commit is contained in:
2022-06-06 23:03:26 -04:00
parent f03a48b0a6
commit 779d9e20b8
11 changed files with 1292 additions and 109 deletions

View File

@@ -65,8 +65,10 @@ bool valid_double(string_view value) {
int64_t to_int64(string_view value, int64_t errval) {
int64_t result;
const char *last = value.data() + value.size();
auto r = std::from_chars(value.data(), last, result, 10);
const char *p = value.data();
const char *last = p + value.size();
if ((p < last) && (*p == '+')) p++;
auto r = std::from_chars(p, last, result, 10);
if (r.ec != std::errc()) return errval;
if (r.ptr != last) return errval;
return result;
@@ -74,6 +76,7 @@ int64_t to_int64(string_view value, int64_t errval) {
uint64_t to_hex64(string_view value, uint64_t errval) {
uint64_t result;
if (sv::zfront(value) == '-') return errval;
const char *last = value.data() + value.size();
auto r = std::from_chars(value.data(), last, result, 16);
if (r.ec != std::errc()) return errval;
@@ -204,6 +207,15 @@ string_view read_to_line(string_view &source) {
return result;
}
// Consumes `prefix` from the front of `source` when it is present.
//
// Returns true and advances `source` past the prefix on a match; returns
// false and leaves `source` untouched otherwise. An empty prefix always
// matches and consumes nothing.
bool read_prefix(string_view &source, string_view prefix) {
  const size_t want = prefix.size();
  // substr() clamps to the available length, so a prefix longer than
  // `source` simply fails the comparison.
  if (source.substr(0, want) != prefix) {
    return false;
  }
  source.remove_prefix(want);
  return true;
}
string_view read_to_space(string_view &source) {
size_t pos1 = 0;
while ((pos1 < source.size()) && (!ascii_isspace(source[pos1]))) {
@@ -243,57 +255,119 @@ string_view read_ascii_identifier(string_view &source) {
return result;
}
// Reads a decimal number from the front of `source`.
//
// Grammar accepted (each optional part gated by its flag):
//   ['+' if plus | '-' if minus] digits ['.' digits if dec]
//   [('e'|'E') ['+'|'-'] digits if exp]
// At least one digit must appear in the mantissa (before or after the
// decimal point), so "123." and ".5" are accepted when `dec` is set, while
// "." is not. If an exponent marker is consumed it must be followed by at
// least one digit, otherwise the whole read fails.
//
// On success the number text is returned and removed from `source`. On
// failure an empty view is returned and `source` is left unchanged.
std::string_view read_number(string_view &source, bool plus, bool minus, bool dec, bool exp) {
  const string_view none = source.substr(0, 0);
  const size_t len = source.size();
  if (len == 0) return none;

  // Optional leading sign; a sign that is present but not permitted fails.
  size_t pos = 0;
  switch (source[0]) {
    case '+':
      if (!plus) return none;
      pos = 1;
      break;
    case '-':
      if (!minus) return none;
      pos = 1;
      break;
    default:
      break;
  }
  if (pos == len) return none;

  // Advances `pos` over a run of ASCII digits and reports how many were seen.
  auto skip_digits = [&]() -> size_t {
    const size_t start = pos;
    while ((pos < len) && ascii_isdigit(source[pos])) {
      ++pos;
    }
    return pos - start;
  };

  // Mantissa: integer digits, then an optional fractional part.
  size_t mantissa_digits = skip_digits();
  if (dec && (pos < len) && (source[pos] == '.')) {
    ++pos;
    mantissa_digits += skip_digits();
  }
  if (mantissa_digits == 0) return none;

  // Exponent: marker, optional sign, mandatory digits.
  if (exp && (pos < len) && ((source[pos] == 'e') || (source[pos] == 'E'))) {
    ++pos;
    if ((pos < len) && ((source[pos] == '+') || (source[pos] == '-'))) {
      ++pos;
    }
    if (skip_digits() == 0) return none;
  }

  const string_view number = source.substr(0, pos);
  source.remove_prefix(pos);
  return number;
}
// Pops one character off the front of `source` and returns it.
//
// Returns -1 (consuming nothing) when `source` is empty.
// NOTE(review): the byte is converted through plain `char`, so for bytes
// >= 0x80 the sign of the result depends on whether `char` is signed on
// the target platform.
int32_t read_ascii_char(string_view &source) {
  if (source.empty()) {
    return -1;
  }
  const char head = source[0];
  source.remove_prefix(1);
  return head;
}
// Decodes one UTF-8 encoded codepoint from the front of `source`.
//
// On success the codepoint is returned and its bytes are removed from
// `source`. Returns -1 and leaves `source` unchanged when the input is
// empty, the lead byte is invalid, the sequence is truncated, a
// continuation byte is malformed, the encoding is overlong, or the value
// is a surrogate (U+D800..U+DFFF) or exceeds U+10FFFF.
int32_t read_codepoint_utf8(string_view &source) {
  if (source.empty()) return -1;
  const unsigned char *data = reinterpret_cast<const unsigned char *>(source.data());
  const unsigned char lead = data[0];

  // Classify the lead byte: sequence length and the payload bits it carries.
  size_t length;
  int32_t cp;
  if (lead < 0x80) {            // 0xxxxxxx
    length = 1;
    cp = lead;
  } else if ((lead >> 5) == 0x06) {  // 110xxxxx
    length = 2;
    cp = lead & 0x1F;
  } else if ((lead >> 4) == 0x0E) {  // 1110xxxx
    length = 3;
    cp = lead & 0x0F;
  } else if ((lead >> 3) == 0x1E) {  // 11110xxx
    length = 4;
    cp = lead & 0x07;
  } else {
    return -1;
  }
  if (source.size() < length) return -1;

  // Fold in the continuation bytes, each of which must look like 10xxxxxx.
  for (size_t k = 1; k < length; ++k) {
    const unsigned char cont = data[k];
    if ((cont >> 6) != 0x02) return -1;
    cp = (cp << 6) | (cont & 0x3F);
  }

  // The value must be in range, not a surrogate, and encoded with exactly
  // the number of bytes its magnitude requires (no overlong forms).
  if (cp > 0x10FFFF) return -1;
  if ((cp >= 0xD800) && (cp <= 0xDFFF)) return -1;
  const size_t required = (cp <= 0x007F) ? 1 : (cp <= 0x07FF) ? 2 : (cp <= 0xFFFF) ? 3 : 4;
  if (required != length) return -1;

  source.remove_prefix(length);
  return cp;
}
// Reports whether `s` consists entirely of well-formed UTF-8.
//
// Decoding is delegated to read_codepoint_utf8(), which consumes one
// codepoint per call and returns a negative value on any malformed,
// truncated, overlong, surrogate or out-of-range sequence. An empty
// string is considered valid.
bool valid_utf8(string_view s)
{
  while (!s.empty()) {
    const int32_t codepoint = read_codepoint_utf8(s);
    if (codepoint < 0) return false;
  }
  return true;
}
// Reports whether the whole of `s` is a single number as accepted by
// read_number() under the given flags.
//
// read_number() returns an empty view and consumes nothing on failure, so
// the token must be non-empty AND nothing may remain. Checking only that
// the remainder is empty would wrongly accept an empty input string.
bool valid_number(string_view s, bool plus, bool minus, bool dec, bool exp) {
  const string_view token = read_number(s, plus, minus, dec, exp);
  return !token.empty() && s.empty();
}
} // namespace sv
@@ -334,6 +408,8 @@ void quote_string(const eng::string &s, std::ostream *os) {
(*os) << (usesinglequote ? "\"" : "\\\"");
} else if (c == '\'') {
(*os) << (usesinglequote ? "\\'" : "'");
} else if (c == '\\') {
(*os) << "\\\\";
} else {
(*os) << c;
}
@@ -344,7 +420,7 @@ void quote_string(const eng::string &s, std::ostream *os) {
case '\t': (*os) << "\\t"; break;
case '\r': (*os) << "\\r"; break;
default:
(*os) << "\\" << std::setfill('0') << std::setw(3) << value;
(*os) << "\\" << dec.width(3).fill('0').val(value);
break;
}
}
@@ -352,6 +428,52 @@ void quote_string(const eng::string &s, std::ostream *os) {
(*os) << (usesinglequote ? '\'' : '"');
}
// Writes the standard (RFC 4648, '+' and '/') base64 encoding of `str` to
// `*oss`, padding the final group with '=' so the output length is always
// a multiple of four. An empty input produces no output.
void base64_encode(std::string_view str, std::ostream *oss) {
  static const char kAlphabet[] =
      "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
  const unsigned char *bytes = reinterpret_cast<const unsigned char *>(str.data());
  const size_t total = str.size();

  size_t pos = 0;
  while (pos < total) {
    const bool has_second = (pos + 1 < total);
    const bool has_third = (pos + 2 < total);

    // Pack up to three input bytes into a 24-bit group, high byte first.
    uint32_t group = static_cast<uint32_t>(bytes[pos]) << 16;
    if (has_second) group |= static_cast<uint32_t>(bytes[pos + 1]) << 8;
    if (has_third) group |= static_cast<uint32_t>(bytes[pos + 2]);

    // Emit four sextets; positions not backed by input become padding.
    const char c0 = kAlphabet[(group >> 18) & 0x3F];
    const char c1 = kAlphabet[(group >> 12) & 0x3F];
    const char c2 = has_second ? kAlphabet[(group >> 6) & 0x3F] : '=';
    const char c3 = has_third ? kAlphabet[group & 0x3F] : '=';
    (*oss) << c0;
    (*oss) << c1;
    (*oss) << c2;
    (*oss) << c3;

    pos += 3;
  }
}
// Decodes standard-alphabet base64 text from `str`, writing the decoded
// bytes to `*oss`.
//
// Decoding is lenient: characters outside the base64 alphabet (for
// example line breaks) are skipped and decoding continues. The return
// value says whether the input was "clean": true only when every
// character belonged to the alphabet, every group of four was complete,
// and padding was well formed. Padding is well formed when a group holds
// at most two '=' and no data character follows an '=' within the same
// group. A group with malformed padding is dropped rather than decoded,
// so input such as "====" no longer emits a spurious NUL byte.
bool base64_decode(std::string_view str, std::ostream *oss) {
  uint32_t chunk = 0;      // sextets accumulated for the current group
  int fill = 0;            // number of sextets in the current group
  int skip = 0;            // number of '=' seen in the current group
  bool bad_padding = false;  // data sextet followed '=' in this group
  bool clean = true;
  for (const char c : str) {
    uint32_t value;
    if ((c >= 'A') && (c <= 'Z')) value = c - 'A';
    else if ((c >= 'a') && (c <= 'z')) value = c - 'a' + 26;
    else if ((c >= '0') && (c <= '9')) value = c - '0' + 52;
    else if (c == '+') value = 62;
    else if (c == '/') value = 63;
    else if (c == '=') { value = 0; skip++; }
    else { clean = false; continue; }

    // Padding may only appear at the tail of a group.
    if ((c != '=') && (skip > 0)) bad_padding = true;

    chunk = (chunk << 6) | value;
    fill++;
    if (fill == 4) {
      if (bad_padding || (skip > 2)) {
        // At least two data sextets are needed to form one byte.
        clean = false;
      } else {
        oss->put(static_cast<char>((chunk >> 16) & 0xFF));
        if (skip < 2) oss->put(static_cast<char>((chunk >> 8) & 0xFF));
        if (skip < 1) oss->put(static_cast<char>(chunk & 0xFF));
      }
      chunk = 0;
      fill = 0;
      skip = 0;
      bad_padding = false;
    }
  }
  // A trailing partial group means the input was truncated.
  if (fill != 0) clean = false;
  return clean;
}
IdVector id_vector_create(int64_t id1, int64_t id2, int64_t id3, int64_t id4) {
IdVector result;
if (id1 >= 0) result.push_back(id1);
@@ -406,8 +528,7 @@ HashValue hash_id_vector(const IdVector &idv) {
// Renders both halves of a hash value, first then second, as one string.
//
// Only the hex64 formatter is used; emitting the halves through the
// std::hex / std::setw manipulators as well would write the hash twice.
// NOTE(review): the exact text produced by hex64.val() (digit count,
// any prefix) is defined outside this view -- confirm it matches what
// callers of hash_to_hex expect.
eng::string hash_to_hex(const HashValue &hv) {
  eng::ostringstream oss;
  oss << hex64.val(hv.first) << hex64.val(hv.second);
  return oss.str();
}
static inline uint64_t Rot64(uint64_t x, int k)
@@ -530,6 +651,52 @@ eng::string toupper(eng::string input) {
return input;
}
// Encodes codepoint `scp` as a NUL-terminated UTF-8 sequence in `buffer`.
//
// `buffer` must have room for at least 5 bytes (up to 4 encoded bytes plus
// the terminator). For values that cannot be encoded -- negatives, the
// surrogates U+D800..U+DFFF, and anything above U+10FFFF -- only
// buffer[0] is written, as 0, yielding an empty string. Codepoint 0 also
// yields an empty string, since its encoding is the terminator itself.
static void buffer_codepoint_utf8(int32_t scp, char *buffer) {
  // Viewing the value as unsigned folds negatives into the "too large" case.
  const uint32_t code = static_cast<uint32_t>(scp);
  unsigned char *out = reinterpret_cast<unsigned char *>(buffer);

  const bool is_surrogate = (code >= 0xD800) && (code <= 0xDFFF);
  if ((code > 0x10FFFF) || is_surrogate) {
    out[0] = 0;
    return;
  }

  size_t n = 0;
  if (code < 0x80) {
    out[n++] = static_cast<unsigned char>(code);
  } else if (code < 0x800) {
    out[n++] = static_cast<unsigned char>(0xC0 | (code >> 6));
    out[n++] = static_cast<unsigned char>(0x80 | (code & 0x3F));
  } else if (code < 0x10000) {
    out[n++] = static_cast<unsigned char>(0xE0 | (code >> 12));
    out[n++] = static_cast<unsigned char>(0x80 | ((code >> 6) & 0x3F));
    out[n++] = static_cast<unsigned char>(0x80 | (code & 0x3F));
  } else {
    out[n++] = static_cast<unsigned char>(0xF0 | (code >> 18));
    out[n++] = static_cast<unsigned char>(0x80 | ((code >> 12) & 0x3F));
    out[n++] = static_cast<unsigned char>(0x80 | ((code >> 6) & 0x3F));
    out[n++] = static_cast<unsigned char>(0x80 | (code & 0x3F));
  }
  out[n] = 0;
}
// Returns the UTF-8 encoding of codepoint `cp` as a string.
//
// The encoding is produced by buffer_codepoint_utf8(); the result is built
// from the NUL-terminated buffer it fills, so it is empty whenever that
// helper leaves the buffer starting with a 0 byte.
eng::string get_codepoint_utf8(uint32_t cp) {
  char utf8[5];  // up to 4 encoded bytes plus the terminator
  buffer_codepoint_utf8(cp, utf8);
  eng::string encoded(utf8);
  return encoded;
}
// Writes the UTF-8 encoding of codepoint `cp` to `*s`.
//
// The bytes come from buffer_codepoint_utf8(). The (possibly empty)
// NUL-terminated buffer is always streamed; the return value is true when
// the buffer's first byte is non-zero, i.e. when something was encoded.
bool write_codepoint_utf8(int32_t cp, std::ostream *s) {
  char utf8[5];  // up to 4 encoded bytes plus the terminator
  buffer_codepoint_utf8(cp, utf8);
  std::ostream &out = *s;
  out << utf8;
  const bool wrote_something = (utf8[0] != 0);
  return wrote_something;
}
double distance_squared(double x1, double y1, double x2, double y2) {
double dx = x1 - x2;
double dy = y1 - y2;
@@ -549,35 +716,20 @@ eng::string XYZ::debug_string() const {
return oss.str();
}
} // namespace util
std::ostream &operator<<(std::ostream &oss, const util::hex64 &v) {
oss << "0x" << std::setw(16) << std::setfill('0') << std::hex;
return oss;
}
std::ostream &operator<<(std::ostream &oss, const util::hex32 &v) {
oss << "0x" << std::setw(8) << std::setfill('0') << std::hex;
return oss;
}
std::ostream &operator<<(std::ostream &oss, const util::hex16 &v) {
oss << "0x" << std::setw(4) << std::setfill('0') << std::hex;
return oss;
}
// Stream inserter for util::hex8.
//
// Writes the literal "0x" and then sets the stream to hexadecimal with a
// field width of 2 and '0' as the fill character. The hex8 argument
// itself is not read.
std::ostream &operator<<(std::ostream &oss, const util::hex8 &v) {
  oss << "0x" << std::setw(2) << std::setfill('0') << std::hex;
  return oss;
}
// Test helper: runs sv::read_number() on a NUL-terminated C string and
// returns just the number text it read (empty when nothing was read).
// The returned view refers to the storage behind `p`.
static std::string_view read_number_x(const char *p, bool plus, bool minus, bool dec, bool exp) {
  std::string_view remaining(p);
  const std::string_view token = sv::read_number(remaining, plus, minus, dec, exp);
  return token;
}
LuaDefine(unittests_util, "", "some unit tests") {
// test str_to_int64, str_to_double
LuaAssert(L, sv::to_int64("123") == 123);
LuaAssert(L, sv::to_int64("123.4") == INT64_MIN);
LuaAssert(L, sv::to_int64("12ab") == INT64_MIN);
LuaAssert(L, sv::to_int64("") == INT64_MIN);
LuaAssert(L, sv::to_int64("123.4") == INT64_MAX);
LuaAssert(L, sv::to_int64("12ab") == INT64_MAX);
LuaAssert(L, sv::to_int64("") == INT64_MAX);
LuaAssert(L, sv::to_double("123.5") == 123.5);
LuaAssert(L, std::isnan(sv::to_double("12ab")));
LuaAssert(L, std::isnan(sv::to_double("")));
@@ -689,6 +841,20 @@ LuaDefine(unittests_util, "", "some unit tests") {
LuaAssert(L, util::hash_to_double(0x1000000000000000) == 1.0/16.0);
LuaAssert(L, util::hash_to_double(0x7000000000000000) == 7.0/16.0);
LuaAssert(L, util::hash_to_double(0xF000000000000000) == 15.0/16.0);
// Test read_number allowing everything.
LuaAssert(L, read_number_x("123x", true, true, true, true) == "123");
LuaAssert(L, read_number_x("123.3x", true, true, true, true) == "123.3");
LuaAssert(L, read_number_x("123.x", true, true, true, true) == "123.");
LuaAssert(L, read_number_x("123..x", true, true, true, true) == "123.");
LuaAssert(L, read_number_x("-123x", true, true, true, true) == "-123");
LuaAssert(L, read_number_x("+123x", true, true, true, true) == "+123");
LuaAssert(L, read_number_x("+-123x", true, true, true, true) == "");
LuaAssert(L, read_number_x("-123.02e05x", true, true, true, true) == "-123.02e05");
LuaAssert(L, read_number_x("-123e-5x", true, true, true, true) == "-123e-5");
LuaAssert(L, read_number_x("-123e+5x", true, true, true, true) == "-123e+5");
LuaAssert(L, read_number_x("-123e+x", true, true, true, true) == "");
return 0;
}