json.encode and json.decode finished. Also lots of refactoring.

2022-06-06 23:03:26 -04:00
parent f03a48b0a6
commit 779d9e20b8
11 changed files with 1292 additions and 109 deletions
--- a/luprex/core/cpp/util.cpp
+++ b/luprex/core/cpp/util.cpp
@@ -65,8 +65,10 @@ bool valid_double(string_view value) {

 int64_t to_int64(string_view value, int64_t errval) {
    int64_t result;
-    const char *last = value.data() + value.size();
-    auto r = std::from_chars(value.data(), last, result, 10);
+    const char *p = value.data();
+    const char *last = p + value.size();
+    if ((p < last) && (*p == '+')) p++;
+    auto r = std::from_chars(p, last, result, 10);
    if (r.ec != std::errc()) return errval;
    if (r.ptr != last) return errval;
    return result;
@@ -74,6 +76,7 @@ int64_t to_int64(string_view value, int64_t errval) {

 uint64_t to_hex64(string_view value, uint64_t errval) {
    uint64_t result;
+    if (sv::zfront(value) == '-') return errval;
    const char *last = value.data() + value.size();
    auto r = std::from_chars(value.data(), last, result, 16);
    if (r.ec != std::errc()) return errval;
@@ -204,6 +207,15 @@ string_view read_to_line(string_view &source) {
    return result;
 }

+// Strips `prefix` from the front of `source` when `source` starts with it.
+// Returns true (and advances `source`) on a match; otherwise returns false
+// and leaves `source` unchanged. An empty `prefix` always matches.
+bool read_prefix(string_view &source, string_view prefix) {
+    const size_t len = prefix.size();
+    // substr clamps to the available length, so a too-short source simply
+    // compares unequal.
+    const bool matched = (source.substr(0, len) == prefix);
+    if (matched) source.remove_prefix(len);
+    return matched;
+}
+
 string_view read_to_space(string_view &source) {
    size_t pos1 = 0;
    while ((pos1 < source.size()) && (!ascii_isspace(source[pos1]))) {
@@ -243,57 +255,119 @@ string_view read_ascii_identifier(string_view &source) {
    return result;
 }

+// Scans a decimal number at the front of `source`. When one is found it is
+// removed from `source` and returned; otherwise an empty view is returned and
+// `source` is left unchanged.
+//
+//   plus / minus  allow a single leading '+' / '-' sign
+//   dec           allow a '.' followed by optional fraction digits
+//   exp           allow an 'e'/'E' exponent with an optional sign
+//
+// At least one digit is required in the integer or fraction part. When an
+// exponent marker is present it must be followed by at least one digit,
+// otherwise nothing is matched at all.
+std::string_view read_number(string_view &source, bool plus, bool minus, bool dec, bool exp) {
+    const string_view none = source.substr(0, 0);
+    const size_t size = source.size();
+    if (size == 0) return none;
+
+    // Optional sign, subject to what the caller permits.
+    size_t pos = 0;
+    switch (source[0]) {
+    case '+':
+        if (!plus) return none;
+        pos = 1;
+        break;
+    case '-':
+        if (!minus) return none;
+        pos = 1;
+        break;
+    default:
+        break;
+    }
+
+    // Integer digits, then an optional fraction.
+    const size_t int_start = pos;
+    while ((pos < size) && ascii_isdigit(source[pos])) pos++;
+    size_t digits = pos - int_start;
+    if (dec && (pos < size) && (source[pos] == '.')) {
+        pos++;
+        const size_t frac_start = pos;
+        while ((pos < size) && ascii_isdigit(source[pos])) pos++;
+        digits += pos - frac_start;
+    }
+    if (digits == 0) return none;
+
+    // Optional exponent; a marker without digits rejects the whole number.
+    if (exp && (pos < size) && ((source[pos] == 'e') || (source[pos] == 'E'))) {
+        pos++;
+        if ((pos < size) && ((source[pos] == '+') || (source[pos] == '-'))) pos++;
+        const size_t exp_start = pos;
+        while ((pos < size) && ascii_isdigit(source[pos])) pos++;
+        if (pos == exp_start) return none;
+    }
+
+    const string_view result = source.substr(0, pos);
+    source.remove_prefix(pos);
+    return result;
+}
+
+// Removes the first byte of `source` and returns it as a value in 0..255.
+// Returns -1, leaving `source` unchanged, when `source` is empty.
+int32_t read_ascii_char(string_view &source) {
+    if (source.empty()) return -1;
+    // Go through unsigned char: where plain char is signed, a byte >= 0x80
+    // would otherwise come back negative, and 0xFF would be indistinguishable
+    // from the -1 end-of-input result.
+    int32_t result = (unsigned char)source.front();
+    source.remove_prefix(1);
+    return result;
+}
+
+// Decodes one UTF-8 encoded code point from the front of `source`.
+// On success the encoded bytes are removed from `source` and the code point
+// is returned. Returns -1 and leaves `source` unchanged when `source` is
+// empty, the sequence is truncated, a lead or continuation byte is invalid,
+// the encoding is overlong, or the value is a surrogate (U+D800..U+DFFF) or
+// lies above U+10FFFF.
+int32_t read_codepoint_utf8(string_view &source) {
+    const size_t avail = source.size();
+    if (avail == 0) return -1;
+    const unsigned char *in = (const unsigned char *)source.data();
+    const unsigned char lead = in[0];
+
+    // Classify the lead byte: sequence length, payload bits, and the smallest
+    // code point that genuinely needs that many bytes.
+    size_t len;
+    int cp;
+    int lowest;
+    if (lead < 0x80) {
+        // U+0000 to U+007F
+        len = 1;
+        cp = lead;
+        lowest = 0x0000;
+    } else if ((lead >> 5) == 0x06) {
+        // U+0080 to U+07FF
+        len = 2;
+        cp = lead & 0x1F;
+        lowest = 0x0080;
+    } else if ((lead >> 4) == 0x0E) {
+        // U+0800 to U+FFFF
+        len = 3;
+        cp = lead & 0x0F;
+        lowest = 0x0800;
+    } else if ((lead >> 3) == 0x1E) {
+        // U+10000 to U+10FFFF
+        len = 4;
+        cp = lead & 0x07;
+        lowest = 0x10000;
+    } else {
+        return -1;
+    }
+
+    if (avail < len) return -1;
+
+    // Fold in the continuation bytes, each of which must look like 10xxxxxx.
+    for (size_t k = 1; k < len; ++k) {
+        const unsigned char cont = in[k];
+        if ((cont >> 6) != 0x02) return -1;
+        cp = (cp << 6) | (cont & 0x3F);
+    }
+
+    if (cp > 0x10FFFF) return -1;
+    if ((cp >= 0xD800) && (cp <= 0xDFFF)) return -1;
+    // Overlong form: the value would have fit in a shorter sequence.
+    if (cp < lowest) return -1;
+
+    source.remove_prefix(len);
+    return cp;
+}
+
 bool valid_utf8(string_view s)
 {
-    const unsigned char *bytes = (const unsigned char *)s.data();
-    const unsigned char *tail = bytes + s.size();
-    unsigned int codepoint;
-    int seqlen;
-
-    while (bytes < tail) {
-        if ((bytes[0] & 0x80) == 0x00) {
-            // U+0000 to U+007F
-            codepoint = (bytes[0] & 0x7F);
-            seqlen = 1;
-        } else if ((bytes[0] & 0xE0) == 0xC0) {
-            // U+0080 to U+07FF
-            codepoint = (bytes[0] & 0x1F);
-            seqlen = 2;
-        } else if ((bytes[0] & 0xF0) == 0xE0) {
-            // U+0800 to U+FFFF
-            codepoint = (bytes[0] & 0x0F);
-            seqlen = 3;
-        } else if ((bytes[0] & 0xF8) == 0xF0) {
-            // U+10000 to U+10FFFF
-            codepoint = (bytes[0] & 0x07);
-            seqlen = 4;
-        } else {
-            return false;
-        }
-
-        if (bytes + seqlen > tail) {
-            return false;
-        }
-
-        for (int i = 1; i < seqlen; ++i) {
-            if ((bytes[i] & 0xC0) != 0x80) return false;
-            codepoint = (codepoint << 6) | (bytes[i] & 0x3F);
-        }
-
-        if ((codepoint > 0x10FFFF) ||
-            ((codepoint >= 0xD800) && (codepoint <= 0xDFFF)) ||
-            ((codepoint <= 0x007F) && (seqlen != 1)) ||
-            ((codepoint >= 0x0080) && (codepoint <= 0x07FF) && (seqlen != 2)) ||
-            ((codepoint >= 0x0800) && (codepoint <= 0xFFFF) && (seqlen != 3)) ||
-            ((codepoint >= 0x10000) && (codepoint <= 0x1FFFFF) && (seqlen != 4))) {
-            return false;
-        }
-
-        bytes += seqlen;
+    while (!s.empty()) {  // s is a by-value copy; each pass strips one decoded sequence from its front
+        int32_t codepoint = read_codepoint_utf8(s);  // consumes the sequence on success, returns -1 on failure
+        if (codepoint < 0) return false;  // malformed, truncated, overlong, surrogate or out-of-range sequence
    }
    return true;
 }

+// Reports whether the whole of `s` is exactly one number as accepted by
+// read_number under the given permissions (`plus`/`minus` for a leading sign,
+// `dec` for a decimal point, `exp` for an exponent). The empty string is not
+// a number.
+bool valid_number(string_view s, bool plus, bool minus, bool dec, bool exp) {
+    // read_number strips the matched number from `s`, or matches nothing and
+    // leaves `s` alone. Requiring a non-empty match keeps an empty `s` from
+    // passing merely because nothing is left over.
+    const string_view number = read_number(s, plus, minus, dec, exp);
+    return !number.empty() && s.empty();
+}
+
 } // namespace sv


@@ -334,6 +408,8 @@ void quote_string(const eng::string &s, std::ostream *os) {
                (*os) << (usesinglequote ? "\"" : "\\\"");
            } else if (c == '\'') {
                (*os) << (usesinglequote ? "\\'" : "'");
+            } else if (c == '\\') {
+                (*os) << "\\\\"; 
            } else {
                (*os) << c;
            }
@@ -344,7 +420,7 @@ void quote_string(const eng::string &s, std::ostream *os) {
            case '\t': (*os) << "\\t"; break;
            case '\r': (*os) << "\\r"; break;
            default:
-                (*os) << "\\" << std::setfill('0') << std::setw(3) << value;
+                (*os) << "\\" << dec.width(3).fill('0').val(value);
                break;
            }
        }
@@ -352,6 +428,52 @@ void quote_string(const eng::string &s, std::ostream *os) {
    (*os) << (usesinglequote ? '\'' : '"');
 }

+// Writes the base64 encoding of `str` to `oss`, using the standard alphabet
+// (A-Z, a-z, 0-9, '+', '/') and '=' padding so that the output length is
+// always a multiple of four. An empty input produces no output.
+void base64_encode(std::string_view str, std::ostream *oss) {
+    static const char alphabet[] =
+        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
+    const size_t total = str.size();
+    size_t pos = 0;
+    while (pos < total) {
+        // Gather up to three input bytes into a 24-bit group; missing bytes
+        // at the tail contribute zero bits.
+        const size_t left = total - pos;
+        const size_t take = (left < 3) ? left : 3;
+        uint32_t bits = 0;
+        for (size_t k = 0; k < 3; k++) {
+            bits <<= 8;
+            if (k < take) bits |= (unsigned char)str[pos + k];
+        }
+
+        // Emit four characters; positions with no input behind them are '='.
+        const char c0 = alphabet[(bits >> 18) & 0x3F];
+        const char c1 = alphabet[(bits >> 12) & 0x3F];
+        const char c2 = (take > 1) ? alphabet[(bits >> 6) & 0x3F] : '=';
+        const char c3 = (take > 2) ? alphabet[bits & 0x3F] : '=';
+        (*oss) << c0;
+        (*oss) << c1;
+        (*oss) << c2;
+        (*oss) << c3;
+
+        pos += take;
+    }
+}
+
+// Decodes base64 text from `str`, writing the decoded bytes to `oss`.
+//
+// Characters outside the base64 alphabet (whitespace included) are skipped
+// but mark the input as unclean. Returns true only when the input was clean:
+// no foreign characters, no incomplete trailing group, and '=' padding used
+// only in the third and/or fourth position of a four-character group with no
+// data character after it in that group. Bytes are still emitted on a
+// best-effort basis when the input is unclean. Further groups after a padded
+// group are accepted.
+bool base64_decode(std::string_view str, std::ostream *oss) {
+    uint32_t chunk = 0;
+    int fill = 0;   // characters collected for the current group (0..3)
+    int skip = 0;   // '=' characters seen in the current group
+    bool clean = true;
+    for (size_t i = 0; i < str.size(); i++) {
+        char c = str[i];
+        uint32_t value;
+
+        if      ((c >= 'A') && (c <= 'Z')) value = c - 'A';
+        else if ((c >= 'a') && (c <= 'z')) value = c - 'a' + 26;
+        else if ((c >= '0') && (c <= '9')) value = c - '0' + 52;
+        else if (c == '+') value = 62;
+        else if (c == '/') value = 63;
+        else if (c == '=') {
+            // Padding cannot stand in for the first two characters of a
+            // group: at least two are needed to carry one byte.
+            if (fill < 2) clean = false;
+            value = 0;
+            skip++;
+        } else {
+            clean = false;
+            continue;
+        }
+
+        // A data character following '=' inside the same group is malformed.
+        if ((c != '=') && (skip > 0)) clean = false;
+
+        chunk = (chunk << 6) | value;
+        fill++;
+        if (fill == 4) {
+            oss->put((chunk >> 16) & 0xFF);
+            if (skip < 2) oss->put((chunk >> 8) & 0xFF);
+            if (skip < 1) oss->put(chunk & 0xFF);
+            chunk = 0;
+            fill = 0;
+            skip = 0;
+        }
+    }
+    // Leftover characters that never completed a group.
+    if (fill != 0) clean = false;
+    return clean;
+}
+
 IdVector id_vector_create(int64_t id1, int64_t id2, int64_t id3, int64_t id4) {
    IdVector result;
    if (id1 >= 0) result.push_back(id1);
@@ -406,8 +528,7 @@ HashValue hash_id_vector(const IdVector &idv) {

 eng::string hash_to_hex(const HashValue &hv) {
    eng::ostringstream oss;
-    oss << std::hex << std::setw(16) << std::setfill('0') << hv.first;
-    oss << std::hex << std::setw(16) << std::setfill('0') << hv.second;
+    oss << hex64.val(hv.first) << hex64.val(hv.second);  // hv.first then hv.second, back to back; NOTE(review): hex64 is defined elsewhere, confirm it still yields 16 zero-padded hex digits with no prefix
    return oss.str();    
 }
 static inline uint64_t Rot64(uint64_t x, int k)
@@ -530,6 +651,52 @@ eng::string toupper(eng::string input) {
    return input;
 }

+// Encodes code point `scp` as UTF-8 into `buffer` and NUL-terminates it.
+// `buffer` must have room for at least 5 bytes (4 encoded bytes plus the
+// terminator). Surrogates (U+D800..U+DFFF), values above U+10FFFF and
+// negative inputs produce an empty string. Because the result is
+// NUL-terminated, U+0000 also comes out as an empty string.
+static void buffer_codepoint_utf8(int32_t scp, char *buffer) {
+    // Viewed as unsigned, negative inputs become huge and fall out of range.
+    const uint32_t cp = (uint32_t)scp;
+    unsigned char *out = (unsigned char *)buffer;
+    const bool surrogate = (cp >= 0xD800) && (cp <= 0xDFFF);
+    size_t len = 0;
+
+    if (cp < 0x80) {
+        out[len++] = (unsigned char)cp;
+    } else if (cp < 0x800) {
+        out[len++] = (unsigned char)(0xC0 | (cp >> 6));
+        out[len++] = (unsigned char)(0x80 | (cp & 0x3F));
+    } else if (cp < 0x10000) {
+        if (!surrogate) {
+            out[len++] = (unsigned char)(0xE0 | (cp >> 12));
+            out[len++] = (unsigned char)(0x80 | ((cp >> 6) & 0x3F));
+            out[len++] = (unsigned char)(0x80 | (cp & 0x3F));
+        }
+    } else if (cp < 0x110000) {
+        out[len++] = (unsigned char)(0xF0 | (cp >> 18));
+        out[len++] = (unsigned char)(0x80 | ((cp >> 12) & 0x3F));
+        out[len++] = (unsigned char)(0x80 | ((cp >> 6) & 0x3F));
+        out[len++] = (unsigned char)(0x80 | (cp & 0x3F));
+    }
+
+    // Terminator; it is the only byte written when nothing was encoded.
+    out[len] = 0;
+}
+
+// Returns the UTF-8 encoding of `cp` as a string. The result is empty when
+// buffer_codepoint_utf8 encodes nothing for `cp`.
+eng::string get_codepoint_utf8(uint32_t cp) {
+    char encoded[5];  // up to 4 encoded bytes plus the NUL terminator
+    buffer_codepoint_utf8(cp, encoded);
+    eng::string text(encoded);
+    return text;
+}
+
+// Writes the UTF-8 encoding of `cp` to `s`. Returns true when at least one
+// byte was produced, false when buffer_codepoint_utf8 encoded nothing for
+// `cp` (in which case nothing is written).
+bool write_codepoint_utf8(int32_t cp, std::ostream *s) {
+    char encoded[5];  // up to 4 encoded bytes plus the NUL terminator
+    buffer_codepoint_utf8(cp, encoded);
+    const bool produced = (encoded[0] != 0);
+    (*s) << encoded;
+    return produced;
+}
+
 double distance_squared(double x1, double y1, double x2, double y2) {
    double dx = x1 - x2;
    double dy = y1 - y2;
@@ -549,35 +716,20 @@ eng::string XYZ::debug_string() const {
    return oss.str();
 }

-
 } // namespace util

-std::ostream &operator<<(std::ostream &oss, const util::hex64 &v) {
-    oss << "0x" << std::setw(16) << std::setfill('0') << std::hex;
-    return oss;
-}

-std::ostream &operator<<(std::ostream &oss, const util::hex32 &v) {
-    oss << "0x" << std::setw(8) << std::setfill('0') << std::hex;
-    return oss;
-}
-
-std::ostream &operator<<(std::ostream &oss, const util::hex16 &v) {
-    oss << "0x" << std::setw(4) << std::setfill('0') << std::hex;
-    return oss;
-}
-
-std::ostream &operator<<(std::ostream &oss, const util::hex8 &v) {
-    oss << "0x" << std::setw(2) << std::setfill('0') << std::hex;
-    return oss;
+// Test helper: runs sv::read_number on the NUL-terminated string `p` and
+// returns the number it matched (empty when none). The returned view refers
+// to the characters of `p`.
+static std::string_view read_number_x(const char *p, bool plus, bool minus, bool dec, bool exp) {
+    std::string_view remaining(p);
+    const std::string_view matched = sv::read_number(remaining, plus, minus, dec, exp);
+    return matched;
 }

 LuaDefine(unittests_util, "", "some unit tests") {
        // test str_to_int64, str_to_double
    LuaAssert(L, sv::to_int64("123") == 123);
-    LuaAssert(L, sv::to_int64("123.4") == INT64_MIN);
-    LuaAssert(L, sv::to_int64("12ab") == INT64_MIN);
-    LuaAssert(L, sv::to_int64("") == INT64_MIN);
+    LuaAssert(L, sv::to_int64("123.4") == INT64_MAX);
+    LuaAssert(L, sv::to_int64("12ab") == INT64_MAX);
+    LuaAssert(L, sv::to_int64("") == INT64_MAX);
    LuaAssert(L, sv::to_double("123.5") == 123.5);
    LuaAssert(L, std::isnan(sv::to_double("12ab")));
    LuaAssert(L, std::isnan(sv::to_double("")));
@@ -689,6 +841,20 @@ LuaDefine(unittests_util, "", "some unit tests") {
    LuaAssert(L, util::hash_to_double(0x1000000000000000) == 1.0/16.0);
    LuaAssert(L, util::hash_to_double(0x7000000000000000) == 7.0/16.0);
    LuaAssert(L, util::hash_to_double(0xF000000000000000) == 15.0/16.0);
+
+    // Test read_number allowing everything.
+    LuaAssert(L, read_number_x("123x", true, true, true, true) == "123");
+    LuaAssert(L, read_number_x("123.3x", true, true, true, true) == "123.3");
+    LuaAssert(L, read_number_x("123.x", true, true, true, true) == "123.");
+    LuaAssert(L, read_number_x("123..x", true, true, true, true) == "123.");
+    LuaAssert(L, read_number_x("-123x", true, true, true, true) == "-123");
+    LuaAssert(L, read_number_x("+123x", true, true, true, true) == "+123");
+    LuaAssert(L, read_number_x("+-123x", true, true, true, true) == "");
+    LuaAssert(L, read_number_x("-123.02e05x", true, true, true, true) == "-123.02e05");
+    LuaAssert(L, read_number_x("-123e-5x", true, true, true, true) == "-123e-5");
+    LuaAssert(L, read_number_x("-123e+5x", true, true, true, true) == "-123e+5");
+    LuaAssert(L, read_number_x("-123e+x", true, true, true, true) == "");
+    
    return 0;
 }