// integration/luprex/cpp/drv/drvutil.cpp


#include "drvutil.hpp"

#include <string_view>
#include <vector>
#include <sstream>
#include <fstream>
#include <iostream>
#include <filesystem>

namespace drvutil {


// Whitespace test that does not consult the locale: true for space, tab,
// carriage return, line feed, form feed and vertical tab only.
//
inline static bool ascii_isspace(char c) {
    switch (c) {
        case ' ':
        case '\t':
        case '\r':
        case '\n':
        case '\f':
        case '\v':
            return true;
        default:
            return false;
    }
}

// Return the sub-view of v with leading and trailing ASCII whitespace
// removed. No characters are copied; the result refers to v's storage.
//
std::string_view trim(std::string_view v) {
    size_t first = 0;
    size_t last = v.size();
    while ((first < last) && ascii_isspace(v[first])) {
        ++first;
    }
    while ((last > first) && ascii_isspace(v[last - 1])) {
        --last;
    }
    return v.substr(first, last - first);
}

// Split the first line off the front of source.
//
// Returns the text up to (not including) the first '\n', with one trailing
// '\r' removed if present. source is advanced past the '\n'; when there is
// no '\n' the whole remainder is returned and source becomes empty.
//
static std::string_view read_to_line(std::string_view &source) {
    const size_t newline = source.find('\n');
    const bool found = (newline != std::string_view::npos);

    std::string_view line = found ? source.substr(0, newline) : source;
    source = found ? source.substr(newline + 1) : std::string_view();

    if (!line.empty() && (line.back() == '\r')) {
        line.remove_suffix(1);
    }
    return line;
}

// Split v at every occurrence of sep.
//
// The separators themselves are dropped. Empty fields are kept, so the
// result always has (number of separators + 1) entries; an empty input
// yields a single empty view. The views refer to v's storage.
//
std::vector<std::string_view> split_view(std::string_view v, char sep) {
    std::vector<std::string_view> pieces;
    size_t start = 0;
    for (size_t hit = v.find(sep, start);
         hit != std::string_view::npos;
         hit = v.find(sep, start)) {
        pieces.push_back(v.substr(start, hit - start));
        start = hit + 1;
    }
    pieces.push_back(v.substr(start));
    return pieces;
}

// Break a "cert:host:port" target into its three fields.
//
// The target must contain exactly two ':' separators and all three fields
// must be non-empty; otherwise cert, host and port are all cleared.
//
void split_target(std::string_view target, std::string &cert, std::string &host, std::string &port) {
    const std::vector<std::string_view> parts = split_view(target, ':');
    const bool well_formed = (parts.size() == 3) &&
                             !parts[0].empty() &&
                             !parts[1].empty() &&
                             !parts[2].empty();
    if (!well_formed) {
        cert.clear();
        host.clear();
        port.clear();
        return;
    }
    cert = std::string(parts[0]);
    host = std::string(parts[1]);
    port = std::string(parts[2]);
}

// True when c is at most 0xFFFF and is not in the surrogate range
// 0xD800-0xDFFF, i.e. the value fits in one 16-bit unit on its own.
//
bool is_single_wchar_t(char32_t c) {
    const bool within_16_bits = (c <= 0xFFFF);
    const bool surrogate = (c >= 0xD800) && (c <= 0xDFFF);
    return within_16_bits && !surrogate;
}

// Encode one code point as UTF-8 into buffer.
//
// Writes 1 to 4 bytes starting at buffer[0] and returns the number of bytes
// written. Surrogates (0xD800-0xDFFF) and values above 0x10FFFF cannot be
// encoded: nothing is written and 0 is returned. The caller must provide
// room for at least 4 bytes. No terminator is appended.
//
static int buffer_codepoint_utf8(char32_t scp, char *buffer) {
    const uint32_t cp = static_cast<uint32_t>(scp);
    unsigned char *out = reinterpret_cast<unsigned char *>(buffer);

    if (cp <= 0x7F) {
        out[0] = static_cast<unsigned char>(cp);
        return 1;
    }
    if (cp <= 0x7FF) {
        out[0] = static_cast<unsigned char>(0xC0 | (cp >> 6));
        out[1] = static_cast<unsigned char>(0x80 | (cp & 0x3F));
        return 2;
    }
    if (cp <= 0xFFFF) {
        // The surrogate range is reserved for UTF-16 and is never encoded.
        if ((cp >= 0xD800) && (cp <= 0xDFFF)) {
            return 0;
        }
        out[0] = static_cast<unsigned char>(0xE0 | (cp >> 12));
        out[1] = static_cast<unsigned char>(0x80 | ((cp >> 6) & 0x3F));
        out[2] = static_cast<unsigned char>(0x80 | (cp & 0x3F));
        return 3;
    }
    if (cp <= 0x10FFFF) {
        out[0] = static_cast<unsigned char>(0xF0 | (cp >> 18));
        out[1] = static_cast<unsigned char>(0x80 | ((cp >> 12) & 0x3F));
        out[2] = static_cast<unsigned char>(0x80 | ((cp >> 6) & 0x3F));
        out[3] = static_cast<unsigned char>(0x80 | (cp & 0x3F));
        return 4;
    }
    return 0;
}

// Decode one code point from the front of a UTF-16 view.
//
// Returns the code point and advances source past the unit(s) consumed.
// Returns -1 when source is empty (nothing is consumed).
// Returns -2 for malformed input: a low surrogate with no preceding high
// surrogate, or a high surrogate that is not followed by a low surrogate.
// In the -2 case exactly one unit (the first) has been consumed, so the
// caller can simply call again to resynchronise.
//
static int32_t read_codepoint_utf16(std::u16string_view &source) {
    if (source.empty()) return -1;

    // Read the char16_t elements directly rather than through a pointer of
    // a different integer type.
    const int32_t lead = static_cast<int32_t>(source.front());
    source.remove_prefix(1);

    if ((lead < 0xD800) || (lead > 0xDFFF)) {
        // Not a surrogate: the unit is the code point.
        return lead;
    }
    if (lead >= 0xDC00) {
        // Low surrogate with nothing before it.
        return -2;
    }

    // High surrogate: a low surrogate must follow.
    if (source.empty()) {
        return -2;
    }
    const int32_t trail = static_cast<int32_t>(source.front());
    if ((trail < 0xDC00) || (trail > 0xDFFF)) {
        // Leave the offending unit in place for the next call.
        return -2;
    }
    source.remove_prefix(1);
    return (((lead & 0x3FF) << 10) | (trail & 0x3FF)) + 0x10000;
}

// Decode one code point from the front of a UTF-8 view.
//
// On success returns the code point and advances source past the sequence.
// Returns -1 when source is empty or ends before the sequence announced by
// the lead byte is complete. Returns -2 when the bytes are not valid UTF-8
// (bad lead or continuation byte, overlong form, surrogate, or a value
// above 0x10FFFF). source is left untouched for both -1 and -2.
//
static int32_t read_codepoint_utf8(std::string_view &source) {
    if (source.empty()) return -1;

    const unsigned char *p = reinterpret_cast<const unsigned char *>(source.data());
    const unsigned char lead = p[0];

    // The high bits of the lead byte give the sequence length; the
    // remaining low bits are the first payload bits.
    int cp;
    size_t need;
    if (lead < 0x80) {
        cp = lead;
        need = 1;
    } else if ((lead >> 5) == 0x06) {
        cp = lead & 0x1F;
        need = 2;
    } else if ((lead >> 4) == 0x0E) {
        cp = lead & 0x0F;
        need = 3;
    } else if ((lead >> 3) == 0x1E) {
        cp = lead & 0x07;
        need = 4;
    } else {
        return -2;
    }

    // Sequence runs past the end of the data.
    if (source.size() < need) {
        return -1;
    }

    // Every following byte must look like 10xxxxxx and adds six bits.
    for (size_t k = 1; k < need; ++k) {
        const unsigned char cont = p[k];
        if ((cont >> 6) != 0x02) {
            return -2;
        }
        cp = (cp << 6) | (cont & 0x3F);
    }

    // Length of the only encoding accepted for this value; any other
    // length is an overlong form.
    size_t shortest;
    if (cp <= 0x007F) {
        shortest = 1;
    } else if (cp <= 0x07FF) {
        shortest = 2;
    } else if (cp <= 0xFFFF) {
        shortest = 3;
    } else {
        shortest = 4;
    }

    const bool out_of_range = (cp > 0x10FFFF);
    const bool surrogate = (cp >= 0xD800) && (cp <= 0xDFFF);
    const bool overlong = (need != shortest);
    if (out_of_range || surrogate || overlong) {
        return -2;
    }

    source.remove_prefix(need);
    return cp;
}

// Convert a UTF-32 string to UTF-8.
//
// Code points that buffer_codepoint_utf8 refuses to encode (it returns 0
// for them) contribute no bytes to the result.
//
std::string utf32_to_utf8(const std::u32string &s) {
    // Four bytes per code point is the most the encoder ever writes.
    std::string encoded(s.size() * 4, '\0');
    int used = 0;
    for (size_t i = 0; i < s.size(); ++i) {
        used += buffer_codepoint_utf8(s[i], &encoded[0] + used);
    }
    encoded.resize(used);
    return encoded;
}

// Convert UTF-8 text to UTF-32.
//
// Bytes that read_codepoint_utf8 rejects as invalid are skipped one at a
// time. Decoding stops when the reader reports -1 (input exhausted, or the
// input ends in the middle of a sequence). If consumed is non-null it
// receives the number of bytes of s that were processed, which lets a
// caller keep an unfinished trailing sequence for later.
//
std::u32string utf8_to_utf32(std::string_view s, int *consumed) {
    std::string_view remaining = s;
    // One output element per input byte is always enough.
    std::u32string decoded(s.size(), U'\0');
    int count = 0;
    for (int32_t cp = read_codepoint_utf8(remaining);
         cp != -1;
         cp = read_codepoint_utf8(remaining)) {
        if (cp >= 0) {
            decoded[count++] = static_cast<char32_t>(cp);
        } else {
            // Invalid byte: drop it and try again from the next one.
            remaining.remove_prefix(1);
        }
    }
    if (consumed != nullptr) {
        *consumed = static_cast<int>(s.size() - remaining.size());
    }
    decoded.resize(count);
    return decoded;
}

// Convert UTF-8 text to a string of single 16-bit units.
//
// Code points that do not fit in one unit (above 0xFFFF, or in the
// surrogate range) are replaced with 0x2610. Bytes that read_codepoint_utf8
// rejects as invalid are skipped one at a time. Decoding stops when the
// reader reports -1. If consumed is non-null it receives the number of
// bytes of s that were processed.
//
std::u16string utf8_to_ucs2(std::string_view s, int *consumed) {
    const char16_t substitute = 0x2610;

    std::string_view remaining = s;
    // One output unit per input byte is always enough.
    std::u16string units(s.size(), u'\0');
    int count = 0;
    for (int32_t cp = read_codepoint_utf8(remaining);
         cp != -1;
         cp = read_codepoint_utf8(remaining)) {
        if (cp < 0) {
            // Invalid byte: drop it and try again from the next one.
            remaining.remove_prefix(1);
            continue;
        }
        const bool surrogate = (cp >= 0xD800) && (cp <= 0xDFFF);
        const bool fits = !surrogate && (cp <= 0xFFFF);
        units[count++] = fits ? static_cast<char16_t>(cp) : substitute;
    }
    if (consumed != nullptr) {
        *consumed = static_cast<int>(s.size() - remaining.size());
    }
    units.resize(count);
    return units;
}

// Convert UTF-16 text to UTF-8.
//
// Units that read_codepoint_utf16 reports as malformed (any negative
// result other than -1) are dropped; -1 ends the conversion.
//
std::string utf16_to_utf8(std::u16string_view s) {
    std::string encoded(s.size() * 4, '\0');
    int used = 0;
    for (int cp = read_codepoint_utf16(s); cp != -1; cp = read_codepoint_utf16(s)) {
        if (cp >= 0) {
            used += buffer_codepoint_utf8(static_cast<char32_t>(cp), &encoded[used]);
        }
    }
    encoded.resize(used);
    return encoded;
}

// Extract the entries from the text of a control list.
//
// The text is processed line by line. Each line is trimmed of surrounding
// whitespace; blank lines and lines whose first character is '#' are
// ignored. The remaining lines are returned in order.
//
static std::vector<std::string> parse_control_lst(std::string_view ctrl) {
    std::vector<std::string> entries;
    for (std::string_view rest = ctrl; !rest.empty(); ) {
        const std::string_view entry = trim(read_to_line(rest));
        if (entry.empty() || (entry.front() == '#')) {
            continue;
        }
        entries.emplace_back(entry);
    }
    return entries;
}

// Read a source file into a string.
//
// On success returns the file's contents and sets err to "". On failure
// returns "" and sets err to "Could not open <path>" or
// "Could not read <path>".
//
static std::string read_file(const std::filesystem::path &fn, std::string &err) {
    // Binary mode: the buffer is sized from tellg(), so read() has to
    // deliver exactly that many characters. A text-mode stream may
    // translate line endings and deliver fewer.
    std::ifstream t(fn, std::ios::binary);
    if (t.fail()) {
        err = std::string("Could not open ") + fn.string();
        return "";
    }

    // tellg() reports failure as -1; catch that before it is turned into
    // an enormous unsigned allocation size.
    t.seekg(0, std::ios::end);
    const std::streamoff end = t.tellg();
    if (t.fail() || (end < 0)) {
        err = std::string("Could not read ") + fn.string();
        return "";
    }

    const size_t size = static_cast<size_t>(end);
    std::string result(size, ' ');
    t.seekg(0);
    t.read(&result[0], static_cast<std::streamsize>(size));
    if (t.fail() || (static_cast<size_t>(t.gcount()) != size)) {
        err = std::string("Could not read ") + fn.string();
        return "";
    }
    err = "";
    return result;
}

// Write v to the stream as its four raw bytes, in the order they are
// stored in memory on this machine.
//
// This encoding can be read by StreamBuffer::read_uint32.
//
static void sbwrite_uint32(std::ostream *s, uint32_t v) {
    const char *raw = reinterpret_cast<const char *>(&v);
    s->write(raw, sizeof(v));
}

// Write v to the stream as its eight raw bytes, in the order they are
// stored in memory on this machine.
//
// This encoding can be read by StreamBuffer::read_uint64.
//
static void sbwrite_uint64(std::ostream *s, uint64_t v) {
    const char *raw = reinterpret_cast<const char *>(&v);
    s->write(raw, sizeof(v));
}

// Write sv as a length-prefixed string: a single 0xFF byte, the length as
// a uint64, then the characters themselves.
//
// This encoding can be read by StreamBuffer::read_string.
//
static void sbwrite_string(std::ostream *s, std::string_view sv) {
    const uint64_t length = sv.size();
    s->put('\xFF');
    sbwrite_uint64(s, length);
    s->write(sv.data(), static_cast<std::streamsize>(length));
}

// Write the contents of file fn as a length-prefixed string: a single 0xFF
// byte, the length as a uint64, then the file's characters.
//
// This encoding can be read by StreamBuffer::read_string.
//
// The length is not known before the copy, so a zero placeholder is
// written first and overwritten afterwards; s must support tellp/seekp.
// Returns false if the file cannot be opened or the copy fails. Anything
// already written to s at that point is left in place.
//
static bool sbwrite_file(std::ostream *s, const std::filesystem::path &fn) {
    s->put('\xFF');
    uint64_t pos1 = s->tellp();
    sbwrite_uint64(s, 0);
    uint64_t pos2 = s->tellp();
    std::ifstream t(fn);
    if (t.fail()) {
        return false;
    }
    // Inserting a streambuf that yields no characters sets failbit on the
    // destination stream, which would break every later write to *s. An
    // empty file is legitimate, so only copy when there is data; the zero
    // placeholder already describes the empty case.
    if (t.peek() != std::ifstream::traits_type::eof()) {
        *s << t.rdbuf();
    }
    // A failed copy is reported on the destination stream, not on t.
    if (t.bad() || s->fail()) {
        return false;
    }
    uint64_t pos3 = s->tellp();
    s->seekp(pos1);
    sbwrite_uint64(s, pos3 - pos2);
    s->seekp(pos3);
    return true;
}

// Bundle the Lua sources listed in <base>/lua/control.lst into stream s.
//
// Output layout: a uint32 count of entries, then for each entry its name
// (length-prefixed string) followed by the contents of <base>/lua/<name>
// (length-prefixed string). Returns "" on success, or an error message if
// the control list or one of the listed files cannot be read.
//
std::string package_lua_source(const std::filesystem::path &base, std::ostream *s) {
    std::string err;
    const std::filesystem::path control_path = base / "lua/control.lst";
    const std::string control_text = read_file(control_path, err);
    if (!err.empty()) {
        return err;
    }

    const std::vector<std::string> names = parse_control_lst(control_text);
    sbwrite_uint32(s, static_cast<uint32_t>(names.size()));

    const std::filesystem::path lua_dir = base / "lua";
    for (const std::string &name : names) {
        sbwrite_string(s, name);
        const std::filesystem::path source_path = lua_dir / name;
        if (!sbwrite_file(s, source_path)) {
            return std::string("Cannot read source file: ") + source_path.string();
        }
    }
    return "";
}


} // namespace drvutil