diff --git a/luprex/cpp/core/json.cpp b/luprex/cpp/core/json.cpp index 75ccb650..c5aef3fc 100644 --- a/luprex/cpp/core/json.cpp +++ b/luprex/cpp/core/json.cpp @@ -136,10 +136,10 @@ static bool encode_string(lua_State *L, eng::ostringstream &oss) { std::string_view str(s, len); oss << '"'; if (sv::valid_utf8(str) && !sv::has_prefix(str, "")) { - // Output the string in the straightforward way, - // using traditional json escaping. - for (char c : str) { - switch (c) { + while (!str.empty()) { + int32_t cp = sv::read_codepoint_utf8(str); + assert(cp >= 0); + switch (cp) { case '\\': oss << "\\\\"; break; case '"' : oss << "\\\""; break; case '\b': oss << "\\b"; break; @@ -148,10 +148,11 @@ static bool encode_string(lua_State *L, eng::ostringstream &oss) { case '\n': oss << "\\n"; break; case '\t': oss << "\\t"; break; default: { - if (c < 32) { - oss << "\\u" << util::hex16.val(c); + if (cp < 32) { + oss << "\\u" << util::hex16.val(cp); } else { - oss << c; + bool ok = util::write_codepoint_utf8(cp, &oss); + assert(ok); } } } diff --git a/luprex/cpp/core/util.cpp b/luprex/cpp/core/util.cpp index 067ca0d4..a7fffb6e 100644 --- a/luprex/cpp/core/util.cpp +++ b/luprex/cpp/core/util.cpp @@ -298,7 +298,7 @@ int32_t read_ascii_char(string_view &source) { return result; } -int32_t read_codepoint_utf8(string_view &source) { +int32_t read_codepoint_utf8(std::string_view &source) { size_t size = source.size(); if (size == 0) return -1; @@ -322,9 +322,8 @@ int32_t read_codepoint_utf8(string_view &source) { codepoint = (bytes[0] & 0x07); seqlen = 4; } else { - // Bad character. Drop a byte and return invalid CP. - source.remove_prefix(1); - return 1; + // Bad character. return invalid CP. + return -2; } if (seqlen > size) { @@ -333,9 +332,8 @@ int32_t read_codepoint_utf8(string_view &source) { for (size_t i = 1; i < seqlen; ++i) { if ((bytes[i] & 0xC0) != 0x80) { - // Bad character. Drop a byte and return invalid CP. - source.remove_prefix(1); - return 1; + // Bad character. return invalid CP. + return -2; } codepoint = (codepoint << 6) | (bytes[i] & 0x3F); } @@ -346,17 +344,15 @@ int32_t read_codepoint_utf8(string_view &source) { ((codepoint >= 0x0080) && (codepoint <= 0x07FF) && (seqlen != 2)) || ((codepoint >= 0x0800) && (codepoint <= 0xFFFF) && (seqlen != 3)) || ((codepoint >= 0x10000) && (codepoint <= 0x1FFFFF) && (seqlen != 4))) { - // Bad character. Drop a byte and return invalid CP. - source.remove_prefix(1); - return 1; + // Bad character. return invalid CP. + return -2; } source.remove_prefix(seqlen); return codepoint; } -bool valid_utf8(string_view s) -{ +bool valid_utf8(string_view s) { while (!s.empty()) { int32_t codepoint = read_codepoint_utf8(s); if (codepoint < 0) return false; @@ -403,27 +399,32 @@ void quote_string(const eng::string &s, std::ostream *os) { } bool usesinglequote = (!anysq)||(anydq); (*os) << (usesinglequote ? '\'' : '"'); - for (char c : s) { - if (c >= 32) { - if (c == '"') { - (*os) << (usesinglequote ? "\"" : "\\\""); - } else if (c == '\'') { - (*os) << (usesinglequote ? "\\'" : "'"); - } else if (c == '\\') { - (*os) << "\\\\"; - } else { - (*os) << c; - } - } else { - unsigned int value = ((unsigned char)c); - switch (c) { + std::string_view str(s); + while (!str.empty()) { + unsigned char c0 = (unsigned char)(str[0]); + int cp = sv::read_codepoint_utf8(str); + if (cp < 0) { + (*os) << "\\" << dec.width(3).fill('0').val(c0); + str.remove_prefix(1); + } else if (cp < 32) { + c0 = ((unsigned char)cp); + switch (c0) { case '\n': (*os) << "\\n"; break; case '\t': (*os) << "\\t"; break; case '\r': (*os) << "\\r"; break; + case '\b': (*os) << "\\b"; break; default: - (*os) << "\\" << dec.width(3).fill('0').val(value); + (*os) << "\\" << dec.width(3).fill('0').val(c0); break; } + } else if (cp == '"') { + (*os) << (usesinglequote ? "\"" : "\\\""); + } else if (cp == '\'') { + (*os) << (usesinglequote ? "\\'" : "'"); + } else if (cp == '\\') { + (*os) << "\\\\"; + } else { + write_codepoint_utf8(cp, os); } } (*os) << (usesinglequote ? '\'' : '"'); @@ -656,50 +657,52 @@ eng::string toupper(eng::string input) { return input; } -static void buffer_codepoint_utf8(int32_t scp, char *buffer) { +static int buffer_codepoint_utf8(char32_t scp, char *buffer) { uint32_t cp = (uint32_t)scp; unsigned char *c = (unsigned char *)buffer; - if (cp <= 0x7F) { + if (cp < 0) { + return 0; + } + else if (cp <= 0x7F) { c[0] = cp; - c[1] = 0; + return 1; } else if (cp <= 0x7FF) { c[0] = (cp>>6)+192; c[1] = (cp&63)+128; - c[2] = 0; + return 2; } else if (cp <= 0xFFFF) { - if (0xd800 <= cp && cp <= 0xdfff) { - c[0] = 0; - } else { - c[0] = (cp>>12)+224; - c[1] = ((cp>>6)&63)+128; - c[2] = (cp&63)+128; - c[3] = 0; + if ((cp >= 0xD800) && (cp <= 0xDFFF)) { + return 0; } + c[0] = (cp>>12)+224; + c[1] = ((cp>>6)&63)+128; + c[2] = (cp&63)+128; + return 3; } else if (cp <= 0x10FFFF) { c[0] = (cp>>18)+240; c[1] = ((cp>>12)&63)+128; c[2] = ((cp>>6)&63)+128; c[3] = (cp&63)+128; - c[4] = 0; + return 4; } else { - c[0] = 0; + return 0; } } eng::string get_codepoint_utf8(uint32_t cp) { - char buffer[5]; - buffer_codepoint_utf8(cp, buffer); - return eng::string(buffer); + char buffer[4]; + int len = buffer_codepoint_utf8(cp, buffer); + return eng::string(buffer, len); } bool write_codepoint_utf8(int32_t cp, std::ostream *s) { - char buffer[5]; - buffer_codepoint_utf8(cp, buffer); - (*s) << buffer; - return buffer[0] != 0; + char buffer[4]; + int len = buffer_codepoint_utf8(cp, buffer); + (*s) << std::string_view(buffer, len); + return (len > 0); } double distance_squared(double x1, double y1, double x2, double y2) { diff --git a/luprex/cpp/core/util.hpp b/luprex/cpp/core/util.hpp index 55ffd654..60bb14a0 100644 --- a/luprex/cpp/core/util.hpp +++ b/luprex/cpp/core/util.hpp @@ -181,8 +181,14 @@ int32_t read_ascii_char(string_view &source); // Read a UTF8 codepoint from a string_view. // -// If the next thing in the string_view isn't a valid -// codepoint, returns -1 and doesn't update the view. +// If the string_view is empty, returns -1 and doesn't update +// the string_view. +// +// If the string_view contains an unfinished but possibly valid +// codepoint, returns -1 and doesn't update the string_view. +// +// If the next thing in the string_view is an invalid codepoint, +// returns -2 and doesn't update the string_view. // int32_t read_codepoint_utf8(string_view &source); diff --git a/luprex/cpp/drv/driver-linux.cpp b/luprex/cpp/drv/driver-linux.cpp index 0b33732b..ab1003ab 100644 --- a/luprex/cpp/drv/driver-linux.cpp +++ b/luprex/cpp/drv/driver-linux.cpp @@ -219,18 +219,18 @@ static int socket_poll(struct pollfd *pollvec, int pollcount, int mstimeout, std } // Write unicode onto the console. -static void console_write(const CodepointString &cps) { - std::string utf8 = ReadlineDevice::to_utf8(cps); +static void console_write(const std::u32string &cps) { + std::string utf8 = drvutil::to_utf8(cps); write(1, utf8.c_str(), utf8.size()); } -static CodepointString console_read() { - CodepointString result; +static std::u32string console_read() { + std::u32string result; char buffer[512]; int nread = read(0, buffer, 512); if (nread > 0) { std::string_view s(buffer, nread); - result = ReadlineDevice::from_utf8(s, nullptr); + result = drvutil::from_utf8(s, nullptr); } return result; } diff --git a/luprex/cpp/drv/driver-windows.cpp b/luprex/cpp/drv/driver-windows.cpp index da5c8356..cb8c40bf 100644 --- a/luprex/cpp/drv/driver-windows.cpp +++ b/luprex/cpp/drv/driver-windows.cpp @@ -230,14 +230,15 @@ static void init_winsock() { } } -static void console_write(const CodepointString &cps) { + +static void console_write(const std::u32string &cps) { if (cps.size() == 0) return; - // Convert to wstring. - // Any character outside the range 0xFFFF is replaced with a box. + // Convert to wstring. Any character not representable as a single wchar_t + // is replaced with a box. It's not ideal, but it's pretty good. std::wstring ws(cps.size(), 0); for (int i = 0; i < int(cps.size()); i++) { char32_t c = cps[i]; - if ((c >= 0)&&(c <= 0xFFFF)) ws[i] = (wchar_t)c; + if (drvutil::is_single_wchar_t(c)) ws[i] = (wchar_t)c; else ws[i] = 0x2610; } HANDLE hstdout = GetStdHandle(STD_OUTPUT_HANDLE); @@ -253,7 +254,7 @@ static void console_write(const CodepointString &cps) { } } -static CodepointString console_read() { +static std::u32string console_read() { HANDLE hstdin = GetStdHandle(STD_INPUT_HANDLE); assert(hstdin != INVALID_HANDLE_VALUE); INPUT_RECORD inrecords[512]; @@ -262,7 +263,7 @@ static CodepointString console_read() { if (int(nevents) > 0) { if (int(nevents) > 512) nevents = 512; ReadConsoleInputW(hstdin, inrecords, nevents, &nread); - CodepointString result(nread, 0); + std::u32string result(nread, 0); int len = 0; for (int i = 0; i < int(nread); i++) { const INPUT_RECORD &inr = inrecords[i]; @@ -274,7 +275,7 @@ static CodepointString console_read() { return result.substr(0, len); } } - return CodepointString(); + return std::u32string(); } static void ssl_load_certificate_authorities(SSL_CTX *ctx) { diff --git a/luprex/cpp/drv/driver.cpp b/luprex/cpp/drv/driver.cpp index 016fc074..61ca74a0 100644 --- a/luprex/cpp/drv/driver.cpp +++ b/luprex/cpp/drv/driver.cpp @@ -206,7 +206,7 @@ class Driver { if (ndata > DRV_SHORTSTRING_SIZE) ndata = DRV_SHORTSTRING_SIZE; std::string_view src(data, ndata); int consumed; - CodepointString cps = ReadlineDevice::from_utf8(src, &consumed); + std::u32string cps = drvutil::from_utf8(src, &consumed); readline_device_.print(cps); engw.play_sent_outgoing(&engw, 0, consumed); } @@ -217,16 +217,16 @@ class Driver { uint32_t promptlen; const char *promptdata; engw.get_console_prompt(&engw, &promptlen, &promptdata); - CodepointString prompt = ReadlineDevice::from_utf8(std::string_view(promptdata, promptlen), nullptr); + std::u32string prompt = drvutil::from_utf8(std::string_view(promptdata, promptlen), nullptr); readline_device_.set_prompt(prompt); while (true) { - CodepointString cps = console_read(); + std::u32string cps = console_read(); if (cps.size() == 0) break; read_console_recently_ = true; for (char32_t c : cps) { - CodepointString line = readline_device_.putcode(c); + std::u32string line = readline_device_.putcode(c); if (!line.empty()) { - std::string utf8 = ReadlineDevice::to_utf8(line); + std::string utf8 = drvutil::to_utf8(line); engw.play_recv_incoming(&engw, 0, utf8.size(), utf8.c_str()); } } diff --git a/luprex/cpp/drv/drvutil.cpp b/luprex/cpp/drv/drvutil.cpp index 430ad867..7f1a83be 100644 --- a/luprex/cpp/drv/drvutil.cpp +++ b/luprex/cpp/drv/drvutil.cpp @@ -77,6 +77,131 @@ void split_target(std::string_view target, std::string &cert, std::string &host, port = std::string(split[2]); } +bool is_single_wchar_t(char32_t c) { + if ((c >= 0xD800) && (c <= 0xDFFF)) return false; + if ((c >= 0) && (c <= 0xFFFF)) return true; + return false; +} + +static int buffer_codepoint_utf8(char32_t scp, char *buffer) { + uint32_t cp = (uint32_t)scp; + unsigned char *c = (unsigned char *)buffer; + if (cp < 0) { + return 0; + } + else if (cp <= 0x7F) { + c[0] = cp; + return 1; + } + else if (cp <= 0x7FF) { + c[0] = (cp>>6)+192; + c[1] = (cp&63)+128; + return 2; + } + else if (cp <= 0xFFFF) { + if ((cp >= 0xD800) && (cp <= 0xDFFF)) { + return 0; + } + c[0] = (cp>>12)+224; + c[1] = ((cp>>6)&63)+128; + c[2] = (cp&63)+128; + return 3; + } + else if (cp <= 0x10FFFF) { + c[0] = (cp>>18)+240; + c[1] = ((cp>>12)&63)+128; + c[2] = ((cp>>6)&63)+128; + c[3] = (cp&63)+128; + return 4; + } else { + return 0; + } +} + +static int32_t read_codepoint_utf8(std::string_view &source) { + size_t size = source.size(); + if (size == 0) return -1; + + const unsigned char *bytes = (const unsigned char *)source.data(); + int codepoint; + size_t seqlen; + if ((bytes[0] & 0x80) == 0x00) { + // U+0000 to U+007F + codepoint = (bytes[0] & 0x7F); + seqlen = 1; + } else if ((bytes[0] & 0xE0) == 0xC0) { + // U+0080 to U+07FF + codepoint = (bytes[0] & 0x1F); + seqlen = 2; + } else if ((bytes[0] & 0xF0) == 0xE0) { + // U+0800 to U+FFFF + codepoint = (bytes[0] & 0x0F); + seqlen = 3; + } else if ((bytes[0] & 0xF8) == 0xF0) { + // U+10000 to U+10FFFF + codepoint = (bytes[0] & 0x07); + seqlen = 4; + } else { + // Bad character. return invalid CP. + return -2; + } + + if (seqlen > size) { + return -1; + } + + for (size_t i = 1; i < seqlen; ++i) { + if ((bytes[i] & 0xC0) != 0x80) { + // Bad character. return invalid CP. + return -2; + } + codepoint = (codepoint << 6) | (bytes[i] & 0x3F); + } + + if ((codepoint > 0x10FFFF) || + ((codepoint >= 0xD800) && (codepoint <= 0xDFFF)) || + ((codepoint <= 0x007F) && (seqlen != 1)) || + ((codepoint >= 0x0080) && (codepoint <= 0x07FF) && (seqlen != 2)) || + ((codepoint >= 0x0800) && (codepoint <= 0xFFFF) && (seqlen != 3)) || + ((codepoint >= 0x10000) && (codepoint <= 0x1FFFFF) && (seqlen != 4))) { + // Bad character. return invalid CP. + return -2; + } + + source.remove_prefix(seqlen); + return codepoint; +} + +std::string to_utf8(const std::u32string &s) { + std::string result(s.size() * 4, 0); + char *buffer = &result[0]; + int len = 0; + for (char32_t c : s) { + int clen = buffer_codepoint_utf8(c, buffer + len); + len += clen; + } + return result.substr(0, len); +} + +std::u32string from_utf8(std::string_view s, int *consumed) { + std::string_view rest = s; + std::u32string result(s.size(), 0); + int len = 0; + while (true) { + int32_t c = read_codepoint_utf8(rest); + if (c == -1) { + break; // EOF reached; + } else if (c < 0) { + rest.remove_prefix(1); + } else { + result[len++] = (char32_t)c; + } + } + if (consumed != nullptr) { + *consumed = s.size() - rest.size(); + } + return result.substr(0, len); +} static std::vector parse_control_lst(std::string_view ctrl) { std::vector result; diff --git a/luprex/cpp/drv/drvutil.hpp b/luprex/cpp/drv/drvutil.hpp index 2228c1d8..364db007 100644 --- a/luprex/cpp/drv/drvutil.hpp +++ b/luprex/cpp/drv/drvutil.hpp @@ -46,6 +46,23 @@ std::string package_lua_source(const std::filesystem::path &base, std::ostream * // void split_target(std::string_view target, std::string &cert, std::string &host, std::string &port); +// Return true if the unicode codepoint can be converted to a single 16-bit wchar_t. +// +bool is_single_wchar_t(char32_t c); + +// Convert a codepoint string into a UTF8-string. +// If the codepoint string contains invalid codepoints, they're silently dropped. +// +std::string to_utf8(const std::u32string &cps); + +// Convert a UTF8 string to a codepoint string. +// +// If the UTF8 string contains invalid sequences, they're silently dropped. +// Some of the bytes may not be consumed, if the source ends with an unfinished +// utf-8 sequence. Returns the Codepoint string and the number of bytes consumed. +// +std::u32string from_utf8(std::string_view source, int *consumed); + // Get a system error message, in an OS-independent manner. // // These versions of strerror is thread-safe, and it never fails diff --git a/luprex/cpp/drv/readline.cpp b/luprex/cpp/drv/readline.cpp index f61b103d..630ec70d 100644 --- a/luprex/cpp/drv/readline.cpp +++ b/luprex/cpp/drv/readline.cpp @@ -2,8 +2,8 @@ #define MAXLINE 512 -static CodepointString n_backspaces(int n) { - CodepointString result(3 * n, 0); +static std::u32string n_backspaces(int n) { + std::u32string result(3 * n, 0); for (int i = 0; i < n; i++) { result[i*3 + 0] = '\b'; result[i*3 + 1] = ' '; @@ -12,7 +12,7 @@ static CodepointString n_backspaces(int n) { return result; } -static int common_prefix_length(const CodepointString &a, const CodepointString &b) { +static int common_prefix_length(const std::u32string &a, const std::u32string &b) { int minlen = std::min(a.size(), b.size()); for (int i = 0; i < minlen; i++) { if (a[i] != b[i]) return i; @@ -20,104 +20,11 @@ static int common_prefix_length(const CodepointString &a, const CodepointString return minlen; } -static int buffer_codepoint_utf8(char32_t scp, char *buffer) { - uint32_t cp = (uint32_t)scp; - unsigned char *c = (unsigned char *)buffer; - if (cp < 0) { - return 0; - } - else if (cp <= 0x7F) { - c[0] = cp; - return 1; - } - else if (cp <= 0x7FF) { - c[0] = (cp>>6)+192; - c[1] = (cp&63)+128; - return 2; - } - else if (cp <= 0xFFFF) { - c[0] = (cp>>12)+224; - c[1] = ((cp>>6)&63)+128; - c[2] = (cp&63)+128; - return 3; - } - else if (cp <= 0x10FFFF) { - c[0] = (cp>>18)+240; - c[1] = ((cp>>12)&63)+128; - c[2] = ((cp>>6)&63)+128; - c[3] = (cp&63)+128; - return 4; - } else { - return 0; - } -} - -static int32_t read_codepoint_utf8(std::string_view &source) { - size_t size = source.size(); - if (size == 0) return -1; - - const unsigned char *bytes = (const unsigned char *)source.data(); - int codepoint; - size_t seqlen; - if ((bytes[0] & 0x80) == 0x00) { - // U+0000 to U+007F - codepoint = (bytes[0] & 0x7F); - seqlen = 1; - } else if ((bytes[0] & 0xE0) == 0xC0) { - // U+0080 to U+07FF - codepoint = (bytes[0] & 0x1F); - seqlen = 2; - } else if ((bytes[0] & 0xF0) == 0xE0) { - // U+0800 to U+FFFF - codepoint = (bytes[0] & 0x0F); - seqlen = 3; - } else if ((bytes[0] & 0xF8) == 0xF0) { - // U+10000 to U+10FFFF - codepoint = (bytes[0] & 0x07); - seqlen = 4; - } else { - // Bad character. Drop a byte and return invalid CP. - source.remove_prefix(1); - return -2; - } - - if (seqlen > size) { - return -1; - } - - for (size_t i = 1; i < seqlen; ++i) { - if ((bytes[i] & 0xC0) != 0x80) { - // Bad character. Drop a byte and return invalid CP. - source.remove_prefix(1); - return -2; - } - codepoint = (codepoint << 6) | (bytes[i] & 0x3F); - } - - if ((codepoint > 0x10FFFF) || - ((codepoint <= 0x007F) && (seqlen != 1)) || - ((codepoint >= 0x0080) && (codepoint <= 0x07FF) && (seqlen != 2)) || - ((codepoint >= 0x0800) && (codepoint <= 0xFFFF) && (seqlen != 3)) || - ((codepoint >= 0x10000) && (codepoint <= 0x1FFFFF) && (seqlen != 4))) { - // Bad character. Drop a byte and return invalid CP. - source.remove_prefix(1); - return -2; - } - - source.remove_prefix(seqlen); - return codepoint; -} - -ReadlineDevice::ReadlineDevice() { - desired_prompt_ = CodepointString(1, '>'); -} - - void ReadlineDevice::set_print_callback(print_callback cb) { print_cb_ = cb; } -void ReadlineDevice::set_prompt(const CodepointString &prompt) { +void ReadlineDevice::set_prompt(const std::u32string &prompt) { desired_prompt_ = prompt; echo_command(); } @@ -152,24 +59,24 @@ void ReadlineDevice::echo_command() { } // Echo the new part. - CodepointString newpart = desired_command_.substr(current_command_.size()); + std::u32string newpart = desired_command_.substr(current_command_.size()); if (!newpart.empty()) { print_cb_(newpart); current_command_ = desired_command_; } } -CodepointString ReadlineDevice::putcode(char32_t c) { +std::u32string ReadlineDevice::putcode(char32_t c) { if ((c == '\n') && (readline_lastc_ == '\r')) { // Ignore newline immediately after carriage return. // Otherwise, crlf produces two newlines. - return CodepointString(); + return std::u32string(); } else if ((c == '\r') || (c == '\n')) { - CodepointString white(1, ' '); - CodepointString newline(1, '\n'); + std::u32string white(1, ' '); + std::u32string newline(1, '\n'); echo_command(); print_cb_(white + newline); - CodepointString result = desired_command_ + newline; + std::u32string result = desired_command_ + newline; desired_command_.clear(); current_prompt_.clear(); current_command_.clear(); @@ -181,20 +88,20 @@ CodepointString ReadlineDevice::putcode(char32_t c) { desired_command_ = desired_command_.substr(0, len-1); } echo_command(); - return CodepointString(); + return std::u32string(); } else if ((c >= 32)&&(c <= 0x10FFFF)) { int len = desired_command_.size(); if (len < MAXLINE) { desired_command_ = desired_command_ + c; } echo_command(); - return CodepointString(); + return std::u32string(); } readline_lastc_ = c; - return CodepointString(); + return std::u32string(); } -void ReadlineDevice::print(const CodepointString &s) { +void ReadlineDevice::print(const std::u32string &s) { if (!s.empty()) { erase_command(); print_cb_(s); @@ -202,30 +109,3 @@ void ReadlineDevice::print(const CodepointString &s) { } } -std::string ReadlineDevice::to_utf8(const CodepointString &s) { - std::string result(s.size() * 4, 0); - char *buffer = &result[0]; - int len = 0; - for (char32_t c : s) { - int clen = buffer_codepoint_utf8(c, buffer + len); - len += clen; - } - return result.substr(0, len); -} - -CodepointString ReadlineDevice::from_utf8(std::string_view s, int *consumed) { - std::string_view rest = s; - CodepointString result(s.size(), 0); - int len = 0; - while (true) { - int32_t c = read_codepoint_utf8(rest); - if (c == -1) break; // EOF reached; - if (c == -2) continue; // Filter out bad UTF8 but continue. - result[len++] = (char32_t)c; - } - if (consumed != nullptr) { - *consumed = s.size() - rest.size(); - } - return result.substr(0, len); -} - diff --git a/luprex/cpp/drv/readline.hpp b/luprex/cpp/drv/readline.hpp index 0b98aaae..78cfe3e7 100644 --- a/luprex/cpp/drv/readline.hpp +++ b/luprex/cpp/drv/readline.hpp @@ -4,19 +4,19 @@ #include #include +#include "drvutil.hpp" -using CodepointString = std::basic_string; class ReadlineDevice { public: - using print_callback = void (*)(const CodepointString &text); + using print_callback = void (*)(const std::u32string &text); private: print_callback print_cb_; - CodepointString desired_command_; - CodepointString current_command_; - CodepointString desired_prompt_; - CodepointString current_prompt_; + std::u32string desired_command_; + std::u32string current_command_; + std::u32string desired_prompt_; + std::u32string current_prompt_; char32_t readline_lastc_; void erase_command(); @@ -24,31 +24,19 @@ private: public: - ReadlineDevice(); - // The callback must be set before using the readline device. void set_print_callback(print_callback cb); // change the prompt. - void set_prompt(const CodepointString &prompt); + void set_prompt(const std::u32string &prompt); // Use this to print anything on the console. - void print(const CodepointString &cps); + void print(const std::u32string &cps); // Whenever the user types a character, call 'putcode'. If the code is // newline, this returns the line of text that was entered, including the // newline. Otherwise returns empty string. Backspace is handled here. - CodepointString putcode(char32_t codepoint); - - // This can be used to convert a codepoint string into a - // UTF8-string. - static std::string to_utf8(const CodepointString &cps); - - // This can be used to convert UTF8 to a codepoint string. - // Some of the bytes may not be consumed, if the source contains - // a partial utf-8 sequence. Returns the Codepoint string and the - // number of bytes consumed. - static CodepointString from_utf8(std::string_view source, int *consumed); + std::u32string putcode(char32_t codepoint); };