Rename some unicode functions and document better

This commit is contained in:
2023-05-30 23:35:54 -04:00
parent 54125c9c8c
commit b98bf33724
4 changed files with 15 additions and 9 deletions

View File

@@ -221,7 +221,7 @@ static int socket_poll(struct pollfd *pollvec, int pollcount, int mstimeout, std
// Write unicode onto the console.
static void console_write(const std::u32string &cps) {
std::string utf8 = drvutil::to_utf8(cps);
std::string utf8 = drvutil::utf32_to_utf8(cps);
write(1, utf8.c_str(), utf8.size());
}
@@ -231,7 +231,7 @@ static std::u32string console_read() {
int nread = read(0, buffer, 512);
if (nread > 0) {
std::string_view s(buffer, nread);
result = drvutil::from_utf8(s, nullptr);
result = drvutil::utf8_to_utf32(s, nullptr);
}
return result;
}

View File

@@ -206,7 +206,7 @@ class Driver {
if (ndata > DRV_SHORTSTRING_SIZE) ndata = DRV_SHORTSTRING_SIZE;
std::string_view src(data, ndata);
int consumed;
std::u32string cps = drvutil::from_utf8(src, &consumed);
std::u32string cps = drvutil::utf8_to_utf32(src, &consumed);
readline_device_.print(cps);
engw.play_sent_outgoing(&engw, 0, consumed);
}
@@ -217,7 +217,7 @@ class Driver {
uint32_t promptlen;
const char *promptdata;
engw.get_console_prompt(&engw, &promptlen, &promptdata);
std::u32string prompt = drvutil::from_utf8(std::string_view(promptdata, promptlen), nullptr);
std::u32string prompt = drvutil::utf8_to_utf32(std::string_view(promptdata, promptlen), nullptr);
readline_device_.set_prompt(prompt);
while (true) {
std::u32string cps = console_read();
@@ -226,7 +226,7 @@ class Driver {
for (char32_t c : cps) {
std::u32string line = readline_device_.putcode(c);
if (!line.empty()) {
std::string utf8 = drvutil::to_utf8(line);
std::string utf8 = drvutil::utf32_to_utf8(line);
engw.play_recv_incoming(&engw, 0, utf8.size(), utf8.c_str());
}
}

View File

@@ -163,7 +163,7 @@ static int32_t read_codepoint_utf8(std::string_view &source) {
return codepoint;
}
std::string to_utf8(const std::u32string &s) {
std::string utf32_to_utf8(const std::u32string &s) {
std::string result(s.size() * 4, 0);
char *buffer = &result[0];
int len = 0;
@@ -174,7 +174,7 @@ std::string to_utf8(const std::u32string &s) {
return result.substr(0, len);
}
std::u32string from_utf8(std::string_view s, int *consumed) {
std::u32string utf8_to_utf32(std::string_view s, int *consumed) {
std::string_view rest = s;
std::u32string result(s.size(), 0);
int len = 0;

View File

@@ -53,7 +53,7 @@ bool is_single_wchar_t(char32_t c);
// Convert a codepoint string into a UTF-8 string.
// If the codepoint string contains invalid codepoints, they're silently dropped.
//
std::string to_utf8(const std::u32string &cps);
std::string utf32_to_utf8(const std::u32string &cps);
// Convert a UTF-8 string to a codepoint string.
//
@@ -61,10 +61,16 @@ std::string to_utf8(const std::u32string &cps);
// Some of the bytes may not be consumed if the source ends with an unfinished
// UTF-8 sequence. Returns the codepoint string and the number of bytes consumed.
//
std::u32string from_utf8(std::string_view source, int *consumed);
std::u32string utf8_to_utf32(std::string_view source, int *consumed);
// Convert a UTF-8 string to a UCS-2 string.
//
// If the UTF-8 string contains invalid sequences, they're silently dropped.
// Some of the bytes may not be consumed if the source ends with an unfinished
// UTF-8 sequence. Returns the UCS-2 string and the number of bytes consumed.
// UCS-2 can't represent all of Unicode, so this conversion is lossy:
// any character that can't be represented is replaced with a box.
//
std::u16string utf8_to_ucs2(std::string_view source, int *consumed);
// Get a system error message, in an OS-independent manner.