From b98bf337241aaf03c2287e447488ca9849a41a12 Mon Sep 17 00:00:00 2001
From: jyelon <jyelon@gmail.com>
Date: Tue, 30 May 2023 23:35:54 -0400
Subject: [PATCH] Rename some Unicode functions and improve their documentation

---
 luprex/cpp/drv/driver-linux.cpp |  4 ++--
 luprex/cpp/drv/driver.cpp       |  6 +++---
 luprex/cpp/drv/drvutil.cpp      |  4 ++--
 luprex/cpp/drv/drvutil.hpp      | 10 ++++++++--
 4 files changed, 15 insertions(+), 9 deletions(-)

diff --git a/luprex/cpp/drv/driver-linux.cpp b/luprex/cpp/drv/driver-linux.cpp
index 0b43e456..23cbba86 100644
--- a/luprex/cpp/drv/driver-linux.cpp
+++ b/luprex/cpp/drv/driver-linux.cpp
@@ -221,7 +221,7 @@ static int socket_poll(struct pollfd *pollvec, int pollcount, int mstimeout, std
 
 // Write unicode onto the console.
 static void console_write(const std::u32string &cps) {
-    std::string utf8 = drvutil::to_utf8(cps);
+    std::string utf8 = drvutil::utf32_to_utf8(cps);
     write(1, utf8.c_str(), utf8.size());
 }
 
@@ -231,7 +231,7 @@ static std::u32string console_read() {
     int nread = read(0, buffer, 512);
     if (nread > 0) {
         std::string_view s(buffer, nread);
-        result = drvutil::from_utf8(s, nullptr);
+        result = drvutil::utf8_to_utf32(s, nullptr);
     }
     return result;
 }
diff --git a/luprex/cpp/drv/driver.cpp b/luprex/cpp/drv/driver.cpp
index 61ca74a0..6a9705b4 100644
--- a/luprex/cpp/drv/driver.cpp
+++ b/luprex/cpp/drv/driver.cpp
@@ -206,7 +206,7 @@ class Driver {
             if (ndata > DRV_SHORTSTRING_SIZE) ndata = DRV_SHORTSTRING_SIZE;
             std::string_view src(data, ndata);
             int consumed;
-            std::u32string cps = drvutil::from_utf8(src, &consumed);
+            std::u32string cps = drvutil::utf8_to_utf32(src, &consumed);
             readline_device_.print(cps);
             engw.play_sent_outgoing(&engw, 0, consumed);
         }
@@ -217,7 +217,7 @@ class Driver {
         uint32_t promptlen;
         const char *promptdata;
         engw.get_console_prompt(&engw, &promptlen, &promptdata);
-        std::u32string prompt = drvutil::from_utf8(std::string_view(promptdata, promptlen), nullptr);
+        std::u32string prompt = drvutil::utf8_to_utf32(std::string_view(promptdata, promptlen), nullptr);
         readline_device_.set_prompt(prompt);
         while (true) {
             std::u32string cps = console_read();
@@ -226,7 +226,7 @@ class Driver {
             for (char32_t c : cps) {
                 std::u32string line = readline_device_.putcode(c);
                 if (!line.empty()) {
-                    std::string utf8 = drvutil::to_utf8(line);
+                    std::string utf8 = drvutil::utf32_to_utf8(line);
                     engw.play_recv_incoming(&engw, 0, utf8.size(), utf8.c_str());
                 }
             }
diff --git a/luprex/cpp/drv/drvutil.cpp b/luprex/cpp/drv/drvutil.cpp
index 7ab53739..18544805 100644
--- a/luprex/cpp/drv/drvutil.cpp
+++ b/luprex/cpp/drv/drvutil.cpp
@@ -163,7 +163,7 @@ static int32_t read_codepoint_utf8(std::string_view &source) {
     return codepoint;
 }
 
-std::string to_utf8(const std::u32string &s) {
+std::string utf32_to_utf8(const std::u32string &s) {
     std::string result(s.size() * 4, 0);
     char *buffer = &result[0];
     int len = 0;
@@ -174,7 +174,7 @@ std::string to_utf8(const std::u32string &s) {
     return result.substr(0, len);
 }
 
-std::u32string from_utf8(std::string_view s, int *consumed) {
+std::u32string utf8_to_utf32(std::string_view s, int *consumed) {
     std::string_view rest = s;
     std::u32string result(s.size(), 0);
     int len = 0;
diff --git a/luprex/cpp/drv/drvutil.hpp b/luprex/cpp/drv/drvutil.hpp
index 5ef8484b..c59b0d27 100644
--- a/luprex/cpp/drv/drvutil.hpp
+++ b/luprex/cpp/drv/drvutil.hpp
@@ -53,7 +53,7 @@ bool is_single_wchar_t(char32_t c);
 // Convert a codepoint string into a UTF8-string.
 // If the codepoint string contains invalid codepoints, they're silently dropped.
 //
-std::string to_utf8(const std::u32string &cps);
+std::string utf32_to_utf8(const std::u32string &cps);
 
 // Convert a UTF8 string to a codepoint string.
 // 
@@ -61,10 +61,16 @@ std::string to_utf8(const std::u32string &cps);
 // Some of the bytes may not be consumed, if the source ends with an unfinished
 // utf-8 sequence.  Returns the Codepoint string and the number of bytes consumed.
 //
-std::u32string from_utf8(std::string_view source, int *consumed);
+std::u32string utf8_to_utf32(std::string_view source, int *consumed);
 
 // Convert a UTF8 string to a UCS-2 string.
 //
+// If the UTF-8 string contains invalid sequences, they're silently dropped.
+// Some trailing bytes may be left unconsumed if the source ends with an
+// unfinished UTF-8 sequence.  Returns the UCS-2 string; the number of bytes
+// consumed is reported through 'consumed'.  UCS-2 can't represent all of
+// Unicode, so this is lossy: unrepresentable characters become a box.
+//
 std::u16string utf8_to_ucs2(std::string_view source, int *consumed);
 
 // Get a system error message, in an OS-independent manner.