Rename some unicode functions and document better

2023-05-30 23:35:54 -04:00
parent 54125c9c8c
commit b98bf33724
4 changed files with 15 additions and 9 deletions
--- a/luprex/cpp/drv/drvutil.hpp
+++ b/luprex/cpp/drv/drvutil.hpp
@@ -53,7 +53,7 @@ bool is_single_wchar_t(char32_t c);
 // Convert a codepoint string into a UTF8-string.
 // If the codepoint string contains invalid codepoints, they're silently dropped.
 //
-std::string to_utf8(const std::u32string &cps);
+std::string utf32_to_utf8(const std::u32string &cps);

 // Convert a UTF8 string to a codepoint string.
 // 
@@ -61,10 +61,16 @@ std::string to_utf8(const std::u32string &cps);
 // Some of the bytes may not be consumed, if the source ends with an unfinished
 // utf-8 sequence.  Returns the Codepoint string and the number of bytes consumed.
 //
-std::u32string from_utf8(std::string_view source, int *consumed);
+std::u32string utf8_to_utf32(std::string_view source, int *consumed);

 // Convert a UTF8 string to a UCS-2 string.
 //
+// If the UTF8 string contains invalid sequences, they're silently dropped.
+// Trailing bytes of an unfinished UTF-8 sequence at the end are not consumed.
+// Returns the UCS-2 string; the number of bytes consumed goes to *consumed.
+// UCS-2 covers only code points up to U+FFFF, so this conversion is lossy:
+// any character that can't be represented is replaced with a box character.
+//
 std::u16string utf8_to_ucs2(std::string_view source, int *consumed);

 // Get a system error message, in an OS-independent manner.