Rename some unicode functions and document better

This commit is contained in:
2023-05-30 23:35:54 -04:00
parent 54125c9c8c
commit b98bf33724
4 changed files with 15 additions and 9 deletions

View File

@@ -53,7 +53,7 @@ bool is_single_wchar_t(char32_t c);
// Convert a UTF-32 codepoint string into a UTF-8 string.
// Any invalid codepoints in the input are silently dropped.
//
std::string to_utf8(const std::u32string &cps);
std::string utf32_to_utf8(const std::u32string &cps);
// Convert a UTF-8 string to a UTF-32 codepoint string.
//
@@ -61,10 +61,16 @@ std::string to_utf8(const std::u32string &cps);
// Some trailing bytes may be left unconsumed if the source ends with an
// unfinished UTF-8 sequence. Returns the codepoint string, and reports the
// number of bytes consumed through the `consumed` out-parameter.
//
std::u32string from_utf8(std::string_view source, int *consumed);
std::u32string utf8_to_utf32(std::string_view source, int *consumed);
// Convert a UTF-8 string to a UCS-2 string.
//
// If the UTF-8 string contains invalid sequences, they're silently dropped.
// Some trailing bytes may be left unconsumed if the source ends with an
// unfinished UTF-8 sequence. Returns the UCS-2 string, and reports the number
// of bytes consumed through the `consumed` out-parameter.
//
// UCS-2 is a fixed-width 16-bit encoding and cannot represent every Unicode
// codepoint, so this conversion is lossy: any character that can't be
// represented is replaced with a box.
//
std::u16string utf8_to_ucs2(std::string_view source, int *consumed);
// Get a system error message, in an OS-independent manner.