More UTF-16 stuff
This commit is contained in:
@@ -109,6 +109,34 @@ static int buffer_codepoint_utf8(char32_t scp, char *buffer) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Read one codepoint from the front of a UTF-16 string view.
//
// On success, the code units that were decoded (one, or two for a surrogate
// pair) are removed from the front of `source` and the codepoint is returned.
//
// Returns -1 if `source` is empty; nothing is consumed.
// Returns -2 on an invalid sequence: a lone low surrogate, or a high
// surrogate that is not followed by a low surrogate. Only the first code
// unit is consumed in that case, so the caller can resume decoding at the
// unit that follows it.
static int32_t read_codepoint_utf16(std::u16string_view &source) {
    if (source.empty()) return -1;

    // Convert the code unit by value. char16_t is a distinct type, so the
    // buffer must not be read through a uint16_t pointer.
    const int32_t word0 = static_cast<int32_t>(source.front());
    source.remove_prefix(1);

    // Anything outside the surrogate range is a complete codepoint.
    if (word0 < 0xD800 || word0 > 0xDFFF) return word0;

    // A low surrogate cannot start a sequence.
    if (word0 >= 0xDC00) return -2;

    // word0 is a high surrogate: it must be followed by a low surrogate.
    if (source.empty()) return -2;
    const int32_t word1 = static_cast<int32_t>(source.front());
    if (word1 < 0xDC00 || word1 > 0xDFFF) return -2;
    source.remove_prefix(1);

    // Each surrogate carries 10 payload bits of the value above 0x10000.
    const int32_t high_bits = word0 & 0x3FF;
    const int32_t low_bits = word1 & 0x3FF;
    return ((high_bits << 10) | low_bits) + 0x10000;
}
|
||||||
|
|
||||||
static int32_t read_codepoint_utf8(std::string_view &source) {
|
static int32_t read_codepoint_utf8(std::string_view &source) {
|
||||||
size_t size = source.size();
|
size_t size = source.size();
|
||||||
if (size == 0) return -1;
|
if (size == 0) return -1;
|
||||||
@@ -218,6 +246,18 @@ std::u16string utf8_to_ucs2(std::string_view s, int *consumed) {
|
|||||||
return result.substr(0, len);
|
return result.substr(0, len);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::string utf16_to_utf8(std::u16string_view s) {
|
||||||
|
std::string result(s.size() * 4, 0);
|
||||||
|
int len = 0;
|
||||||
|
while (true) {
|
||||||
|
int codepoint = read_codepoint_utf16(s);
|
||||||
|
if (codepoint == -1) break;
|
||||||
|
if (codepoint < 0) continue;
|
||||||
|
len += buffer_codepoint_utf8(codepoint, &result[len]);
|
||||||
|
}
|
||||||
|
return result.substr(0, len);
|
||||||
|
}
|
||||||
|
|
||||||
static std::vector<std::string> parse_control_lst(std::string_view ctrl) {
|
static std::vector<std::string> parse_control_lst(std::string_view ctrl) {
|
||||||
std::vector<std::string> result;
|
std::vector<std::string> result;
|
||||||
while (!ctrl.empty()) {
|
while (!ctrl.empty()) {
|
||||||
|
|||||||
@@ -55,7 +55,7 @@ bool is_single_wchar_t(char32_t c);
|
|||||||
//
|
//
|
||||||
std::string utf32_to_utf8(const std::u32string &cps);
|
std::string utf32_to_utf8(const std::u32string &cps);
|
||||||
|
|
||||||
// Convert a UTF8 string to a codepoint string.
|
// Convert a UTF8 string to a UTF32 string.
|
||||||
//
|
//
|
||||||
// If the UTF8 string contains invalid sequences, they're silently dropped.
|
// If the UTF8 string contains invalid sequences, they're silently dropped.
|
||||||
// Some of the bytes may not be consumed, if the source ends with an unfinished
|
// Some of the bytes may not be consumed, if the source ends with an unfinished
|
||||||
@@ -73,6 +73,12 @@ std::u32string utf8_to_utf32(std::string_view source, int *consumed);
|
|||||||
//
|
//
|
||||||
std::u16string utf8_to_ucs2(std::string_view source, int *consumed);
|
std::u16string utf8_to_ucs2(std::string_view source, int *consumed);
|
||||||
|
|
||||||
|
// Convert a UTF16 string to a UTF8 string.
|
||||||
|
//
|
||||||
|
// If the UTF16 string contains invalid sequences, they're silently dropped.
|
||||||
|
//
|
||||||
|
std::string utf16_to_utf8(std::u16string_view source);
|
||||||
|
|
||||||
// Get a system error message, in an OS-independent manner.
|
// Get a system error message, in an OS-independent manner.
|
||||||
//
|
//
|
||||||
// These versions of strerror are thread-safe, and they never fail
|
// These versions of strerror are thread-safe, and they never fail
|
||||||
|
|||||||
Reference in New Issue
Block a user