Implement Unicode on console, move readline into driver

This commit is contained in:
2023-05-18 17:14:55 -04:00
parent 2b03ca2eb6
commit fd137e8e74
12 changed files with 371 additions and 162 deletions

View File

@@ -301,6 +301,7 @@ int32_t read_ascii_char(string_view &source) {
int32_t read_codepoint_utf8(string_view &source) {
size_t size = source.size();
if (size == 0) return -1;
const unsigned char *bytes = (const unsigned char *)source.data();
int codepoint;
size_t seqlen;
@@ -321,7 +322,9 @@ int32_t read_codepoint_utf8(string_view &source) {
codepoint = (bytes[0] & 0x07);
seqlen = 4;
} else {
return -1;
// Bad character. Drop a byte and return invalid CP.
source.remove_prefix(1);
return 1;
}
if (seqlen > size) {
@@ -329,7 +332,11 @@ int32_t read_codepoint_utf8(string_view &source) {
}
for (size_t i = 1; i < seqlen; ++i) {
if ((bytes[i] & 0xC0) != 0x80) return -1;
if ((bytes[i] & 0xC0) != 0x80) {
// Bad character. Drop a byte and return invalid CP.
source.remove_prefix(1);
return 1;
}
codepoint = (codepoint << 6) | (bytes[i] & 0x3F);
}
@@ -339,7 +346,9 @@ int32_t read_codepoint_utf8(string_view &source) {
((codepoint >= 0x0080) && (codepoint <= 0x07FF) && (seqlen != 2)) ||
((codepoint >= 0x0800) && (codepoint <= 0xFFFF) && (seqlen != 3)) ||
((codepoint >= 0x10000) && (codepoint <= 0x1FFFFF) && (seqlen != 4))) {
return -1;
// Bad character. Drop a byte and return invalid CP.
source.remove_prefix(1);
return 1;
}
source.remove_prefix(seqlen);