Implement Unicode on console, move readline into driver
This commit is contained in:
@@ -301,6 +301,7 @@ int32_t read_ascii_char(string_view &source) {
|
||||
int32_t read_codepoint_utf8(string_view &source) {
|
||||
size_t size = source.size();
|
||||
if (size == 0) return -1;
|
||||
|
||||
const unsigned char *bytes = (const unsigned char *)source.data();
|
||||
int codepoint;
|
||||
size_t seqlen;
|
||||
@@ -321,7 +322,9 @@ int32_t read_codepoint_utf8(string_view &source) {
|
||||
codepoint = (bytes[0] & 0x07);
|
||||
seqlen = 4;
|
||||
} else {
|
||||
return -1;
|
||||
// Bad character. Drop a byte and return invalid CP.
|
||||
source.remove_prefix(1);
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (seqlen > size) {
|
||||
@@ -329,7 +332,11 @@ int32_t read_codepoint_utf8(string_view &source) {
|
||||
}
|
||||
|
||||
for (size_t i = 1; i < seqlen; ++i) {
|
||||
if ((bytes[i] & 0xC0) != 0x80) return -1;
|
||||
if ((bytes[i] & 0xC0) != 0x80) {
|
||||
// Bad character. Drop a byte and return invalid CP.
|
||||
source.remove_prefix(1);
|
||||
return 1;
|
||||
}
|
||||
codepoint = (codepoint << 6) | (bytes[i] & 0x3F);
|
||||
}
|
||||
|
||||
@@ -339,7 +346,9 @@ int32_t read_codepoint_utf8(string_view &source) {
|
||||
((codepoint >= 0x0080) && (codepoint <= 0x07FF) && (seqlen != 2)) ||
|
||||
((codepoint >= 0x0800) && (codepoint <= 0xFFFF) && (seqlen != 3)) ||
|
||||
((codepoint >= 0x10000) && (codepoint <= 0x1FFFFF) && (seqlen != 4))) {
|
||||
return -1;
|
||||
// Bad character. Drop a byte and return invalid CP.
|
||||
source.remove_prefix(1);
|
||||
return 1;
|
||||
}
|
||||
|
||||
source.remove_prefix(seqlen);
|
||||
|
||||
Reference in New Issue
Block a user