Implement unicode on console, move readline into driver

This commit is contained in:
2023-05-18 17:14:55 -04:00
parent 2b03ca2eb6
commit fd137e8e74
12 changed files with 371 additions and 162 deletions

View File

@@ -2,6 +2,7 @@
#include "drvutil.hpp"
#include "sslutil.hpp"
#include "readline.hpp"
#include "../core/enginewrapper.hpp"
#include <iostream>
@@ -217,12 +218,21 @@ static int socket_poll(struct pollfd *pollvec, int pollcount, int mstimeout, std
return 0;
}
static int console_write(const char *bytes, int nbytes) {
return write(1, bytes, nbytes);
// Write unicode onto the console.
static void console_write(const CodepointString &cps) {
std::string utf8 = ReadlineDevice::to_utf8(cps);
write(1, utf8.c_str(), utf8.size());
}
static int console_read(char *bytes, int nbytes) {
return read(0, bytes, nbytes);
static CodepointString console_read() {
CodepointString result;
char buffer[512];
int nread = read(0, buffer, 512);
if (nread > 0) {
std::string_view s(buffer, nread);
result = ReadlineDevice::from_utf8(s, nullptr);
}
return result;
}
static void call_init_engine_wrapper(const std::filesystem::path &luprexroot, EngineWrapper *w) {

View File

@@ -229,38 +229,51 @@ static void init_winsock() {
}
}
static int console_write(const char *bytes, int nbytes) {
if (nbytes == 0) return 0;
static void console_write(const CodepointString &cps) {
if (cps.size() == 0) return;
// Convert to wstring.
// Any character outside the range 0xFFFF is replaced with a box.
std::wstring ws(cps.size());
for (int i = 0; i < cps.size(); i++) {
char32_t c = cps[i];
if ((c >= 0)&&(c <= 0xFFFF)) ws[i] = (wchar_t)c;
else ws[i] = 0x2610;
}
HANDLE hstdout = GetStdHandle(STD_OUTPUT_HANDLE);
assert(hstdout != INVALID_HANDLE_VALUE);
DWORD nwrote;
if (nbytes > 10000) nbytes = 10000;
assert(WriteConsoleA(hstdout, bytes, nbytes, &nwrote, nullptr));
assert(nwrote > 0);
return nwrote;
std::wstring_view v(ws);
while (v.size() > 0) {
int nwrite = v.size();
if (nwrite > 10000) nwrite = 10000;
assert(WriteConsoleW(hstdout, v.data(), nwrite, &nwrote, nullptr));
assert(nwrote > 0);
v.remove_prefix(nwrote);
}
}
static int console_read(char *bytes, int nbytes) {
static CodepointString console_read() {
HANDLE hstdin = GetStdHandle(STD_INPUT_HANDLE);
assert(hstdin != INVALID_HANDLE_VALUE);
INPUT_RECORD inrecords[512];
DWORD nread, nevents;
int nascii = 0;
if (GetNumberOfConsoleInputEvents(hstdin, &nevents)) {
if (int(nevents) > nbytes) nevents = nbytes;
ReadConsoleInputA(hstdin, inrecords, nevents, &nread);
for (int i = 0; i < int(nread); i++) {
const INPUT_RECORD &inr = inrecords[i];
if (inr.EventType != KEY_EVENT) continue;
const KEY_EVENT_RECORD &key = inr.Event.KeyEvent;
if (!key.bKeyDown) continue;
char c = key.uChar.AsciiChar;
bytes[nascii++] = c;
if (int(nevents) > 0) {
if (int(nevents) > 512) nevents = 512;
ReadConsoleInputW(hstdin, inrecords, nevents, &nread);
CodepointString result(nread, 0);
int len = 0;
for (int i = 0; i < int(nread); i++) {
const INPUT_RECORD &inr = inrecords[i];
if (inr.EventType != KEY_EVENT) continue;
const KEY_EVENT_RECORD &key = inr.Event.KeyEvent;
if (!key.bKeyDown) continue;
result[len++] = key.uChar.UnicodeChar;
}
return result.substr(0, len);
}
return nascii;
} else {
return 0;
}
return CodepointString();
}
static void ssl_load_certificate_authorities(SSL_CTX *ctx) {

View File

@@ -54,6 +54,7 @@ class Driver {
CHAN_SSL_ACCEPTING,
CHAN_SSL_READWRITE,
};
struct ChanInfo {
int chid;
SOCKET socket;
@@ -89,6 +90,7 @@ class Driver {
bool read_console_recently_;
std::unique_ptr<struct pollfd[]> pollvec_;
std::unique_ptr<char[]> chbuf_;
ReadlineDevice readline_device_;
sslutil::UniqueCTX ssl_server_ctx_;
sslutil::UniqueCTX ssl_client_secure_ctx_;
@@ -202,20 +204,27 @@ class Driver {
engw.get_outgoing(&engw, 0, &ndata, &data);
if (ndata == 0) break;
if (ndata > DRV_SHORTSTRING_SIZE) ndata = DRV_SHORTSTRING_SIZE;
int nwrote = console_write(data, ndata);
if (nwrote <= 0) break;
engw.play_sent_outgoing(&engw, 0, nwrote);
std::string_view src(data, ndata);
int consumed;
CodepointString cps = ReadlineDevice::from_utf8(src, &consumed);
readline_device_.print(cps);
engw.play_sent_outgoing(&engw, 0, consumed);
}
}
void handle_console_input() {
char buffer[256];
read_console_recently_ = false;
while (true) {
int nread = console_read(buffer, 256);
if (nread <= 0) break;
CodepointString cps = console_read();
if (cps.size() == 0) break;
read_console_recently_ = true;
engw.play_recv_incoming(&engw, 0, nread, buffer);
for (char32_t c : cps) {
CodepointString line = readline_device_.putcode(c);
if (!line.empty()) {
std::string utf8 = ReadlineDevice::to_utf8(line);
engw.play_recv_incoming(&engw, 0, utf8.size(), utf8.c_str());
}
}
}
}
@@ -575,6 +584,9 @@ class Driver {
}
int drive(int argc, char *argv[]) {
// Set up the console readline device.
readline_device_.set_print_callback(console_write);
// Remove the program name from argv.
std::string program = argv[0];
argc -= 1;

225
luprex/cpp/drv/readline.cpp Normal file
View File

@@ -0,0 +1,225 @@
#include "readline.hpp"
#define MAXLINE 512
// Build the codepoint sequence that rubs out `n` characters: each character
// is removed with the triple "backspace, space, backspace".
// The returned string is exactly 3 * n codepoints long.
static CodepointString n_backspaces(int n) {
    CodepointString erase(3 * n, 0);
    auto out = erase.begin();
    for (int k = 0; k < n; ++k) {
        *out++ = '\b';  // step back onto the character
        *out++ = ' ';   // overwrite it with a blank
        *out++ = '\b';  // step back again so the cursor sits on the blank
    }
    return erase;
}
// Return how many leading codepoints `a` and `b` have in common.
// The result never exceeds the length of the shorter string.
static int common_prefix_length(const CodepointString &a, const CodepointString &b) {
    const int limit = std::min(a.size(), b.size());
    int shared = 0;
    while ((shared < limit) && (a[shared] == b[shared])) {
        ++shared;
    }
    return shared;
}
// Encode a single codepoint as UTF-8 into `buffer`.
//
// `buffer` must have room for at least 4 bytes; no terminator is written.
// Returns the number of bytes written (1 to 4), or 0 when `scp` is above
// U+10FFFF, in which case nothing is written.
//
// NOTE(review): surrogate values (U+D800..U+DFFF) are encoded like any other
// 3-byte codepoint. Strict UTF-8 forbids them; confirm whether any caller
// relies on them passing through before tightening this.
static int buffer_codepoint_utf8(char32_t scp, char *buffer) {
    const uint32_t cp = static_cast<uint32_t>(scp);
    unsigned char *out = reinterpret_cast<unsigned char *>(buffer);

    if (cp <= 0x7F) {
        // 0xxxxxxx
        out[0] = static_cast<unsigned char>(cp);
        return 1;
    }
    if (cp <= 0x7FF) {
        // 110xxxxx 10xxxxxx
        out[0] = static_cast<unsigned char>(0xC0 | (cp >> 6));
        out[1] = static_cast<unsigned char>(0x80 | (cp & 0x3F));
        return 2;
    }
    if (cp <= 0xFFFF) {
        // 1110xxxx 10xxxxxx 10xxxxxx
        out[0] = static_cast<unsigned char>(0xE0 | (cp >> 12));
        out[1] = static_cast<unsigned char>(0x80 | ((cp >> 6) & 0x3F));
        out[2] = static_cast<unsigned char>(0x80 | (cp & 0x3F));
        return 3;
    }
    if (cp <= 0x10FFFF) {
        // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
        out[0] = static_cast<unsigned char>(0xF0 | (cp >> 18));
        out[1] = static_cast<unsigned char>(0x80 | ((cp >> 12) & 0x3F));
        out[2] = static_cast<unsigned char>(0x80 | ((cp >> 6) & 0x3F));
        out[3] = static_cast<unsigned char>(0x80 | (cp & 0x3F));
        return 4;
    }
    // Beyond the Unicode range: not encodable.
    return 0;
}
// Decode one UTF-8 sequence from the front of `source`.
//
// Returns:
//   >= 0  the decoded codepoint; its bytes are removed from `source`.
//   -1    `source` is empty, or ends in the middle of a sequence whose bytes
//         are valid so far. Nothing is consumed, so the caller can retry
//         once more bytes are available.
//   -2    the front of `source` is not valid UTF-8 (bad lead byte, bad
//         continuation byte, overlong form, or value above U+10FFFF).
//         Exactly one byte is dropped so decoding can resynchronize.
//
// The bytes that are present are validated BEFORE the truncation check, so a
// lead byte followed by a non-continuation byte is rejected immediately
// instead of being reported as "need more data" and left stuck in the input.
//
// NOTE(review): surrogate values (U+D800..U+DFFF) are not rejected here.
static int32_t read_codepoint_utf8(std::string_view &source) {
    const size_t size = source.size();
    if (size == 0) return -1;

    const unsigned char *bytes = reinterpret_cast<const unsigned char *>(source.data());
    const unsigned char lead = bytes[0];

    int32_t codepoint;  // payload bits accumulated so far
    int32_t smallest;   // smallest value that legitimately needs `seqlen` bytes
    size_t seqlen;
    if ((lead & 0x80) == 0x00) {
        // U+0000 to U+007F
        codepoint = (lead & 0x7F);
        smallest = 0x0000;
        seqlen = 1;
    } else if ((lead & 0xE0) == 0xC0) {
        // U+0080 to U+07FF
        codepoint = (lead & 0x1F);
        smallest = 0x0080;
        seqlen = 2;
    } else if ((lead & 0xF0) == 0xE0) {
        // U+0800 to U+FFFF
        codepoint = (lead & 0x0F);
        smallest = 0x0800;
        seqlen = 3;
    } else if ((lead & 0xF8) == 0xF0) {
        // U+10000 to U+10FFFF
        codepoint = (lead & 0x07);
        smallest = 0x10000;
        seqlen = 4;
    } else {
        // Bad lead byte. Drop a byte and return invalid CP.
        source.remove_prefix(1);
        return -2;
    }

    // Check every continuation byte we actually have, even if the sequence
    // is incomplete.
    const size_t available = (seqlen < size) ? seqlen : size;
    for (size_t i = 1; i < available; ++i) {
        if ((bytes[i] & 0xC0) != 0x80) {
            // Bad continuation byte. Drop a byte and return invalid CP.
            source.remove_prefix(1);
            return -2;
        }
        codepoint = (codepoint << 6) | (bytes[i] & 0x3F);
    }

    // Valid so far, but the rest of the sequence has not arrived yet.
    if (seqlen > size) {
        return -1;
    }

    // Reject overlong encodings and values beyond the Unicode range.
    if ((codepoint < smallest) || (codepoint > 0x10FFFF)) {
        source.remove_prefix(1);
        return -2;
    }

    source.remove_prefix(seqlen);
    return codepoint;
}
// Construct a readline device with the default prompt ">".
// The print callback starts out null and the "last character seen" state
// starts at 0, so neither is ever read while indeterminate. A callback must
// still be installed with set_print_callback() before the device is used.
ReadlineDevice::ReadlineDevice()
    : print_cb_(nullptr),
      desired_prompt_(1, U'>'),
      readline_lastc_(0) {
}
// Install the function through which all console output is emitted.
void ReadlineDevice::set_print_callback(print_callback cb) {
    this->print_cb_ = cb;
}
// Rub out whatever prompt and command text is currently echoed, and record
// that nothing is displayed any more. Does nothing if nothing is displayed.
void ReadlineDevice::erase_command() {
    const int visible = current_prompt_.size() + current_command_.size();
    if (visible <= 0) {
        return;
    }
    print_cb_(n_backspaces(visible));
    current_prompt_.clear();
    current_command_.clear();
}
void ReadlineDevice::echo_command() {
// If the prompt has changed, erase everything and start over.
if (desired_prompt_ != current_prompt_) {
int ccsize = current_prompt_.size() + current_command_.size();
print_cb_(n_backspaces(ccsize));
print_cb_(desired_prompt_);
current_command_.clear();
current_prompt_ = desired_prompt_;
}
// Find out how much of the command matches.
int match = common_prefix_length(current_command_, desired_command_);
// Echo backspaces to remove the non-matching part.
int remove = current_command_.size() - match;
if (remove > 0) {
print_cb_(n_backspaces(remove));
current_command_ = current_command_.substr(0, match);
}
// Echo the new part.
CodepointString newpart = desired_command_.substr(current_command_.size());
if (!newpart.empty()) {
print_cb_(newpart);
current_command_ = desired_command_;
}
}
// Feed one typed codepoint into the line editor.
//
// - '\r' or '\n' finishes the line: the line is echoed, the cursor moves to
//   a new line, and the entered text plus a trailing '\n' is returned.
//   A '\n' that immediately follows a '\r' is swallowed so that CRLF counts
//   as a single line ending.
// - '\b' or DEL (127) removes the last codepoint of the pending line.
// - Codepoints from 32 through U+10FFFF are appended, up to MAXLINE.
// - Anything else is ignored.
//
// Returns an empty string unless a line was completed.
CodepointString ReadlineDevice::putcode(char32_t c) {
    // Record the character on EVERY path. The CRLF check below depends on
    // knowing that the previous character was '\r', so this must not be
    // skipped by the early returns.
    const char32_t previous = readline_lastc_;
    readline_lastc_ = c;

    if ((c == '\n') && (previous == '\r')) {
        // Ignore newline immediately after carriage return.
        // Otherwise, crlf produces two newlines.
        return CodepointString();
    }

    if ((c == '\r') || (c == '\n')) {
        const CodepointString newline(1, '\n');
        echo_command();
        print_cb_(CodepointString(1, ' ') + newline);
        CodepointString result = desired_command_ + newline;
        // Nothing is displayed on the fresh line yet.
        desired_command_.clear();
        current_prompt_.clear();
        current_command_.clear();
        return result;
    }

    if ((c == '\b') || (c == 127)) {
        if (!desired_command_.empty()) {
            desired_command_.pop_back();
        }
        echo_command();
        return CodepointString();
    }

    if ((c >= 32) && (c <= 0x10FFFF)) {
        // Silently drop input once the line is full.
        if (static_cast<int>(desired_command_.size()) < MAXLINE) {
            desired_command_.push_back(c);
        }
        echo_command();
        return CodepointString();
    }

    // Other control characters and out-of-range values are ignored.
    return CodepointString();
}
// Print `s` on the console without corrupting the line being edited: the
// echoed prompt/command is erased first, `s` is emitted, and then the
// prompt/command is echoed again. Empty input is a no-op.
void ReadlineDevice::print(const CodepointString &s) {
    if (s.empty()) {
        return;
    }
    erase_command();
    print_cb_(s);
    echo_command();
}
std::string ReadlineDevice::to_utf8(const CodepointString &s) {
std::string result(s.size() * 4, 0);
char *buffer = &result[0];
int len = 0;
for (char32_t c : s) {
int clen = buffer_codepoint_utf8(c, buffer + len);
len += clen;
}
return result.substr(0, len);
}
// Convert UTF-8 bytes to a codepoint string.
// Invalid bytes are skipped. Decoding stops when read_codepoint_utf8 reports
// that no complete sequence is left (-1). If `consumed` is non-null it
// receives the number of bytes of `s` that were processed, skipped bytes
// included; any remainder is left for the caller.
CodepointString ReadlineDevice::from_utf8(std::string_view s, int *consumed) {
    std::string_view remaining = s;
    // Every codepoint takes at least one byte, so s.size() is an upper bound.
    CodepointString decoded(s.size(), 0);
    int count = 0;
    for (int32_t cp = read_codepoint_utf8(remaining); cp != -1;
         cp = read_codepoint_utf8(remaining)) {
        if (cp != -2) {
            decoded[count++] = static_cast<char32_t>(cp);
        }
        // cp == -2: bad byte already dropped by the reader; keep going.
    }
    if (consumed != nullptr) {
        *consumed = s.size() - remaining.size();
    }
    decoded.resize(count);
    return decoded;
}

View File

@@ -0,0 +1,53 @@
#ifndef READLINE_HPP
#define READLINE_HPP
#include <string>
#include <string_view>
// A string of Unicode codepoints, one char32_t per codepoint.
using CodepointString = std::basic_string<char32_t>;

// A minimal line editor for a console.
//
// The device tracks the text that SHOULD be on the input line (desired_*)
// and the text it has actually echoed (current_*), and emits the difference
// through a caller-supplied print callback.
class ReadlineDevice {
public:
    // Signature of the function used to emit text on the console.
    using print_callback = void (*)(const CodepointString &text);

private:
    // Output sink. Null until set_print_callback() is called.
    print_callback print_cb_ = nullptr;

    // The command line as typed so far / as currently echoed.
    CodepointString desired_command_;
    CodepointString current_command_;

    // The prompt that should be shown / that is currently echoed.
    CodepointString desired_prompt_;
    CodepointString current_prompt_;

    // Last codepoint remembered by putcode (used for CRLF handling).
    // Starts at 0 so the first putcode call never reads an indeterminate value.
    char32_t readline_lastc_ = 0;

    // Make the echoed prompt/command match the desired prompt/command.
    void echo_command();

    // Erase the echoed prompt/command from the console.
    void erase_command();

public:
    ReadlineDevice();

    // The callback must be set before using the readline device.
    void set_print_callback(print_callback cb);

    // Change the prompt.
    void set_prompt(const CodepointString &prompt);

    // Use this to print anything on the console.
    void print(const CodepointString &cps);

    // Whenever the user types a character, call 'putcode'. If the code is
    // newline, this returns the line of text that was entered, including the
    // newline. Otherwise returns empty string. Backspace is handled here.
    CodepointString putcode(char32_t codepoint);

    // This can be used to convert a codepoint string into a
    // UTF8-string.
    static std::string to_utf8(const CodepointString &cps);

    // This can be used to convert UTF8 to a codepoint string.
    // Some of the bytes may not be consumed. Returns the Codepoint
    // string and, through 'consumed' when it is non-null, the number of
    // bytes consumed.
    static CodepointString from_utf8(std::string_view source, int *consumed);
};
#endif // READLINE_HPP