From bd389c7815931343d26d0316f5bc9de4fc8364be Mon Sep 17 00:00:00 2001 From: jyelon Date: Mon, 25 Apr 2022 13:43:11 -0400 Subject: [PATCH] Can now parse HTTP responses. --- luprex/core/cpp/http.cpp | 692 ++++++++++++++++++++++++++++++++------- luprex/core/cpp/http.hpp | 112 +++++-- luprex/core/cpp/util.cpp | 78 +++++ luprex/core/cpp/util.hpp | 37 ++- 4 files changed, 766 insertions(+), 153 deletions(-) diff --git a/luprex/core/cpp/http.cpp b/luprex/core/cpp/http.cpp index 11850670..5042701c 100644 --- a/luprex/core/cpp/http.cpp +++ b/luprex/core/cpp/http.cpp @@ -1,3 +1,8 @@ +// +// Things to worry about: +// Expect: 100-Continue + + #include "http.hpp" #include "wrap-sstream.hpp" @@ -7,45 +12,192 @@ #include -static void url_encode(const eng::string &value, StreamBuffer *sb) { +using string_view = std::string_view; + +bool words_separated_by_dashes(string_view v) { + while (true) { + if (!sv::ascii_isalpha(sv::zfront(v))) return false; + v.remove_prefix(1); + while (sv::ascii_isalnum(sv::zfront(v))) v.remove_prefix(1); + if (v.empty()) return true; + if (sv::zfront(v) != '-') return false; + v.remove_prefix(1); + } +} + +// Technically, this is a true, correct URL encode routine. +static eng::string url_encode_param(string_view value) { + eng::ostringstream result; const char *hexdigits = "0123456789ABCDEF"; for (int i = 0; i < int(value.size()); i++) { char c = value[i]; - if (sv::ascii_isalnum(c) || c == '-' || c == '_' || c == '.' || c == '~' || (c == '/')) { - sb->write_char(c); + if (sv::ascii_isalnum(c) || c == '-' || c == '_' || c == '.' || c == '~') { + result << c; } else if (c == ' ') { - sb->write_char('+'); + result << '+'; } else { - sb->write_char('%'); - sb->write_char(hexdigits[c>>4]); - sb->write_char(hexdigits[c&15]); + result << '%' << hexdigits[c>>4] << hexdigits[c&15]; } } + return result.str(); +} + +// This URL encode routine leaves slashes intact. That's not +// technically correct, but it's really what you want for paths. +static eng::string url_encode_path(string_view value) { + eng::ostringstream result; + const char *hexdigits = "0123456789ABCDEF"; + for (int i = 0; i < int(value.size()); i++) { + char c = value[i]; + + if (sv::ascii_isalnum(c) || c == '-' || c == '_' || c == '.' || c == '~' || c == '/') { + result << c; + } else if (c == ' ') { + result << '+'; + } else { + result << '%' << hexdigits[c>>4] << hexdigits[c&15]; + } + } + return result.str(); +} + +static eng::string url_decode(string_view eurl) { + eng::ostringstream result; + int i = 0; + int len = eurl.size(); + while (i < len) { + char c = eurl[i]; + if (c == '+') { + result << ' '; + i += 1; + } else if ((c == '%') && (i + 2 < len)) { + std::string_view code = eurl.substr(i + 1, 2); + uint64_t value = sv::to_hex64(code); + if (value > 255) { + result << '?'; + } else { + result << char(value); + } + i += 3; + } else { + result << c; + i += 1; + } + } + return result.str(); +} + +static void send_encoded_path(std::string_view path, const UrlParameters ¶ms, StreamBuffer *sb) { + sb->write_bytes(url_encode_path(path)); + bool first_param = true; + for (const auto &pair : params) { + sb->write_char(first_param ? '?' : '&'); + sb->write_bytes(url_encode_param(pair.first)); + sb->write_char('='); + sb->write_bytes(url_encode_param(pair.second)); + first_param = false; + } } -class ErrorStringStream : public eng::ostringstream { -private: - eng::string *target_; +static void send_host_and_port(std::string_view host, int port, StreamBuffer *sb) { + sb->write_bytes(host); + if (port != 0) { + sb->write_char(':'); + sb->ostream() << port; + } +} + +// In a properly-formed url, the hostname and path are url encoded. +// This parser expects an encoded URL. + +struct ParsedURL { public: - ErrorStringStream(eng::string *target) : target_(target) {} - ~ErrorStringStream() { - if (target_->empty()) { - (*target_) = str(); + bool valid; + eng::string proto; + eng::string host; + int port; + eng::string path; + UrlParameters params; + +public: + void clear() { + valid = false; + proto.clear(); + host.clear(); + port = 0; + path.clear(); + params.clear(); + } + + eng::string str() { + StreamBuffer sb; + sb.write_bytes(proto); + sb.write_bytes("://"); + send_host_and_port(host, port, &sb); + send_encoded_path(path, params, &sb); + return eng::string(sb.view()); + } + + ParsedURL(std::string_view url) { + clear(); + + proto = util::ascii_tolower(sv::read_to_sep(url, ':')); + if (!sv::has_prefix(url, "//")) { clear(); return; } + url.remove_prefix(2); + if (!words_separated_by_dashes(proto)) { clear(); return; } + + // Extract the host and port as a single string. + string_view turl = url; + string_view hostport = sv::read_to_sep(turl, '/'); + url.remove_prefix(hostport.size()); + + // Split the host and port from each other and parse them. + host = util::ascii_tolower(sv::read_to_sep(hostport, ':')); + if (host.empty()) { clear(); return; } + if (!hostport.empty()) { + int64_t iport = sv::to_int64(hostport); + if ((iport < 1) || (iport > 65535)) { + clear(); return; + } + port = iport; } + + // Split off the path. + path = url_decode(sv::read_to_sep(url, '?')); + if (path.empty()) { + path = "/"; + } + + // Process url parameters. + while (!sv::isnull(url)) { + std::string_view keyval = sv::read_to_sep(url, '&'); + if (keyval.empty()) { clear(); return; } + std::string_view key = sv::read_to_sep(keyval, '='); + if (key.empty()) { clear(); return; } + if (sv::isnull(keyval)) { clear(); return; } + eng::string dkey = url_decode(key); + eng::string dval = url_decode(keyval); + params[dkey] = dval; + } + + // If we made it here, we have a valid URL + valid = true; } }; -HttpRequest::HttpRequest() { +HttpOutRequest::HttpOutRequest() { verify_certificate_ = true; port_ = 0; } -void HttpRequest::set_verify_certificate(bool flag) { - verify_certificate_ = flag; +void HttpOutRequest::fail(string_view s) { + if (error_.empty()) { + error_ = s; + } } -eng::string HttpRequest::target() const { +eng::string HttpOutRequest::target() const { assert(check().empty()); eng::ostringstream oss; oss << (verify_certificate_ ? "cert" : "nocert"); @@ -53,151 +205,154 @@ eng::string HttpRequest::target() const { return oss.str(); } -void HttpRequest::set_method(const eng::string &s) { +void HttpOutRequest::set_verify_certificate(bool flag) { + verify_certificate_ = flag; +} + +void HttpOutRequest::set_method(const eng::string &s) { eng::string method = util::ascii_toupper(s); if ((method != "GET") && (method != "HEAD")) { - ErrorStringStream error(&error_); - error << "HTTPS method not implemented: " << method; - error << ". Currently, only HEAD and GET are implemented."; + fail(util::ss("HTTP method not implemented: ", method, ".", + "Currently, only HEAD and GET are implemented.")); return; } if ((!method_.empty()) && (method_ != method)) { - ErrorStringStream error(&error_); - error << "HTTPS method specified twice: " << method_ << " and " << method; + fail(util::ss("HTTP method specified twice: ", method_, " and ", method)); return; } method_ = method; } -void HttpRequest::set_host(const eng::string &s) { +void HttpOutRequest::set_host(const eng::string &s) { eng::string host = util::ascii_tolower(s); if (host.empty()) { - ErrorStringStream error(&error_); - error << "HTTPS hostname cannot be empty string."; + fail(util::ss("HTTP hostname cannot be empty string.")); return; } // This is not quite strict, but it's close. I believe // the DNS lookup will fail for invalid hostnames anyway. for (char c : host) { if ((c != '-') && (c != '.') && (!sv::ascii_isalnum(c))) { - ErrorStringStream error(&error_); - error << "HTTPS hostnames can only contain letters, digits, and hyphen: " << host; + fail(util::ss("HTTP hostnames can only contain letters, digits, and hyphen: ", host)); return; } } if (!host_.empty()) { - ErrorStringStream error(&error_); - error << "HTTPS hostname specified twice: " << host_ << " and " << host; + fail(util::ss("HTTP hostname specified twice: ", host_, " and ", host)); return; } host_ = host; } -void HttpRequest::set_port(int port) { +void HttpOutRequest::set_port(int port) { if ((port < 1) || (port > 65535)) { - ErrorStringStream error(&error_); - error << "HTTP port must be between 1 and 65535: " << port; + fail(util::ss("HTTP port must be between 1 and 65535: ", port)); return; } if (port_ != 0) { - ErrorStringStream error(&error_); - error << "HTTPS port specified twice: " << port_ << " and " << port; + fail(util::ss("HTTP port specified twice: ", port_, " and ", port)); return; } port_ = port; } -void HttpRequest::set_url(const eng::string &url) { - if (sv::has_prefix(url, "https://")) { - ErrorStringStream error(&error_); - error << "set_url(full_url) not implemented yet."; - return; - } else if (sv::has_prefix(url, "/")) { - if (!path_.empty()) { - ErrorStringStream error(&error_); - error << "HTTP path specified twice: " << path_ << " and " << url; - return; - } - path_ = url; - } else { - ErrorStringStream error(&error_); - error << "HTTP url must start with https://, or with /"; +void HttpOutRequest::set_path(string_view path) { + if (!sv::has_prefix(path, "/")) { + fail(util::ss("HTTP path must start with slash")); return; } + if (!path_.empty()) { + fail(util::ss("HTTP path specified twice: ", path_, " and ", path)); + return; + } + path_ = path; } -void HttpRequest::set_param(const eng::string &key, const eng::string &val) { +void HttpOutRequest::set_param(const eng::string &key, const eng::string &val) { if (params_.find(key) != params_.end()) { - ErrorStringStream error(&error_); - error << "HTTP url parameter specified twice: " << key; + fail(util::ss("HTTP url parameter specified twice: ", key)); + return; + } + if (key.empty()) { + fail(util::ss("HTTP parameter key cannot be empty")); return; } params_[key] = val; } -void HttpRequest::set_verify_certificate(LuaStack &LS, LuaSlot val) { +void HttpOutRequest::set_url(string_view url) { + ParsedURL parsed_url(url); + if (!parsed_url.valid) { + fail(util::ss("syntactically invalid URL: ", url)); + return; + } + if (parsed_url.proto != "https") { + fail(util::ss("unsupported protocol: ", parsed_url.proto)); + return; + } + set_host(parsed_url.host); + if (parsed_url.port) set_port(parsed_url.port); + set_path(parsed_url.path); + for (const auto &pair : parsed_url.params) { + set_param(pair.first, pair.second); + } +} + +void HttpOutRequest::set_verify_certificate(LuaStack &LS, LuaSlot val) { if (!LS.isboolean(val)) { - ErrorStringStream error(&error_); - error << "HTTP verify_certificate must be a boolean"; + fail(util::ss("HTTP verify_certificate must be a boolean")); return; } set_verify_certificate(LS.ckboolean(val)); } -void HttpRequest::set_method(LuaStack &LS, LuaSlot val) { +void HttpOutRequest::set_method(LuaStack &LS, LuaSlot val) { if (!LS.isstring(val)) { - ErrorStringStream error(&error_); - error << "HTTP method must be a string"; + fail(util::ss("HTTP method must be a string")); return; } set_method(LS.ckstring(val)); } -void HttpRequest::set_host(LuaStack &LS, LuaSlot val) { +void HttpOutRequest::set_host(LuaStack &LS, LuaSlot val) { if (!LS.isstring(val)) { - ErrorStringStream error(&error_); - error << "HTTP host must be a string"; + fail(util::ss("HTTP host must be a string")); return; } set_host(LS.ckstring(val)); } -void HttpRequest::set_port(LuaStack &LS, LuaSlot val) { +void HttpOutRequest::set_port(LuaStack &LS, LuaSlot val) { if (!LS.isint(val)) { - ErrorStringStream error(&error_); - error << "HTTP port must be an int"; + fail(util::ss("HTTP port must be an int")); return; } set_port(LS.ckint(val)); } -void HttpRequest::set_url(LuaStack &LS, LuaSlot val) { +void HttpOutRequest::set_path(LuaStack &LS, LuaSlot val) { if (!LS.isstring(val)) { - ErrorStringStream error(&error_); - error << "HTTP url must be a string"; + fail(util::ss("HTTP path must be a string")); return; } - set_url(LS.ckstring(val)); + set_path(LS.ckstring(val)); } -void HttpRequest::set_param(LuaStack &LS, LuaSlot key, LuaSlot val) { +void HttpOutRequest::set_param(LuaStack &LS, LuaSlot key, LuaSlot val) { if (!LS.isstring(key)) { - ErrorStringStream error(&error_); - error << "HTTP url parameter key must be a string"; + fail(util::ss("HTTP url parameter key must be a string")); return; } if (!LS.isstring(val)) { - ErrorStringStream error(&error_); - error << "HTTP url parameter val must be a string"; + fail(util::ss("HTTP url parameter val must be a string")); return; } set_param(LS.ckstring(key), LS.ckstring(val)); } -void HttpRequest::set_params(LuaStack &LS0, LuaSlot tab) { +void HttpOutRequest::set_params(LuaStack &LS0, LuaSlot tab) { if (!LS0.istable(tab)) { - ErrorStringStream error(&error_); - error << "HTTP params must be a table"; + fail(util::ss("HTTP params must be a table")); return; } LuaVar key, val; @@ -208,7 +363,15 @@ void HttpRequest::set_params(LuaStack &LS0, LuaSlot tab) { } } -void HttpRequest::set_defaults() { +void HttpOutRequest::set_url(LuaStack &LS, LuaSlot val) { + if (!LS.isstring(val)) { + fail(util::ss("HTTP url must be a string")); + return; + } + set_url(LS.ckstring(val)); +} + +void HttpOutRequest::set_defaults() { if (method_.empty()) { method_ = "GET"; } @@ -217,7 +380,7 @@ void HttpRequest::set_defaults() { } } -void HttpRequest::set_config(LuaStack &LS0, LuaSlot tab) { +void HttpOutRequest::set_config(LuaStack &LS0, LuaSlot tab) { LuaVar key, val; LuaStack LS(LS0.state(), key, val); LS.set(key, LuaNil); @@ -230,23 +393,25 @@ void HttpRequest::set_config(LuaStack &LS0, LuaSlot tab) { set_host(LS, val); } else if (kstr == "port") { set_port(LS, val); - } else if (kstr == "url") { - set_url(LS, val); + } else if (kstr == "path") { + set_path(LS, val); + } else if (kstr == "encodedpath") { + set_path(LS, val); } else if (kstr == "params") { set_params(LS, val); + } else if (kstr == "url") { + set_url(LS, val); } else if (kstr == "verifycertificate") { set_verify_certificate(LS, val); } else if (kstr == "") { - ErrorStringStream error(&error_); - error << "HTTP config parameter names must be strings."; + fail(util::ss("HTTP config parameter names must be strings.")); } else { - ErrorStringStream error(&error_); - error << "HTTP unrecognized config parameter: " << kstr; + fail(util::ss("HTTP unrecognized config parameter: ", kstr)); } } } -eng::string HttpRequest::check() const { +eng::string HttpOutRequest::check() const { if (!error_.empty()) { return error_; } @@ -265,7 +430,7 @@ eng::string HttpRequest::check() const { return ""; } -void HttpRequest::send_internal(StreamBuffer *sb, bool debug_string) const { +void HttpOutRequest::send_internal(StreamBuffer *sb, bool debug_string) const { // If there's an error in the request, handle it. In debug string mode, // we just put the error into the output. In production mode, we assert // fail. @@ -285,23 +450,13 @@ void HttpRequest::send_internal(StreamBuffer *sb, bool debug_string) const { // Send the command. sb->write_bytes(method_); sb->write_char(' '); - url_encode(path_, sb); - bool first_param = true; - for (const auto &pair : params_) { - sb->write_char(first_param ? '?' : '&'); - url_encode(pair.first, sb); - sb->write_char('='); - url_encode(pair.second, sb); - first_param = false; - } + send_encoded_path(path_, params_, sb); sb->write_bytes(" HTTP/1.1"); sb->write_bytes(linebreak); // Send the host header. sb->write_bytes("Host: "); - sb->write_bytes(host_); - sb->write_char(':'); - sb->ostream() << port_; + send_host_and_port(host_, port_, sb); sb->write_bytes(linebreak); // The empty accept-encoding header notifies the @@ -320,50 +475,335 @@ void HttpRequest::send_internal(StreamBuffer *sb, bool debug_string) const { } } -eng::string HttpRequest::DebugString() { +eng::string HttpOutRequest::DebugString() { StreamBuffer sb; send_internal(&sb, true); return eng::string(sb.view()); } -HttpResponse::HttpResponse() { - response_code_ = 0; +HttpInResponse::HttpInResponse() { + status_code_ = 0; response_length_ = 0; - mime_type_ = "application/empty"; + mime_type_ = ""; + content_length_ = -1; } -void HttpResponse::fail(int response_code, const eng::string &error) { - response_code_ = response_code; - error_ = error; - response_length_ = 0; - mime_type_ = "application/empty"; +eng::string HttpInResponse::DebugString() const { + eng::ostringstream oss; + oss << "HttpInResponse:" << std::endl; + oss << " status_code: " << status_code_ << std::endl; + oss << " error: " << error_ << std::endl; + oss << " content_length: " << content_length_ << std::endl; + oss << " transfer_encoding: " << transfer_encoding_ << std::endl; + oss << " location: " << location_ << std::endl; + oss << " mime_type: " << mime_type_ << std::endl; + oss << " charset: " << charset_ << std::endl; + oss << " content: " << content_ << std::endl; + oss << " response_length: " << response_length_ << std::endl; + return oss.str(); +} + +void HttpInResponse::fail(int code, string_view message) { + status_code_ = code; + error_ = message; + mime_type_ = ""; + charset_ = ""; content_ = ""; } -void HttpResponse::parse(const StreamBuffer *sb) { +void HttpInResponse::incomplete(bool closed) { + if (closed) { + fail(500, "response truncated"); + } else { + fail(0, "response not yet fully received"); + } +} + +void HttpInResponse::parse_content_encoding(string_view value) { + content_encoding_ = util::ascii_tolower(value); +} + +void HttpInResponse::parse_content_length(string_view value) { + int64_t code = sv::to_int64(value); + if ((code < 0) || (code > INT_MAX)) { + fail(500, util::ss("unparseable content-length: ", value)); + } + content_length_ = code; +} + +void HttpInResponse::parse_content_type(string_view value) { + eng::string ctype = util::ascii_tolower(value); + string_view ctview(ctype); + mime_type_ = sv::trim(sv::read_to_sep(ctview, ';')); + if (mime_type_.empty()) { + fail(500, util::ss("unparseable content-type: ", value)); + return; + } + while (true) { + string_view feature = sv::trim(sv::read_to_sep(ctview, ';')); + if (feature.empty()) { + return; + } + string_view ftype = sv::trim(sv::read_to_sep(feature, '=')); + if (ftype == "charset") { + charset_ = sv::trim(feature); + } + } +} + +void HttpInResponse::parse_location(string_view value) { + location_ = url_decode(value); +} + +void HttpInResponse::parse_transfer_encoding(string_view value) { + transfer_encoding_ = util::ascii_tolower(value); +} + +void HttpInResponse::parse_header(string_view header, string_view value) { + if (header == "content-encoding") { + parse_content_encoding(value); + } else if (header == "content-length") { + parse_content_length(value); + } else if (header == "content-type") { + parse_content_type(value); + } else if (header == "location") { + parse_location(value); + } else if (header == "transfer-encoding") { + parse_transfer_encoding(value); + } else if (header == "content-range") { + fail(500, util::ss("unsupported response header: ", header)); + } +} + +bool HttpInResponse::parse_content_basic(std::string_view &view, bool closed) { + if (content_length_ >= 0) { + if (content_length_ > MAX_CONTENT_LENGTH) { + fail(500, "content too long"); + return false; + } + if (int(view.size()) < content_length_) { + incomplete(closed); + return false; + } + content_ = sv::read_nbytes(view, content_length_); + } else { + if (int64_t(view.size()) > MAX_CONTENT_LENGTH) { + fail(500, "content too long"); + return false; + } + if (!closed) { + incomplete(closed); + return false; + } + content_ = sv::read_nbytes(view, view.size()); + } + return true; +} + +bool HttpInResponse::parse_content_chunked(std::string_view &view, bool closed) { + int64_t total_size = 0; + std::vector chunks; + while (true) { + std::string_view chunk_header = sv::trim(sv::read_to_line(view)); + if (sv::isnull(view)) { + incomplete(closed); + return false; + } + int64_t chunk_size = sv::to_hex64(chunk_header, -1); + if (chunk_size < 0) { + fail(500, "unparseable chunk header"); + return false; + } + if (chunk_size > MAX_CONTENT_LENGTH) { + fail(500, "content too long"); + return false; + } + if (chunk_size == 0) break; + total_size += chunk_size; + if (total_size > MAX_CONTENT_LENGTH) { + fail(500, "content too long"); + return false; + } + std::string_view chunk = sv::read_nbytes(view, chunk_size); + if (int64_t(chunk.size()) != chunk_size) { + incomplete(closed); + return false; + } + std::string_view newline = sv::read_to_line(view); + if (!newline.empty()) { + fail(500, "corrupted chunk encoding"); + return false; + } + if (sv::isnull(view)) { + incomplete(closed); + return false; + } + chunks.push_back(chunk); + } + content_.resize(total_size); + size_t offset = 0; + for (string_view chunk : chunks) { + content_.replace(offset, chunk.size(), chunk); + offset += chunk.size(); + } + return true; +} + +void HttpInResponse::parse(const StreamBuffer *sb, bool closed) { // We're not going to modify the StreamBuffer at all. // Instead, we work entirely on a view. - std::string_view view = sb->view(); + string_view view = sb->view(); - // Special case this. - if (view.empty()) { - fail(500, "HTTP server response completely empty"); + // Get the status line. + string_view status = sv::trim(sv::read_to_line(view)); + if (sv::isnull(view)) { + incomplete(closed); return; } // Parse the status line. - std::string_view status = sv::read_to_line(view); - if (status.empty()) { - fail(500, "HTTP status-line not present in response"); + string_view scode = sv::read_to_space(status); + int64_t code = sv::to_int64(scode, 0); + if ((code < 100) || (code > 599)) { + fail(500, util::ss("protocol error: invalid response code: ", scode)); + } + status_code_ = code; + + // Responses outside the range 200-299 are errors, + // and therefore must store an error message. + if ((code < 200) || (code > 299)) { + error_ = status; + if (error_.empty()) { + fail(code, util::ss("error code ", code)); + } + } + + // Parse the headers. + while (true) { + string_view header = sv::read_to_line(view); + if (sv::isnull(view)) { + incomplete(closed); + return; + } + if (header.empty()) { + break; + } + eng::string command = util::ascii_tolower(sv::trim(sv::read_to_sep(header, ':'))); + if (sv::isnull(header)) { + fail(500, util::ss("protocol error: no colon in header line: ", command)); + return; + } + if (!words_separated_by_dashes(command)) { + fail(500, util::ss("protocol error: invalid header: ", command)); + return; + } + parse_header(command, sv::trim(header)); + } + + // Process the content using the transfer encoding. + if (transfer_encoding_ == "") { + if (!parse_content_basic(view, closed)) return; + } else if (transfer_encoding_ == "chunked") { + if (!parse_content_chunked(view, closed)) return; + } else { + fail(500, util::ss("unsupported transfer-encoding: ", transfer_encoding_)); return; } - //std::string_view status_code = util::sv_split_one(status, ' '); - + // Calculate the response length. + response_length_ = sb->fill() - view.size(); + // If it's not a redirect, disallow 'location'. + if ((status_code_ < 300) || (status_code_ > 399)) { + if (!location_.empty()) { + fail(500, util::ss("redirect specified, but result code not 300-399: ", code)); + return; + } + } + // If the server didn't specify content-type, make a guess. + if (mime_type_.empty()) { + if (sv::valid_utf8(content_)) { + mime_type_ = "text/plain"; + charset_ = "utf-8"; + } else { + mime_type_ = "application/octet-stream"; + charset_ = ""; + } + } + + // If it's multipart, reject it. + if (sv::has_prefix(mime_type_, "multipart/")) { + fail(500, "multipart messages not implemented"); + return; + } + + // If it's text, demand a reasonable charset. Otherwise, + // ignore the charset. + if (sv::has_prefix(mime_type_, "text/")) { + if (charset_.empty()) { + charset_ = "utf-8"; + } + if (charset_ != "utf-8") { + fail(500, util::ss("charset not supported: ", charset_)); + return; + } + } else { + charset_.clear(); + } + + // Uncompress the content. + if ((content_encoding_ == "") || (content_encoding_ == "identity")) { + } else { + fail(500, util::ss("content-encoding not supported: ", content_encoding_)); + return; + } + + // If there's an error code, throw out the content. + if ((status_code_ < 200) || (status_code_ > 299)) { + mime_type_.clear(); + charset_.clear(); + content_.clear(); + } } +void HttpInResponse::store(LuaStack &LS0, LuaSlot tab) { + LuaStack LS(LS0.state()); + + LS.newtable(tab); + LS.rawset(tab, "responsecode", status_code_); + if (!error_.empty()) { + LS.rawset(tab, "error", error_); + } + if (!location_.empty()) { + LS.rawset(tab, "location", location_); + } + if (!mime_type_.empty()) { + LS.rawset(tab, "mimetype", mime_type_); + LS.rawset(tab, "content", content_); + } + + // Debugging fields. Do not use for lua programming. + LS.rawset(tab, "dbg-content-length", content_length_); + LS.rawset(tab, "dbg-transfer-encoding", transfer_encoding_); + LS.rawset(tab, "dbg-charset", charset_); + LS.rawset(tab, "dbg-response-length", response_length_); +} + +LuaDefine(http_fixurl, "url", "validate URL and repair minor flaws in the URL syntax") { + LuaArg url; + LuaRet fixed; + LuaStack LS(L, url, fixed); + ParsedURL parsed(LS.ckstring(url)); + if (!parsed.valid) { + luaL_error(L, "invalid URL, not fixable"); + return LS.result(); + } + LS.set(fixed, parsed.str()); + return LS.result(); +} + + LuaDefine(http_request, "reqtab", "|Given an HTTP request in the form of a table, returns the same " "|request as a string, to assist with debugging." @@ -397,7 +837,7 @@ LuaDefine(http_request, "reqtab", LuaArg tab; LuaRet str; LuaStack LS(L, tab, str); - HttpRequest req; + HttpOutRequest req; req.set_config(LS, tab); req.set_defaults(); eng::string error = req.check(); @@ -409,3 +849,15 @@ LuaDefine(http_request, "reqtab", return LS.result(); } +LuaDefine(http_response, "text", "") { + LuaArg text; + LuaRet tab; + LuaStack LS(L, text, tab); + HttpInResponse resp; + StreamBuffer sb; + sb.write_bytes(LS.ckstring(text)); + resp.parse(&sb, true); + resp.store(LS, tab); + return LS.result(); +} + diff --git a/luprex/core/cpp/http.hpp b/luprex/core/cpp/http.hpp index 9df944c4..8f274d8f 100644 --- a/luprex/core/cpp/http.hpp +++ b/luprex/core/cpp/http.hpp @@ -20,7 +20,9 @@ #include "luastack.hpp" #include "streambuffer.hpp" -class HttpRequest : public eng::nevernew { +using UrlParameters = eng::map; + +class HttpOutRequest : public eng::nevernew { private: // If the request contains an error, the error // message is stored here. @@ -40,24 +42,26 @@ private: // Port number. int port_; - // The path is always UTF-8. This field should not be urlencoded. - // Instead, urlencoding is done automatically when the request - // is sent. Should not include protocol, host, port, or parameters. + // You may specify either path or encoded_path. + // The path is not url-encoded, and must not include URL parameters. eng::string path_; // If params is nonempty, then we will add URL parameters // to the URL. The contents of the params field should not be // urlencoded, the urlencoding is done automatically when the - // request is sent. - eng::map params_; + // request is sent. If you specify encoded_path, then the + // params must be empty, because the encoded path already contains + // the params. + UrlParameters params_; private: - void send_internal(StreamBuffer *target, bool debug_string) const; + void fail(std::string_view error); + void send_internal(StreamBuffer *target, bool debug_string) const; public: // Construct an empty HTTP request. // All of the fields have empty values. - HttpRequest(); + HttpOutRequest(); // Get fields. const eng::string &error() const { return error_; } @@ -80,17 +84,19 @@ public: void set_method(const eng::string &method); void set_host(const eng::string &host); void set_port(int port); - void set_url(const eng::string &url); + void set_path(std::string_view path); void set_param(const eng::string &key, const eng::string &value); - + void set_url(std::string_view url); + // Same as above, but using Lua values. void set_verify_certificate(LuaStack &LS, LuaSlot val); void set_method(LuaStack &LS, LuaSlot val); void set_host(LuaStack &LS, LuaSlot val); void set_port(LuaStack &LS, LuaSlot val); - void set_url(LuaStack &LS, LuaSlot val); + void set_path(LuaStack &LS, LuaSlot path); void set_param(LuaStack &LS, LuaSlot key, LuaSlot val); void set_params(LuaStack &LS, LuaSlot tab); + void set_url(LuaStack &LS, LuaSlot val); // Set default values for any fields that should have // defaults. This must be done after setting regular @@ -111,48 +117,94 @@ public: eng::string DebugString(); }; -class HttpResponse { +class HttpInResponse { private: - // The HTTP response code. - int response_code_; - + // The HTTP response status code. + int status_code_; + // If the HTTP response contains an error, the // error message is stored here. If the HTTP response // is a success such as "200 OK" or "201 Created", this // is the empty string, not "OK" or "Created". eng::string error_; - // The length in bytes of the entire response. - // May be zero, which means that the response - // was so garbled that we couldn't determine the length. - bool response_length_; - + // Only if content-length header present, otherwise, -1. + int64_t content_length_; + + // If empty, it means there was no transfer-encoding header. + eng::string transfer_encoding_; + + // If empty, it means there was no content-encoding header. + eng::string content_encoding_; + + // Only if location header present. + eng::string location_; + // MIME type of the content. eng::string mime_type_; + // Charset of the content. Hopefully utf-8. + eng::string charset_; + // The content as string. eng::string content_; + // The length in bytes of the entire response. + // May be zero, which means that the response + // was so garbled that we couldn't determine the length. + int response_length_; + private: + // Store a message indicating that we haven't received enough + // bytes yet. If the connection is closed and we still haven't + // received enough bytes, that's a fatal error. + void incomplete(bool closed); + // Parse a response header. Most headers are ignored. + // If the header contains an error, the error is stored. + void parse_header(std::string_view header, std::string_view value); + + // Parse specific headers. + // For several headers, all we do is verify that they aren't + // invoking unsupported features. + void parse_content_encoding(std::string_view value); + void parse_content_length(std::string_view value); + void parse_content_type(std::string_view value); + void parse_location(std::string_view value); + void parse_transfer_encoding(std::string_view value); + + // parse the body + bool parse_content_basic(std::string_view &view, bool closed); + bool parse_content_chunked(std::string_view &view, bool closed); public: + const int64_t MAX_CONTENT_LENGTH = 1000000; + // Construct a blank response. - HttpResponse(); + HttpInResponse(); - // Store an error message. This is used when the client detects an error, + // Store a result code and an error message, and clear the content. + // This is generally used when the client detects an error, // such as a DNS lookup fail, a connection failed, an SSL negotiation - // failed, or the like. Clears the content, leaving only the error - // and response code. - void fail(int response_code, const eng::string &error); + // failed, or the like. + void fail(int status_code, std::string_view error); - // Parse the HTTP response. Note that the response is not - // removed from the StreamBuffer, which is always unmodified. - // If you want to remove the response from the StreamBuffer, see - // response_length. - void parse(const StreamBuffer *sb); + // Parse the HTTP response. The closed flag is to be set to true if the + // remote has closed the connection. + // + // If the request is incomplete, generates a 600 incomplete error. In that + // case, loading more data from the server might improve the situation. + // + // Note that the response is not ever removed from the StreamBuffer, which + // is always unmodified. If you want to remove the response from the + // StreamBuffer, see response_length. + // + void parse(const StreamBuffer *sb, bool closed); // Convert the HTTP response to a lua table. void store(LuaStack &LS, LuaSlot tab); + + // Convert to a debug string. + eng::string DebugString() const; }; #endif // HTTP_HPP diff --git a/luprex/core/cpp/util.cpp b/luprex/core/cpp/util.cpp index 47343748..baf21132 100644 --- a/luprex/core/cpp/util.cpp +++ b/luprex/core/cpp/util.cpp @@ -33,6 +33,15 @@ bool valid_int64(string_view value) { return true; } +bool valid_hex64(string_view value) { + int64_t result; + const char *last = value.data() + value.size(); + auto r = std::from_chars(value.data(), last, result, 16); + if (r.ec != std::errc()) return false; + if (r.ptr != last) return false; + return true; +} + bool valid_double(string_view value) { double result; const char *last = value.data() + value.size(); @@ -51,6 +60,15 @@ int64_t to_int64(string_view value, int64_t errval) { return result; } +uint64_t to_hex64(string_view value, uint64_t errval) { + uint64_t result; + const char *last = value.data() + value.size(); + auto r = std::from_chars(value.data(), last, result, 16); + if (r.ec != std::errc()) return errval; + if (r.ptr != last) return errval; + return result; +} + double to_double(string_view value, double errval) { double result; const char *last = value.data() + value.size(); @@ -192,6 +210,65 @@ string_view read_to_space(string_view &source) { return result; } +string_view read_nbytes(string_view &source, int nbytes) { + if (nbytes < 0) nbytes = 0; + if (nbytes > int(source.size())) nbytes = source.size(); + string_view result = source.substr(0, nbytes); + source = source.substr(nbytes); + return result; +} + +bool valid_utf8(string_view s) +{ + const unsigned char *bytes = (const unsigned char *)s.data(); + const unsigned char *tail = bytes + s.size(); + unsigned int codepoint; + int seqlen; + + while (bytes < tail) { + if ((bytes[0] & 0x80) == 0x00) { + // U+0000 to U+007F + codepoint = (bytes[0] & 0x7F); + seqlen = 1; + } else if ((bytes[0] & 0xE0) == 0xC0) { + // U+0080 to U+07FF + codepoint = (bytes[0] & 0x1F); + seqlen = 2; + } else if ((bytes[0] & 0xF0) == 0xE0) { + // U+0800 to U+FFFF + codepoint = (bytes[0] & 0x0F); + seqlen = 3; + } else if ((bytes[0] & 0xF8) == 0xF0) { + // U+10000 to U+10FFFF + codepoint = (bytes[0] & 0x07); + seqlen = 4; + } else { + return false; + } + + if (bytes + seqlen > tail) { + return false; + } + + for (int i = 1; i < seqlen; ++i) { + if ((bytes[i] & 0xC0) != 0x80) return false; + codepoint = (codepoint << 6) | (bytes[i] & 0x3F); + } + + if ((codepoint > 0x10FFFF) || + ((codepoint >= 0xD800) && (codepoint <= 0xDFFF)) || + ((codepoint <= 0x007F) && (seqlen != 1)) || + ((codepoint >= 0x0080) && (codepoint <= 0x07FF) && (seqlen != 2)) || + ((codepoint >= 0x0800) && (codepoint <= 0xFFFF) && (seqlen != 3)) || + ((codepoint >= 0x10000) && (codepoint <= 0x1FFFFF) && (seqlen != 4))) { + return false; + } + + bytes += seqlen; + } + return true; +} + } // namespace sv @@ -450,6 +527,7 @@ eng::string XYZ::debug_string() const { return oss.str(); } + } // namespace util std::ostream &operator<<(std::ostream &oss, const util::hex64 &v) { diff --git a/luprex/core/cpp/util.hpp b/luprex/core/cpp/util.hpp index 0ea0b96a..ff7e9fb7 100644 --- a/luprex/core/cpp/util.hpp +++ b/luprex/core/cpp/util.hpp @@ -51,12 +51,14 @@ inline bool ascii_isspace(char c) { return (c==' ')||(c=='\t')||(c=='\r')||(c==' inline bool isnull(string_view v) { return v.data() == nullptr; } // Check if numbers can be parsed as int64/double -bool valid_int64(string_view v); bool valid_double(string_view v); +bool valid_int64(string_view v); +bool valid_hex64(string_view v); -// Parse numbers as int64/double. Returns errval on failure. -int64_t to_int64(string_view v, int64_t errval = std::numeric_limits::min()); +// Parse numbers as int32, int64, or double. Returns errval on failure. double to_double(string_view v, double errval = std::numeric_limits::quiet_NaN()); +int64_t to_int64(string_view v, int64_t errval = std::numeric_limits::min()); +uint64_t to_hex64(string_view v, uint64_t errval = std::numeric_limits::max()); // Trim whitspace from a string_view. string_view ltrim(string_view v); @@ -81,6 +83,12 @@ bool is_lua_id(string_view s); // Return true if the line of code is a lua comment. bool is_lua_comment(string_view s); +// Return the first character, but if the view is empty, +// return zero. +inline char zfront(string_view &s) { + return s.empty() ? char(0) : s.front(); +} + // Read from a string_view until separator is reached. // // If the separator appears in the source, returns everything @@ -114,6 +122,13 @@ string_view read_to_line(string_view &source); // string_view read_to_space(string_view &source); +// Read up to nbytes from a string_view. +// +string_view read_nbytes(string_view &source, int nbytes); + +// Return true if the string is valid utf-8. +bool valid_utf8(string_view s); + } // namespace sv namespace util { @@ -233,6 +248,22 @@ public: int overflow(int c) { return c; } }; +// send_to_stream: send all arguments to the specified stream. +inline void send_to_stream(std::ostream &os) {} +template +inline void send_to_stream(std::ostream &os, ARG arg, REST & ... rest) { + os << arg; + send_to_stream(os, rest...); +} + +// ss: convert all arguments to a string by sending them to a stringstream. +template +inline eng::string ss(ARGS & ... args) { + eng::ostringstream oss; + send_to_stream(oss, args...); + return oss.str(); +} + } // namespace util std::ostream &operator<<(std::ostream &oss, const util::hex64 &v);