Can now parse HTTP responses.

This commit is contained in:
2022-04-25 13:43:11 -04:00
parent d2c81e640d
commit bd389c7815
4 changed files with 766 additions and 153 deletions

View File

@@ -1,3 +1,8 @@
//
// Things to worry about:
// Expect: 100-Continue
#include "http.hpp" #include "http.hpp"
#include "wrap-sstream.hpp" #include "wrap-sstream.hpp"
@@ -7,45 +12,192 @@
#include <cstdint> #include <cstdint>
static void url_encode(const eng::string &value, StreamBuffer *sb) { using string_view = std::string_view;
// Returns true when `v` consists of one or more "words" joined by single
// dashes, where each word is a letter followed by any number of letters or
// digits (e.g. the shape of "content-type"). Any other shape, including a
// leading, trailing, or doubled dash, yields false.
// NOTE(review): relies on sv::zfront() yielding a non-alphanumeric value for
// an empty view, so that remove_prefix() is never reached on an empty view --
// confirm against the sv helpers.
bool words_separated_by_dashes(string_view v) {
    for (;;) {
        // Every word has to open with a letter.
        const bool starts_with_letter = sv::ascii_isalpha(sv::zfront(v));
        if (!starts_with_letter) {
            return false;
        }
        v.remove_prefix(1);

        // Swallow the remaining letters and digits of this word.
        while (sv::ascii_isalnum(sv::zfront(v))) {
            v.remove_prefix(1);
        }

        // Input exhausted right after a complete word: success.
        if (v.empty()) {
            return true;
        }

        // Otherwise the only acceptable continuation is a dash that
        // introduces another word.
        if (sv::zfront(v) == '-') {
            v.remove_prefix(1);
            continue;
        }
        return false;
    }
}
// Technically, this is a true, correct URL encode routine.
static eng::string url_encode_param(string_view value) {
eng::ostringstream result;
const char *hexdigits = "0123456789ABCDEF"; const char *hexdigits = "0123456789ABCDEF";
for (int i = 0; i < int(value.size()); i++) { for (int i = 0; i < int(value.size()); i++) {
char c = value[i]; char c = value[i];
if (sv::ascii_isalnum(c) || c == '-' || c == '_' || c == '.' || c == '~' || (c == '/')) { if (sv::ascii_isalnum(c) || c == '-' || c == '_' || c == '.' || c == '~') {
sb->write_char(c); result << c;
} else if (c == ' ') { } else if (c == ' ') {
sb->write_char('+'); result << '+';
} else { } else {
sb->write_char('%'); result << '%' << hexdigits[c>>4] << hexdigits[c&15];
sb->write_char(hexdigits[c>>4]);
sb->write_char(hexdigits[c&15]);
} }
} }
return result.str();
}
// URL-encode a path. This routine leaves slashes intact. That's not
// technically correct, but it's really what you want for paths.
//
// Letters, digits, '-', '_', '.', '~' and '/' are copied through unchanged,
// a space becomes '+', and every other byte is written as %XX using
// uppercase hex digits.
static eng::string url_encode_path(string_view value) {
    eng::ostringstream result;
    const char *hexdigits = "0123456789ABCDEF";
    for (char c : value) {
        if (sv::ascii_isalnum(c) || c == '-' || c == '_' || c == '.' || c == '~' || c == '/') {
            result << c;
        } else if (c == ' ') {
            result << '+';
        } else {
            // Work on the unsigned byte value. Where plain `char` is signed,
            // any byte >= 0x80 (i.e. every UTF-8 multi-byte sequence) is
            // negative, and `c >> 4` would then index before the start of
            // the hex digit table.
            const unsigned char byte = static_cast<unsigned char>(c);
            result << '%' << hexdigits[byte >> 4] << hexdigits[byte & 15];
        }
    }
    return result.str();
}
// Reverse of the URL encoders in this file: '+' becomes a space and a '%'
// followed by at least two more characters is replaced by the byte those
// two characters spell in hex. A '%' too close to the end of the input is
// copied through verbatim. When the hex conversion produces a value above
// 255, a '?' is emitted in place of the byte.
static eng::string url_decode(string_view eurl) {
    eng::ostringstream decoded;
    const int total = eurl.size();
    int pos = 0;
    while (pos < total) {
        const char ch = eurl[pos];

        if (ch == '+') {
            decoded << ' ';
            pos += 1;
            continue;
        }

        // An escape needs the '%' plus two following characters.
        const bool is_escape = (ch == '%') && (pos + 2 < total);
        if (!is_escape) {
            decoded << ch;
            pos += 1;
            continue;
        }

        std::string_view digits = eurl.substr(pos + 1, 2);
        uint64_t byte = sv::to_hex64(digits);
        if (byte <= 255) {
            decoded << char(byte);
        } else {
            decoded << '?';
        }
        pos += 3;
    }
    return decoded.str();
}
static void send_encoded_path(std::string_view path, const UrlParameters &params, StreamBuffer *sb) {
sb->write_bytes(url_encode_path(path));
bool first_param = true;
for (const auto &pair : params) {
sb->write_char(first_param ? '?' : '&');
sb->write_bytes(url_encode_param(pair.first));
sb->write_char('=');
sb->write_bytes(url_encode_param(pair.second));
first_param = false;
}
} }
class ErrorStringStream : public eng::ostringstream { static void send_host_and_port(std::string_view host, int port, StreamBuffer *sb) {
private: sb->write_bytes(host);
eng::string *target_; if (port != 0) {
sb->write_char(':');
sb->ostream() << port;
}
}
// In a properly-formed url, the hostname and path are url encoded.
// This parser expects an encoded URL.
struct ParsedURL {
public: public:
ErrorStringStream(eng::string *target) : target_(target) {} bool valid;
~ErrorStringStream() { eng::string proto;
if (target_->empty()) { eng::string host;
(*target_) = str(); int port;
eng::string path;
UrlParameters params;
public:
void clear() {
valid = false;
proto.clear();
host.clear();
port = 0;
path.clear();
params.clear();
}
eng::string str() {
StreamBuffer sb;
sb.write_bytes(proto);
sb.write_bytes("://");
send_host_and_port(host, port, &sb);
send_encoded_path(path, params, &sb);
return eng::string(sb.view());
}
ParsedURL(std::string_view url) {
clear();
proto = util::ascii_tolower(sv::read_to_sep(url, ':'));
if (!sv::has_prefix(url, "//")) { clear(); return; }
url.remove_prefix(2);
if (!words_separated_by_dashes(proto)) { clear(); return; }
// Extract the host and port as a single string.
string_view turl = url;
string_view hostport = sv::read_to_sep(turl, '/');
url.remove_prefix(hostport.size());
// Split the host and port from each other and parse them.
host = util::ascii_tolower(sv::read_to_sep(hostport, ':'));
if (host.empty()) { clear(); return; }
if (!hostport.empty()) {
int64_t iport = sv::to_int64(hostport);
if ((iport < 1) || (iport > 65535)) {
clear(); return;
}
port = iport;
} }
// Split off the path.
path = url_decode(sv::read_to_sep(url, '?'));
if (path.empty()) {
path = "/";
}
// Process url parameters.
while (!sv::isnull(url)) {
std::string_view keyval = sv::read_to_sep(url, '&');
if (keyval.empty()) { clear(); return; }
std::string_view key = sv::read_to_sep(keyval, '=');
if (key.empty()) { clear(); return; }
if (sv::isnull(keyval)) { clear(); return; }
eng::string dkey = url_decode(key);
eng::string dval = url_decode(keyval);
params[dkey] = dval;
}
// If we made it here, we have a valid URL
valid = true;
} }
}; };
HttpRequest::HttpRequest() { HttpOutRequest::HttpOutRequest() {
verify_certificate_ = true; verify_certificate_ = true;
port_ = 0; port_ = 0;
} }
void HttpRequest::set_verify_certificate(bool flag) { void HttpOutRequest::fail(string_view s) {
verify_certificate_ = flag; if (error_.empty()) {
error_ = s;
}
} }
eng::string HttpRequest::target() const { eng::string HttpOutRequest::target() const {
assert(check().empty()); assert(check().empty());
eng::ostringstream oss; eng::ostringstream oss;
oss << (verify_certificate_ ? "cert" : "nocert"); oss << (verify_certificate_ ? "cert" : "nocert");
@@ -53,151 +205,154 @@ eng::string HttpRequest::target() const {
return oss.str(); return oss.str();
} }
void HttpRequest::set_method(const eng::string &s) { void HttpOutRequest::set_verify_certificate(bool flag) {
verify_certificate_ = flag;
}
void HttpOutRequest::set_method(const eng::string &s) {
eng::string method = util::ascii_toupper(s); eng::string method = util::ascii_toupper(s);
if ((method != "GET") && (method != "HEAD")) { if ((method != "GET") && (method != "HEAD")) {
ErrorStringStream error(&error_); fail(util::ss("HTTP method not implemented: ", method, ".",
error << "HTTPS method not implemented: " << method; "Currently, only HEAD and GET are implemented."));
error << ". Currently, only HEAD and GET are implemented.";
return; return;
} }
if ((!method_.empty()) && (method_ != method)) { if ((!method_.empty()) && (method_ != method)) {
ErrorStringStream error(&error_); fail(util::ss("HTTP method specified twice: ", method_, " and ", method));
error << "HTTPS method specified twice: " << method_ << " and " << method;
return; return;
} }
method_ = method; method_ = method;
} }
void HttpRequest::set_host(const eng::string &s) { void HttpOutRequest::set_host(const eng::string &s) {
eng::string host = util::ascii_tolower(s); eng::string host = util::ascii_tolower(s);
if (host.empty()) { if (host.empty()) {
ErrorStringStream error(&error_); fail(util::ss("HTTP hostname cannot be empty string."));
error << "HTTPS hostname cannot be empty string.";
return; return;
} }
// This is not quite strict, but it's close. I believe // This is not quite strict, but it's close. I believe
// the DNS lookup will fail for invalid hostnames anyway. // the DNS lookup will fail for invalid hostnames anyway.
for (char c : host) { for (char c : host) {
if ((c != '-') && (c != '.') && (!sv::ascii_isalnum(c))) { if ((c != '-') && (c != '.') && (!sv::ascii_isalnum(c))) {
ErrorStringStream error(&error_); fail(util::ss("HTTP hostnames can only contain letters, digits, and hyphen: ", host));
error << "HTTPS hostnames can only contain letters, digits, and hyphen: " << host;
return; return;
} }
} }
if (!host_.empty()) { if (!host_.empty()) {
ErrorStringStream error(&error_); fail(util::ss("HTTP hostname specified twice: ", host_, " and ", host));
error << "HTTPS hostname specified twice: " << host_ << " and " << host;
return; return;
} }
host_ = host; host_ = host;
} }
void HttpRequest::set_port(int port) { void HttpOutRequest::set_port(int port) {
if ((port < 1) || (port > 65535)) { if ((port < 1) || (port > 65535)) {
ErrorStringStream error(&error_); fail(util::ss("HTTP port must be between 1 and 65535: ", port));
error << "HTTP port must be between 1 and 65535: " << port;
return; return;
} }
if (port_ != 0) { if (port_ != 0) {
ErrorStringStream error(&error_); fail(util::ss("HTTP port specified twice: ", port_, " and ", port));
error << "HTTPS port specified twice: " << port_ << " and " << port;
return; return;
} }
port_ = port; port_ = port;
} }
void HttpRequest::set_url(const eng::string &url) { void HttpOutRequest::set_path(string_view path) {
if (sv::has_prefix(url, "https://")) { if (!sv::has_prefix(path, "/")) {
ErrorStringStream error(&error_); fail(util::ss("HTTP path must start with slash"));
error << "set_url(full_url) not implemented yet.";
return;
} else if (sv::has_prefix(url, "/")) {
if (!path_.empty()) {
ErrorStringStream error(&error_);
error << "HTTP path specified twice: " << path_ << " and " << url;
return;
}
path_ = url;
} else {
ErrorStringStream error(&error_);
error << "HTTP url must start with https://, or with /";
return; return;
} }
if (!path_.empty()) {
fail(util::ss("HTTP path specified twice: ", path_, " and ", path));
return;
}
path_ = path;
} }
void HttpRequest::set_param(const eng::string &key, const eng::string &val) { void HttpOutRequest::set_param(const eng::string &key, const eng::string &val) {
if (params_.find(key) != params_.end()) { if (params_.find(key) != params_.end()) {
ErrorStringStream error(&error_); fail(util::ss("HTTP url parameter specified twice: ", key));
error << "HTTP url parameter specified twice: " << key; return;
}
if (key.empty()) {
fail(util::ss("HTTP parameter key cannot be empty"));
return; return;
} }
params_[key] = val; params_[key] = val;
} }
void HttpRequest::set_verify_certificate(LuaStack &LS, LuaSlot val) { void HttpOutRequest::set_url(string_view url) {
ParsedURL parsed_url(url);
if (!parsed_url.valid) {
fail(util::ss("syntactically invalid URL: ", url));
return;
}
if (parsed_url.proto != "https") {
fail(util::ss("unsupported protocol: ", parsed_url.proto));
return;
}
set_host(parsed_url.host);
if (parsed_url.port) set_port(parsed_url.port);
set_path(parsed_url.path);
for (const auto &pair : parsed_url.params) {
set_param(pair.first, pair.second);
}
}
void HttpOutRequest::set_verify_certificate(LuaStack &LS, LuaSlot val) {
if (!LS.isboolean(val)) { if (!LS.isboolean(val)) {
ErrorStringStream error(&error_); fail(util::ss("HTTP verify_certificate must be a boolean"));
error << "HTTP verify_certificate must be a boolean";
return; return;
} }
set_verify_certificate(LS.ckboolean(val)); set_verify_certificate(LS.ckboolean(val));
} }
void HttpRequest::set_method(LuaStack &LS, LuaSlot val) { void HttpOutRequest::set_method(LuaStack &LS, LuaSlot val) {
if (!LS.isstring(val)) { if (!LS.isstring(val)) {
ErrorStringStream error(&error_); fail(util::ss("HTTP method must be a string"));
error << "HTTP method must be a string";
return; return;
} }
set_method(LS.ckstring(val)); set_method(LS.ckstring(val));
} }
void HttpRequest::set_host(LuaStack &LS, LuaSlot val) { void HttpOutRequest::set_host(LuaStack &LS, LuaSlot val) {
if (!LS.isstring(val)) { if (!LS.isstring(val)) {
ErrorStringStream error(&error_); fail(util::ss("HTTP host must be a string"));
error << "HTTP host must be a string";
return; return;
} }
set_host(LS.ckstring(val)); set_host(LS.ckstring(val));
} }
void HttpRequest::set_port(LuaStack &LS, LuaSlot val) { void HttpOutRequest::set_port(LuaStack &LS, LuaSlot val) {
if (!LS.isint(val)) { if (!LS.isint(val)) {
ErrorStringStream error(&error_); fail(util::ss("HTTP port must be an int"));
error << "HTTP port must be an int";
return; return;
} }
set_port(LS.ckint(val)); set_port(LS.ckint(val));
} }
void HttpRequest::set_url(LuaStack &LS, LuaSlot val) { void HttpOutRequest::set_path(LuaStack &LS, LuaSlot val) {
if (!LS.isstring(val)) { if (!LS.isstring(val)) {
ErrorStringStream error(&error_); fail(util::ss("HTTP path must be a string"));
error << "HTTP url must be a string";
return; return;
} }
set_url(LS.ckstring(val)); set_path(LS.ckstring(val));
} }
void HttpRequest::set_param(LuaStack &LS, LuaSlot key, LuaSlot val) { void HttpOutRequest::set_param(LuaStack &LS, LuaSlot key, LuaSlot val) {
if (!LS.isstring(key)) { if (!LS.isstring(key)) {
ErrorStringStream error(&error_); fail(util::ss("HTTP url parameter key must be a string"));
error << "HTTP url parameter key must be a string";
return; return;
} }
if (!LS.isstring(val)) { if (!LS.isstring(val)) {
ErrorStringStream error(&error_); fail(util::ss("HTTP url parameter val must be a string"));
error << "HTTP url parameter val must be a string";
return; return;
} }
set_param(LS.ckstring(key), LS.ckstring(val)); set_param(LS.ckstring(key), LS.ckstring(val));
} }
void HttpRequest::set_params(LuaStack &LS0, LuaSlot tab) { void HttpOutRequest::set_params(LuaStack &LS0, LuaSlot tab) {
if (!LS0.istable(tab)) { if (!LS0.istable(tab)) {
ErrorStringStream error(&error_); fail(util::ss("HTTP params must be a table"));
error << "HTTP params must be a table";
return; return;
} }
LuaVar key, val; LuaVar key, val;
@@ -208,7 +363,15 @@ void HttpRequest::set_params(LuaStack &LS0, LuaSlot tab) {
} }
} }
void HttpRequest::set_defaults() { void HttpOutRequest::set_url(LuaStack &LS, LuaSlot val) {
if (!LS.isstring(val)) {
fail(util::ss("HTTP url must be a string"));
return;
}
set_url(LS.ckstring(val));
}
void HttpOutRequest::set_defaults() {
if (method_.empty()) { if (method_.empty()) {
method_ = "GET"; method_ = "GET";
} }
@@ -217,7 +380,7 @@ void HttpRequest::set_defaults() {
} }
} }
void HttpRequest::set_config(LuaStack &LS0, LuaSlot tab) { void HttpOutRequest::set_config(LuaStack &LS0, LuaSlot tab) {
LuaVar key, val; LuaVar key, val;
LuaStack LS(LS0.state(), key, val); LuaStack LS(LS0.state(), key, val);
LS.set(key, LuaNil); LS.set(key, LuaNil);
@@ -230,23 +393,25 @@ void HttpRequest::set_config(LuaStack &LS0, LuaSlot tab) {
set_host(LS, val); set_host(LS, val);
} else if (kstr == "port") { } else if (kstr == "port") {
set_port(LS, val); set_port(LS, val);
} else if (kstr == "url") { } else if (kstr == "path") {
set_url(LS, val); set_path(LS, val);
} else if (kstr == "encodedpath") {
set_path(LS, val);
} else if (kstr == "params") { } else if (kstr == "params") {
set_params(LS, val); set_params(LS, val);
} else if (kstr == "url") {
set_url(LS, val);
} else if (kstr == "verifycertificate") { } else if (kstr == "verifycertificate") {
set_verify_certificate(LS, val); set_verify_certificate(LS, val);
} else if (kstr == "") { } else if (kstr == "") {
ErrorStringStream error(&error_); fail(util::ss("HTTP config parameter names must be strings."));
error << "HTTP config parameter names must be strings.";
} else { } else {
ErrorStringStream error(&error_); fail(util::ss("HTTP unrecognized config parameter: ", kstr));
error << "HTTP unrecognized config parameter: " << kstr;
} }
} }
} }
eng::string HttpRequest::check() const { eng::string HttpOutRequest::check() const {
if (!error_.empty()) { if (!error_.empty()) {
return error_; return error_;
} }
@@ -265,7 +430,7 @@ eng::string HttpRequest::check() const {
return ""; return "";
} }
void HttpRequest::send_internal(StreamBuffer *sb, bool debug_string) const { void HttpOutRequest::send_internal(StreamBuffer *sb, bool debug_string) const {
// If there's an error in the request, handle it. In debug string mode, // If there's an error in the request, handle it. In debug string mode,
// we just put the error into the output. In production mode, we assert // we just put the error into the output. In production mode, we assert
// fail. // fail.
@@ -285,23 +450,13 @@ void HttpRequest::send_internal(StreamBuffer *sb, bool debug_string) const {
// Send the command. // Send the command.
sb->write_bytes(method_); sb->write_bytes(method_);
sb->write_char(' '); sb->write_char(' ');
url_encode(path_, sb); send_encoded_path(path_, params_, sb);
bool first_param = true;
for (const auto &pair : params_) {
sb->write_char(first_param ? '?' : '&');
url_encode(pair.first, sb);
sb->write_char('=');
url_encode(pair.second, sb);
first_param = false;
}
sb->write_bytes(" HTTP/1.1"); sb->write_bytes(" HTTP/1.1");
sb->write_bytes(linebreak); sb->write_bytes(linebreak);
// Send the host header. // Send the host header.
sb->write_bytes("Host: "); sb->write_bytes("Host: ");
sb->write_bytes(host_); send_host_and_port(host_, port_, sb);
sb->write_char(':');
sb->ostream() << port_;
sb->write_bytes(linebreak); sb->write_bytes(linebreak);
// The empty accept-encoding header notifies the // The empty accept-encoding header notifies the
@@ -320,50 +475,335 @@ void HttpRequest::send_internal(StreamBuffer *sb, bool debug_string) const {
} }
} }
eng::string HttpRequest::DebugString() { eng::string HttpOutRequest::DebugString() {
StreamBuffer sb; StreamBuffer sb;
send_internal(&sb, true); send_internal(&sb, true);
return eng::string(sb.view()); return eng::string(sb.view());
} }
HttpResponse::HttpResponse() { HttpInResponse::HttpInResponse() {
response_code_ = 0; status_code_ = 0;
response_length_ = 0; response_length_ = 0;
mime_type_ = "application/empty"; mime_type_ = "";
content_length_ = -1;
} }
void HttpResponse::fail(int response_code, const eng::string &error) { eng::string HttpInResponse::DebugString() const {
response_code_ = response_code; eng::ostringstream oss;
error_ = error; oss << "HttpInResponse:" << std::endl;
response_length_ = 0; oss << " status_code: " << status_code_ << std::endl;
mime_type_ = "application/empty"; oss << " error: " << error_ << std::endl;
oss << " content_length: " << content_length_ << std::endl;
oss << " transfer_encoding: " << transfer_encoding_ << std::endl;
oss << " location: " << location_ << std::endl;
oss << " mime_type: " << mime_type_ << std::endl;
oss << " charset: " << charset_ << std::endl;
oss << " content: " << content_ << std::endl;
oss << " response_length: " << response_length_ << std::endl;
return oss.str();
}
void HttpInResponse::fail(int code, string_view message) {
status_code_ = code;
error_ = message;
mime_type_ = "";
charset_ = "";
content_ = ""; content_ = "";
} }
void HttpResponse::parse(const StreamBuffer *sb) { void HttpInResponse::incomplete(bool closed) {
if (closed) {
fail(500, "response truncated");
} else {
fail(0, "response not yet fully received");
}
}
// Record the content-encoding header, folded to lower case so that later
// comparisons against names such as "identity" are case-insensitive.
void HttpInResponse::parse_content_encoding(string_view value) {
    const eng::string lowered = util::ascii_tolower(value);
    content_encoding_ = lowered;
}
// Parse the content-length header into content_length_.
//
// A value that is negative or larger than INT_MAX is reported through
// fail(500, ...) and is NOT stored, so content_length_ keeps its previous
// value. Previously the rejected value was stored anyway because the error
// path fell through to the assignment.
// NOTE(review): what the one-argument sv::to_int64() returns for non-numeric
// text is not visible here; if it returns 0, garbage is accepted as a
// zero-length body -- confirm.
void HttpInResponse::parse_content_length(string_view value) {
    int64_t length = sv::to_int64(value);
    if ((length < 0) || (length > INT_MAX)) {
        fail(500, util::ss("unparseable content-length: ", value));
        return;
    }
    content_length_ = length;
}
// Parse the content-type header into mime_type_ and charset_.
//
// The whole header is lower-cased first. The text before the first ';' is
// the MIME type; an empty MIME type is reported through fail(500, ...).
// The remaining ';'-separated "name=value" features are scanned and only
// "charset" is kept. Scanning stops at the first empty feature, which is
// also how the end of the header is detected.
void HttpInResponse::parse_content_type(string_view value) {
    eng::string ctype = util::ascii_tolower(value);
    string_view ctview(ctype);
    mime_type_ = sv::trim(sv::read_to_sep(ctview, ';'));
    if (mime_type_.empty()) {
        fail(500, util::ss("unparseable content-type: ", value));
        return;
    }
    while (true) {
        string_view feature = sv::trim(sv::read_to_sep(ctview, ';'));
        if (feature.empty()) {
            return;
        }
        string_view ftype = sv::trim(sv::read_to_sep(feature, '='));
        if (ftype == "charset") {
            string_view charset = sv::trim(feature);
            // Parameter values may legally be written as a quoted string
            // (charset="utf-8"). Strip one pair of surrounding quotes so the
            // stored name compares equal to the unquoted form.
            if ((charset.size() >= 2) && (charset.front() == '"') && (charset.back() == '"')) {
                charset.remove_prefix(1);
                charset.remove_suffix(1);
            }
            charset_ = charset;
        }
    }
}
// Record the location header after running it through url_decode().
// NOTE(review): decoding the entire URL also turns '+' into a space and
// expands escapes such as %2F or %3F -- confirm that consumers of
// location_ expect a decoded string rather than a usable URL.
void HttpInResponse::parse_location(string_view value) {
    const eng::string decoded = url_decode(value);
    location_ = decoded;
}
// Record the transfer-encoding header, folded to lower case so that it can
// be compared directly against names such as "chunked".
void HttpInResponse::parse_transfer_encoding(string_view value) {
    const eng::string lowered = util::ascii_tolower(value);
    transfer_encoding_ = lowered;
}
// Dispatch one response header to its dedicated parser.
//
// `header` is compared against lower-case names, so the caller is expected
// to pass it already lower-cased. Headers not listed here are ignored.
// "content-range" is explicitly rejected through fail(500, ...).
void HttpInResponse::parse_header(string_view header, string_view value) {
    if (header == "content-encoding") {
        parse_content_encoding(value);
        return;
    }
    if (header == "content-length") {
        parse_content_length(value);
        return;
    }
    if (header == "content-type") {
        parse_content_type(value);
        return;
    }
    if (header == "location") {
        parse_location(value);
        return;
    }
    if (header == "transfer-encoding") {
        parse_transfer_encoding(value);
        return;
    }
    if (header == "content-range") {
        fail(500, util::ss("unsupported response header: ", header));
    }
}
// Read a body that is not chunk-encoded from `view` into content_.
//
// With a content-length header (content_length_ >= 0) exactly that many
// bytes are taken. Without one, the body is everything up to the point
// where the connection closes, so nothing can be taken until `closed` is
// true. In both cases a body above MAX_CONTENT_LENGTH is refused.
//
// Returns true when content_ has been filled. Returns false after calling
// fail() or incomplete(); `view` is left unconsumed in that case.
bool HttpInResponse::parse_content_basic(std::string_view &view, bool closed) {
    // Compare sizes as 64-bit values in both branches; narrowing the size to
    // `int` could wrap for a very large buffer and misreport it as short.
    const int64_t available = static_cast<int64_t>(view.size());
    if (content_length_ >= 0) {
        if (content_length_ > MAX_CONTENT_LENGTH) {
            fail(500, "content too long");
            return false;
        }
        if (available < content_length_) {
            incomplete(closed);
            return false;
        }
        content_ = sv::read_nbytes(view, content_length_);
    } else {
        if (available > MAX_CONTENT_LENGTH) {
            fail(500, "content too long");
            return false;
        }
        if (!closed) {
            incomplete(closed);
            return false;
        }
        content_ = sv::read_nbytes(view, view.size());
    }
    return true;
}
// Read a "chunked" body from `view` into content_.
//
// Each chunk is a hex size line, that many bytes of data, and a line ending.
// A zero size line ends the chunk list and is followed by an optional
// trailer section terminated by an empty line. The chunks are first
// collected as views into the buffer and copied into content_ only once the
// whole body has been validated.
//
// Returns true when content_ has been filled. Returns false after calling
// fail() (malformed or oversized body) or incomplete() (more bytes needed).
//
// As elsewhere in this file, a null `view` after sv::read_to_line() is taken
// to mean that no complete line was available.
bool HttpInResponse::parse_content_chunked(std::string_view &view, bool closed) {
    int64_t total_size = 0;
    std::vector<string_view> chunks;
    while (true) {
        std::string_view chunk_header = sv::read_to_line(view);
        if (sv::isnull(view)) {
            incomplete(closed);
            return false;
        }
        // The size may be followed by ";name=value" chunk extensions, which
        // a recipient is required to ignore. Parse only the part before the
        // first ';'.
        std::string_view size_field = sv::trim(sv::read_to_sep(chunk_header, ';'));
        int64_t chunk_size = sv::to_hex64(size_field, -1);
        if (chunk_size < 0) {
            fail(500, "unparseable chunk header");
            return false;
        }
        if (chunk_size > MAX_CONTENT_LENGTH) {
            fail(500, "content too long");
            return false;
        }
        if (chunk_size == 0) {
            // Last chunk. Consume the trailer section up to and including
            // the empty line that ends the message; otherwise those bytes
            // are left in the buffer and miscounted as not belonging to
            // this response.
            while (true) {
                std::string_view trailer = sv::read_to_line(view);
                if (sv::isnull(view)) {
                    // The terminator has not arrived. If the connection is
                    // already closed the body itself is complete, so accept
                    // it (as before); otherwise wait for more bytes.
                    if (closed) break;
                    incomplete(closed);
                    return false;
                }
                if (trailer.empty()) break;
            }
            break;
        }
        total_size += chunk_size;
        if (total_size > MAX_CONTENT_LENGTH) {
            fail(500, "content too long");
            return false;
        }
        std::string_view chunk = sv::read_nbytes(view, chunk_size);
        if (int64_t(chunk.size()) != chunk_size) {
            incomplete(closed);
            return false;
        }
        std::string_view newline = sv::read_to_line(view);
        // Check for a missing line ending before judging its contents: a
        // partially received terminator must be reported as incomplete, not
        // as corruption.
        if (sv::isnull(view)) {
            incomplete(closed);
            return false;
        }
        if (!newline.empty()) {
            fail(500, "corrupted chunk encoding");
            return false;
        }
        chunks.push_back(chunk);
    }
    // Assemble the validated chunks into one contiguous string.
    content_.resize(total_size);
    size_t offset = 0;
    for (string_view chunk : chunks) {
        content_.replace(offset, chunk.size(), chunk);
        offset += chunk.size();
    }
    return true;
}
void HttpInResponse::parse(const StreamBuffer *sb, bool closed) {
// We're not going to modify the StreamBuffer at all. // We're not going to modify the StreamBuffer at all.
// Instead, we work entirely on a view. // Instead, we work entirely on a view.
std::string_view view = sb->view(); string_view view = sb->view();
// Special case this. // Get the status line.
if (view.empty()) { string_view status = sv::trim(sv::read_to_line(view));
fail(500, "HTTP server response completely empty"); if (sv::isnull(view)) {
incomplete(closed);
return; return;
} }
// Parse the status line. // Parse the status line.
std::string_view status = sv::read_to_line(view); string_view scode = sv::read_to_space(status);
if (status.empty()) { int64_t code = sv::to_int64(scode, 0);
fail(500, "HTTP status-line not present in response"); if ((code < 100) || (code > 599)) {
fail(500, util::ss("protocol error: invalid response code: ", scode));
}
status_code_ = code;
// Responses outside the range 200-299 are errors,
// and therefore must store an error message.
if ((code < 200) || (code > 299)) {
error_ = status;
if (error_.empty()) {
fail(code, util::ss("error code ", code));
}
}
// Parse the headers.
while (true) {
string_view header = sv::read_to_line(view);
if (sv::isnull(view)) {
incomplete(closed);
return;
}
if (header.empty()) {
break;
}
eng::string command = util::ascii_tolower(sv::trim(sv::read_to_sep(header, ':')));
if (sv::isnull(header)) {
fail(500, util::ss("protocol error: no colon in header line: ", command));
return;
}
if (!words_separated_by_dashes(command)) {
fail(500, util::ss("protocol error: invalid header: ", command));
return;
}
parse_header(command, sv::trim(header));
}
// Process the content using the transfer encoding.
if (transfer_encoding_ == "") {
if (!parse_content_basic(view, closed)) return;
} else if (transfer_encoding_ == "chunked") {
if (!parse_content_chunked(view, closed)) return;
} else {
fail(500, util::ss("unsupported transfer-encoding: ", transfer_encoding_));
return; return;
} }
//std::string_view status_code = util::sv_split_one(status, ' ');
// Calculate the response length.
response_length_ = sb->fill() - view.size();
// If it's not a redirect, disallow 'location'.
if ((status_code_ < 300) || (status_code_ > 399)) {
if (!location_.empty()) {
fail(500, util::ss("redirect specified, but result code not 300-399: ", code));
return;
}
}
// If the server didn't specify content-type, make a guess.
if (mime_type_.empty()) {
if (sv::valid_utf8(content_)) {
mime_type_ = "text/plain";
charset_ = "utf-8";
} else {
mime_type_ = "application/octet-stream";
charset_ = "";
}
}
// If it's multipart, reject it.
if (sv::has_prefix(mime_type_, "multipart/")) {
fail(500, "multipart messages not implemented");
return;
}
// If it's text, demand a reasonable charset. Otherwise,
// ignore the charset.
if (sv::has_prefix(mime_type_, "text/")) {
if (charset_.empty()) {
charset_ = "utf-8";
}
if (charset_ != "utf-8") {
fail(500, util::ss("charset not supported: ", charset_));
return;
}
} else {
charset_.clear();
}
// Uncompress the content.
if ((content_encoding_ == "") || (content_encoding_ == "identity")) {
} else {
fail(500, util::ss("content-encoding not supported: ", content_encoding_));
return;
}
// If there's an error code, throw out the content.
if ((status_code_ < 200) || (status_code_ > 299)) {
mime_type_.clear();
charset_.clear();
content_.clear();
}
} }
// Export this response into a fresh Lua table placed in slot `tab`.
//
// "responsecode" is always present. "error" and "location" are present only
// when non-empty. "mimetype" and "content" are stored together, and only
// when a MIME type is known. The "dbg-" fields are always written.
void HttpInResponse::store(LuaStack &LS0, LuaSlot tab) {
    LuaStack LS(LS0.state());
    LS.newtable(tab);
    LS.rawset(tab, "responsecode", status_code_);

    const bool has_error = !error_.empty();
    if (has_error) {
        LS.rawset(tab, "error", error_);
    }

    const bool has_location = !location_.empty();
    if (has_location) {
        LS.rawset(tab, "location", location_);
    }

    const bool has_body = !mime_type_.empty();
    if (has_body) {
        LS.rawset(tab, "mimetype", mime_type_);
        LS.rawset(tab, "content", content_);
    }

    // Debugging fields. Do not use for lua programming.
    LS.rawset(tab, "dbg-content-length", content_length_);
    LS.rawset(tab, "dbg-transfer-encoding", transfer_encoding_);
    LS.rawset(tab, "dbg-charset", charset_);
    LS.rawset(tab, "dbg-response-length", response_length_);
}
// Lua binding: parse the given URL with ParsedURL and return the URL
// re-rendered by ParsedURL::str(). An unparseable URL raises a Lua error.
// NOTE(review): luaL_error() is called while `parsed` is still alive; if the
// Lua runtime unwinds with longjmp its destructor will not run -- confirm
// how this project builds Lua.
LuaDefine(http_fixurl, "url", "validate URL and repair minor flaws in the URL syntax") {
    LuaArg url;
    LuaRet fixed;
    LuaStack LS(L, url, fixed);
    ParsedURL parsed(LS.ckstring(url));
    if (parsed.valid) {
        LS.set(fixed, parsed.str());
    } else {
        luaL_error(L, "invalid URL, not fixable");
    }
    return LS.result();
}
LuaDefine(http_request, "reqtab", LuaDefine(http_request, "reqtab",
"|Given an HTTP request in the form of a table, returns the same " "|Given an HTTP request in the form of a table, returns the same "
"|request as a string, to assist with debugging." "|request as a string, to assist with debugging."
@@ -397,7 +837,7 @@ LuaDefine(http_request, "reqtab",
LuaArg tab; LuaArg tab;
LuaRet str; LuaRet str;
LuaStack LS(L, tab, str); LuaStack LS(L, tab, str);
HttpRequest req; HttpOutRequest req;
req.set_config(LS, tab); req.set_config(LS, tab);
req.set_defaults(); req.set_defaults();
eng::string error = req.check(); eng::string error = req.check();
@@ -409,3 +849,15 @@ LuaDefine(http_request, "reqtab",
return LS.result(); return LS.result();
} }
// Lua binding: parse `text` as a complete HTTP response and return the
// result as a table (see HttpInResponse::store for the fields).
LuaDefine(http_response, "text", "") {
    LuaArg text;
    LuaRet tab;
    LuaStack LS(L, text, tab);

    // Stage the raw text in a buffer, since the parser reads from one.
    StreamBuffer sb;
    sb.write_bytes(LS.ckstring(text));

    // The text is all there is, so parse as if the connection had closed.
    HttpInResponse resp;
    resp.parse(&sb, true);
    resp.store(LS, tab);
    return LS.result();
}

View File

@@ -20,7 +20,9 @@
#include "luastack.hpp" #include "luastack.hpp"
#include "streambuffer.hpp" #include "streambuffer.hpp"
class HttpRequest : public eng::nevernew { using UrlParameters = eng::map<eng::string, eng::string>;
class HttpOutRequest : public eng::nevernew {
private: private:
// If the request contains an error, the error // If the request contains an error, the error
// message is stored here. // message is stored here.
@@ -40,24 +42,26 @@ private:
// Port number. // Port number.
int port_; int port_;
// The path is always UTF-8. This field should not be urlencoded. // You may specify either path or encoded_path.
// Instead, urlencoding is done automatically when the request // The path is not url-encoded, and must not include URL parameters.
// is sent. Should not include protocol, host, port, or parameters.
eng::string path_; eng::string path_;
// If params is nonempty, then we will add URL parameters // If params is nonempty, then we will add URL parameters
// to the URL. The contents of the params field should not be // to the URL. The contents of the params field should not be
// urlencoded, the urlencoding is done automatically when the // urlencoded, the urlencoding is done automatically when the
// request is sent. // request is sent. If you specify encoded_path, then the
eng::map<eng::string, eng::string> params_; // params must be empty, because the encoded path already contains
// the params.
UrlParameters params_;
private: private:
void send_internal(StreamBuffer *target, bool debug_string) const; void fail(std::string_view error);
void send_internal(StreamBuffer *target, bool debug_string) const;
public: public:
// Construct an empty HTTP request. // Construct an empty HTTP request.
// All of the fields have empty values. // All of the fields have empty values.
HttpRequest(); HttpOutRequest();
// Get fields. // Get fields.
const eng::string &error() const { return error_; } const eng::string &error() const { return error_; }
@@ -80,17 +84,19 @@ public:
void set_method(const eng::string &method); void set_method(const eng::string &method);
void set_host(const eng::string &host); void set_host(const eng::string &host);
void set_port(int port); void set_port(int port);
void set_url(const eng::string &url); void set_path(std::string_view path);
void set_param(const eng::string &key, const eng::string &value); void set_param(const eng::string &key, const eng::string &value);
void set_url(std::string_view url);
// Same as above, but using Lua values. // Same as above, but using Lua values.
void set_verify_certificate(LuaStack &LS, LuaSlot val); void set_verify_certificate(LuaStack &LS, LuaSlot val);
void set_method(LuaStack &LS, LuaSlot val); void set_method(LuaStack &LS, LuaSlot val);
void set_host(LuaStack &LS, LuaSlot val); void set_host(LuaStack &LS, LuaSlot val);
void set_port(LuaStack &LS, LuaSlot val); void set_port(LuaStack &LS, LuaSlot val);
void set_url(LuaStack &LS, LuaSlot val); void set_path(LuaStack &LS, LuaSlot path);
void set_param(LuaStack &LS, LuaSlot key, LuaSlot val); void set_param(LuaStack &LS, LuaSlot key, LuaSlot val);
void set_params(LuaStack &LS, LuaSlot tab); void set_params(LuaStack &LS, LuaSlot tab);
void set_url(LuaStack &LS, LuaSlot val);
// Set default values for any fields that should have // Set default values for any fields that should have
// defaults. This must be done after setting regular // defaults. This must be done after setting regular
@@ -111,48 +117,94 @@ public:
eng::string DebugString(); eng::string DebugString();
}; };
class HttpResponse { class HttpInResponse {
private: private:
// The HTTP response code. // The HTTP response status code.
int response_code_; int status_code_;
// If the HTTP response contains an error, the // If the HTTP response contains an error, the
// error message is stored here. If the HTTP response // error message is stored here. If the HTTP response
// is a success such as "200 OK" or "201 Created", this // is a success such as "200 OK" or "201 Created", this
// is the empty string, not "OK" or "Created". // is the empty string, not "OK" or "Created".
eng::string error_; eng::string error_;
// The length in bytes of the entire response. // Only if content-length header present, otherwise, -1.
// May be zero, which means that the response int64_t content_length_;
// was so garbled that we couldn't determine the length.
bool response_length_; // If empty, it means there was no transfer-encoding header.
eng::string transfer_encoding_;
// If empty, it means there was no content-encoding header.
eng::string content_encoding_;
// Only if location header present.
eng::string location_;
// MIME type of the content. // MIME type of the content.
eng::string mime_type_; eng::string mime_type_;
// Charset of the content. Hopefully utf-8.
eng::string charset_;
// The content as string. // The content as string.
eng::string content_; eng::string content_;
// The length in bytes of the entire response.
// May be zero, which means that the response
// was so garbled that we couldn't determine the length.
int response_length_;
private: private:
// Store a message indicating that we haven't received enough
// bytes yet. If the connection is closed and we still haven't
// received enough bytes, that's a fatal error.
void incomplete(bool closed);
// Parse a response header. Most headers are ignored.
// If the header contains an error, the error is stored.
void parse_header(std::string_view header, std::string_view value);
// Parse specific headers.
// For several headers, all we do is verify that they aren't
// invoking unsupported features.
void parse_content_encoding(std::string_view value);
void parse_content_length(std::string_view value);
void parse_content_type(std::string_view value);
void parse_location(std::string_view value);
void parse_transfer_encoding(std::string_view value);
// parse the body
bool parse_content_basic(std::string_view &view, bool closed);
bool parse_content_chunked(std::string_view &view, bool closed);
public: public:
const int64_t MAX_CONTENT_LENGTH = 1000000;
// Construct a blank response. // Construct a blank response.
HttpResponse(); HttpInResponse();
// Store an error message. This is used when the client detects an error, // Store a result code and an error message, and clear the content.
// This is generally used when the client detects an error,
// such as a DNS lookup fail, a connection failed, an SSL negotiation // such as a DNS lookup fail, a connection failed, an SSL negotiation
// failed, or the like. Clears the content, leaving only the error // failed, or the like.
// and response code. void fail(int status_code, std::string_view error);
void fail(int response_code, const eng::string &error);
// Parse the HTTP response. Note that the response is not // Parse the HTTP response. The closed flag is to be set to true if the
// removed from the StreamBuffer, which is always unmodified. // remote has closed the connection.
// If you want to remove the response from the StreamBuffer, see //
// response_length. // If the request is incomplete, generates a 600 incomplete error. In that
void parse(const StreamBuffer *sb); // case, loading more data from the server might improve the situation.
//
// Note that the response is not ever removed from the StreamBuffer, which
// is always unmodified. If you want to remove the response from the
// StreamBuffer, see response_length.
//
void parse(const StreamBuffer *sb, bool closed);
// Convert the HTTP response to a lua table. // Convert the HTTP response to a lua table.
void store(LuaStack &LS, LuaSlot tab); void store(LuaStack &LS, LuaSlot tab);
// Convert to a debug string.
eng::string DebugString() const;
}; };
#endif // HTTP_HPP #endif // HTTP_HPP

View File

@@ -33,6 +33,15 @@ bool valid_int64(string_view value) {
return true; return true;
} }
// Return true if the whole of `value` is a hexadecimal number that fits
// in a uint64_t. No "0x" prefix, sign, or surrounding whitespace is
// accepted, and the empty string is rejected.
//
// This accepts exactly the inputs that to_hex64 converts successfully:
// both parse into an unsigned 64-bit integer, so "FFFFFFFFFFFFFFFF" is
// valid and "-1" is not.
bool valid_hex64(string_view value) {
    // Must be unsigned to agree with to_hex64. A signed target would
    // reject values above INT64_MAX and would accept a leading '-'.
    uint64_t result;
    const char *last = value.data() + value.size();
    auto r = std::from_chars(value.data(), last, result, 16);
    // Reject parse failures and out-of-range values.
    if (r.ec != std::errc()) return false;
    // Reject trailing characters that are not hex digits.
    if (r.ptr != last) return false;
    return true;
}
bool valid_double(string_view value) { bool valid_double(string_view value) {
double result; double result;
const char *last = value.data() + value.size(); const char *last = value.data() + value.size();
@@ -51,6 +60,15 @@ int64_t to_int64(string_view value, int64_t errval) {
return result; return result;
} }
// Convert a hexadecimal string to a uint64_t.
//
// The whole of `value` must consist of hex digits and the number must
// fit in 64 bits; otherwise `errval` is returned.
uint64_t to_hex64(string_view value, uint64_t errval) {
    const char *const begin = value.data();
    const char *const end = begin + value.size();
    uint64_t parsed = 0;
    const auto outcome = std::from_chars(begin, end, parsed, 16);
    // Success means: no parse/range error, and every byte was consumed.
    const bool ok = (outcome.ec == std::errc()) && (outcome.ptr == end);
    return ok ? parsed : errval;
}
double to_double(string_view value, double errval) { double to_double(string_view value, double errval) {
double result; double result;
const char *last = value.data() + value.size(); const char *last = value.data() + value.size();
@@ -192,6 +210,65 @@ string_view read_to_space(string_view &source) {
return result; return result;
} }
// Remove up to `nbytes` bytes from the front of `source` and return them.
//
// A negative `nbytes` is treated as zero. If `source` holds fewer than
// `nbytes` bytes, everything that is there is returned and `source`
// becomes empty.
string_view read_nbytes(string_view &source, int nbytes) {
    // Clamp in the view's own size type. Narrowing source.size() to int
    // would give a wrong (possibly negative) limit for very large views.
    string_view::size_type count = 0;
    if (nbytes > 0) {
        count = static_cast<string_view::size_type>(nbytes);
        if (count > source.size()) count = source.size();
    }
    string_view result = source.substr(0, count);
    source.remove_prefix(count);
    return result;
}
// Return true if `s` is well-formed UTF-8.
//
// Rejected: stray continuation bytes, invalid lead bytes, sequences cut
// off by the end of the string, overlong encodings, UTF-16 surrogates
// (U+D800..U+DFFF) and code points above U+10FFFF. The empty string is
// valid.
bool valid_utf8(string_view s)
{
    const unsigned char *cur = reinterpret_cast<const unsigned char *>(s.data());
    const unsigned char *const end = cur + s.size();

    while (cur != end) {
        const unsigned char lead = cur[0];
        unsigned int cp;      // code point being assembled
        unsigned int min_cp;  // smallest code point this length may encode
        int len;              // total length of the sequence in bytes

        if (lead < 0x80) {
            // 0xxxxxxx: U+0000 to U+007F
            cp = lead;
            min_cp = 0x0000;
            len = 1;
        } else if ((lead >> 5) == 0x06) {
            // 110xxxxx: U+0080 to U+07FF
            cp = lead & 0x1F;
            min_cp = 0x0080;
            len = 2;
        } else if ((lead >> 4) == 0x0E) {
            // 1110xxxx: U+0800 to U+FFFF
            cp = lead & 0x0F;
            min_cp = 0x0800;
            len = 3;
        } else if ((lead >> 3) == 0x1E) {
            // 11110xxx: U+10000 to U+10FFFF
            cp = lead & 0x07;
            min_cp = 0x10000;
            len = 4;
        } else {
            // Continuation byte in lead position, or 0xF8..0xFF.
            return false;
        }

        // The sequence must not run past the end of the string.
        if (end - cur < len) return false;

        // Each trailing byte must look like 10xxxxxx.
        for (int k = 1; k < len; ++k) {
            const unsigned char cont = cur[k];
            if ((cont >> 6) != 0x02) return false;
            cp = (cp << 6) | (cont & 0x3F);
        }

        // A sequence of a given length cannot encode a value above its
        // range, so a length/value mismatch can only be an overlong form.
        if (cp < min_cp) return false;
        if (cp > 0x10FFFF) return false;
        if (cp >= 0xD800 && cp <= 0xDFFF) return false;

        cur += len;
    }
    return true;
}
} // namespace sv } // namespace sv
@@ -450,6 +527,7 @@ eng::string XYZ::debug_string() const {
return oss.str(); return oss.str();
} }
} // namespace util } // namespace util
std::ostream &operator<<(std::ostream &oss, const util::hex64 &v) { std::ostream &operator<<(std::ostream &oss, const util::hex64 &v) {

View File

@@ -51,12 +51,14 @@ inline bool ascii_isspace(char c) { return (c==' ')||(c=='\t')||(c=='\r')||(c=='
inline bool isnull(string_view v) { return v.data() == nullptr; } inline bool isnull(string_view v) { return v.data() == nullptr; }
// Check if numbers can be parsed as int64/double // Check if numbers can be parsed as int64/double
bool valid_int64(string_view v);
bool valid_double(string_view v); bool valid_double(string_view v);
bool valid_int64(string_view v);
bool valid_hex64(string_view v);
// Parse numbers as int64/double. Returns errval on failure. // Parse numbers as double, int64, or hexadecimal uint64. Returns errval on failure.
int64_t to_int64(string_view v, int64_t errval = std::numeric_limits<int64_t>::min());
double to_double(string_view v, double errval = std::numeric_limits<double>::quiet_NaN()); double to_double(string_view v, double errval = std::numeric_limits<double>::quiet_NaN());
int64_t to_int64(string_view v, int64_t errval = std::numeric_limits<int64_t>::min());
uint64_t to_hex64(string_view v, uint64_t errval = std::numeric_limits<uint64_t>::max());
// Trim whitespace from a string_view. // Trim whitespace from a string_view.
string_view ltrim(string_view v); string_view ltrim(string_view v);
@@ -81,6 +83,12 @@ bool is_lua_id(string_view s);
// Return true if the line of code is a lua comment. // Return true if the line of code is a lua comment.
bool is_lua_comment(string_view s); bool is_lua_comment(string_view s);
// Return the first character of the view, or zero (NUL) if the view
// is empty.
//
// The view is taken by value: it is only inspected, never modified, and
// passing by value lets callers hand in const views, temporaries and
// string literals as well as ordinary lvalues.
inline char zfront(string_view s) {
    return s.empty() ? char(0) : s.front();
}
// Read from a string_view until separator is reached. // Read from a string_view until separator is reached.
// //
// If the separator appears in the source, returns everything // If the separator appears in the source, returns everything
@@ -114,6 +122,13 @@ string_view read_to_line(string_view &source);
// //
string_view read_to_space(string_view &source); string_view read_to_space(string_view &source);
// Read up to nbytes from a string_view.
//
string_view read_nbytes(string_view &source, int nbytes);
// Return true if the string is valid utf-8.
bool valid_utf8(string_view s);
} // namespace sv } // namespace sv
namespace util { namespace util {
@@ -233,6 +248,22 @@ public:
int overflow(int c) { return c; } int overflow(int c) { return c; }
}; };
// send_to_stream: send all arguments to the specified stream, in order,
// using operator<<.
//
// Base case: no arguments left, nothing to write.
inline void send_to_stream(std::ostream &os) {}

// Recursive case: write the first argument, then the rest.
//
// Arguments are taken as forwarding references so that lvalues, const
// objects and temporaries (e.g. integer literals) are all accepted and
// nothing is copied. They are only streamed, never moved from.
template <class ARG, class... REST>
inline void send_to_stream(std::ostream &os, ARG &&arg, REST &&... rest) {
    os << arg;
    send_to_stream(os, rest...);
}
// ss: convert all arguments to a string by sending them, in order, to a
// stringstream and returning the accumulated text.
//
// Arguments are taken as forwarding references so that temporaries such
// as numeric literals can be passed directly, e.g. ss("code ", 404).
// They are only streamed, never moved from.
template <class... ARGS>
inline eng::string ss(ARGS &&... args) {
    eng::ostringstream oss;
    send_to_stream(oss, args...);
    return oss.str();
}
} // namespace util } // namespace util
std::ostream &operator<<(std::ostream &oss, const util::hex64 &v); std::ostream &operator<<(std::ostream &oss, const util::hex64 &v);