// integration/luprex/ext/base-buffer.hpp

#pragma once
/////////////////////////////////////////////////////////////////
//
// IMPORTANT: This is a header-only library that is included
// by the graphics engine as well. It cannot contain references
// to anything else in the engine.
//
/////////////////////////////////////////////////////////////////
#include <cassert>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <string>
#include <string_view>
///////////////////////////////////////////////////////////////
//
// BaseLuaValue
//
// A struct that holds a dynamically typed value.
// This can hold a string, token, number, vector, or boolean.
//
// The type is stored in the 'type' field.
//
// If it's a STRING, the value is in the field s
// If it's a TOKEN, the value is stored in the field s
// If it's a NUMBER, the value is in the field x
// If it's a BOOLEAN, it's true if (x==1.0)
// If it's a VECTOR, the value is in x,y,z
// If it's UNINITIALIZED, s is empty and x,y,z are zero
//
///////////////////////////////////////////////////////////////
// Tag identifying which kind of value a BaseLuaValue currently holds.
// The numeric values are the implicit ones (0..5), spelled out so that
// the numbering is visible at a glance.
enum class LuaValueType {
    UNINITIALIZED = 0,  // no value set
    STRING        = 1,  // text held in 's'
    TOKEN         = 2,  // text held in 's'
    NUMBER        = 3,  // number held in 'x'
    BOOLEAN       = 4,  // true when x == 1.0
    VECTOR        = 5,  // components held in 'x', 'y', 'z'
};
template<class STRING>
struct BaseLuaValue {
using string = STRING;
2026-02-09 13:54:00 -05:00
LuaValueType type;
double x, y, z;
string s;
BaseLuaValue() {
2026-02-09 13:54:00 -05:00
type = LuaValueType::UNINITIALIZED;
x=y=z=0;
}
2026-02-09 13:54:00 -05:00
static const char *type_name_of(LuaValueType t) {
switch (t) {
2026-02-09 13:54:00 -05:00
case LuaValueType::UNINITIALIZED: return "uninitialized";
case LuaValueType::STRING: return "string";
case LuaValueType::TOKEN: return "token";
case LuaValueType::BOOLEAN: return "boolean";
case LuaValueType::NUMBER: return "number";
case LuaValueType::VECTOR: return "vector";
default: return "unknown";
}
}
const char *type_name() const {
return type_name_of(type);
}
void set_uninitialized() {
2026-02-09 13:54:00 -05:00
type=LuaValueType::UNINITIALIZED; s.clear(); x=y=z=0;
}
void set_string(std::string_view is) {
2026-02-09 13:54:00 -05:00
type=LuaValueType::STRING; s=is; x=y=z=0;
}
void set_token(std::string_view is) {
2026-02-09 13:54:00 -05:00
type=LuaValueType::TOKEN; s=is; x=y=z=0;
}
void set_number(double n) {
2026-02-09 13:54:00 -05:00
type = LuaValueType::NUMBER; s.clear(); x=n; y=z=0;
}
void set_boolean(bool b) {
2026-02-09 13:54:00 -05:00
type = LuaValueType::BOOLEAN; s.clear(); x=(b?1:0); y=z=0;
}
void set_vector(double ix, double iy, double iz) {
2026-02-09 13:54:00 -05:00
type = LuaValueType::VECTOR; s.clear(); x=ix; y=iy; z=iz;
}
void copy_value(const BaseLuaValue &other) {
type = other.type;
s=other.s; x=other.x; y=other.y; z=other.z;
}
};
///////////////////////////////////////////////////////////////
//
// DataSerializer
//
// DataSerializer is an object that can serialize ints,
// strings, floats, and other basic types. It provides a
// consistent standard for the byte formats.
//
// To serialize, first construct a DataSerializer, passing
// in a pointer to an output device. An output device is
// any class that has these methods:
//
//   void write_bytes(const char *data, size_t len);
//
//   void raise_integer_truncated();
//
// After constructing the DataSerializer, call write_uint8,
// write_int32, write_float, write_string, or the like. The
// data will be written to the output device using
// write_bytes. If there's an error, a 'raise' method may be
// called on the output device.
//
// It is intended that the compiler will optimize this
// process to such a degree that it costs no more than
// simply calling write_bytes directly on the output
// device. In other words, it is our intent that the
// use of a DataSerializer should be free.
//
///////////////////////////////////////////////////////////////
// Serializes basic types onto an OutputDevice, which must provide
// write_bytes(const char *, size_t) and raise_integer_truncated().
template<class OutputDevice>
class DataSerializer {
private:
    // Destination for every byte produced. Not owned.
    OutputDevice *output_;

    // Write the in-memory bytes of 'arg' (sizeof(T) of them) verbatim.
    template<class T>
    void write_value_core(T arg) {
        output_->write_bytes(reinterpret_cast<const char *>(&arg), sizeof(arg));
    }

    // Narrow 'arg' from XT to T and write the in-memory bytes of the
    // result. If the value does not survive the round trip through T,
    // raise_integer_truncated() is called on the device first; the
    // narrowed value is written regardless.
    template<class T, class XT>
    void write_int_core(XT arg) {
        const T reduced = static_cast<T>(arg);
        if (static_cast<XT>(reduced) != arg) output_->raise_integer_truncated();
        output_->write_bytes(reinterpret_cast<const char *>(&reduced), sizeof(reduced));
    }

public:
    // Wrap an output device. The device must outlive the serializer.
    DataSerializer(OutputDevice *o) : output_(o) {}

    // Fixed-width integers. Each takes a 64-bit argument so that an
    // out-of-range value is detected here rather than silently
    // narrowed at the call site.
    void write_uint8(uint64_t data)  { write_int_core<uint8_t,  uint64_t>(data); }
    void write_uint16(uint64_t data) { write_int_core<uint16_t, uint64_t>(data); }
    void write_uint32(uint64_t data) { write_int_core<uint32_t, uint64_t>(data); }
    void write_uint64(uint64_t data) { write_int_core<uint64_t, uint64_t>(data); }
    void write_int8(int64_t data)    { write_int_core<int8_t,   int64_t>(data); }
    void write_int16(int64_t data)   { write_int_core<int16_t,  int64_t>(data); }
    void write_int32(int64_t data)   { write_int_core<int32_t,  int64_t>(data); }
    void write_int64(int64_t data)   { write_int_core<int64_t,  int64_t>(data); }

    // A bool is one byte: 1 for true, 0 for false.
    void write_bool(bool b)      { write_uint8(b ? 1 : 0); }
    void write_char(char c)      { write_value_core(c); }
    void write_float(float arg)  { write_value_core(arg); }
    void write_double(double arg) { write_value_core(arg); }

    // Write a length prefix. Lengths below 255 take a single byte;
    // anything else is the marker byte 0xFF followed by the length
    // as a uint64.
    void write_length(size_t len) {
        if (len >= 255) {
            write_uint8(0xFF);
            write_uint64(len);
        } else {
            write_uint8(len);
        }
    }

    // Write a string as a length prefix followed by its bytes.
    void write_string(std::string_view s) {
        write_length(s.size());
        output_->write_bytes(s.data(), s.size());
    }
};
///////////////////////////////////////////////////////////////
//
// DataDeserializer
//
// DataDeserializer is an object that can deserialize ints,
// strings, floats, and other basic types. It provides a
// consistent standard for the byte formats.
//
// To deserialize, first construct a DataDeserializer, passing
// in a pointer to an input device. An input device is
// any class that has these methods:
//
//   void read_bytes_into(char *data, size_t len);
//   void raise_string_too_long();
//
// After constructing the DataDeserializer, call read_uint8,
// read_int32, read_float, read_string, or the like. The data
// will be read from the input device using read_bytes_into.
// If there's an error, a 'raise' method may be called on the
// input device.
//
// It is intended that the compiler will optimize this
// process to such a degree that it costs no more than
// simply calling read_bytes_into directly on the input
// device. In other words, it is our intent that the
// use of a DataDeserializer should be free.
//
///////////////////////////////////////////////////////////////
// Deserializes basic types from an InputDevice, which must provide
// read_bytes_into(char *, size_t) and raise_string_too_long().
// StringType is the string class returned by the string readers; it
// must be constructible as StringType(count, char) and indexable.
template<class InputDevice, class StringType = std::string>
class DataDeserializer {
private:
    // Source of every byte consumed. Not owned.
    InputDevice *input_;

    // Read sizeof(T) bytes from the device straight into a T.
    // The result is value-initialized first so that a device which
    // leaves the destination untouched still yields a defined value.
    template<class T>
    T read_value_core() {
        T result{};
        input_->read_bytes_into(reinterpret_cast<char *>(&result), sizeof(result));
        return result;
    }

    // Read a length prefix at full 64-bit width: one byte, or the
    // marker byte 255 followed by a uint64.
    uint64_t read_length_core() {
        uint64_t len = read_uint8();
        if (len == 255) len = read_uint64();
        return len;
    }

public:
    // Wrap an input device. The device must outlive the deserializer.
    DataDeserializer(InputDevice *i) : input_(i) {}

    uint8_t  read_uint8()  { return read_value_core<uint8_t>(); }
    uint16_t read_uint16() { return read_value_core<uint16_t>(); }
    uint32_t read_uint32() { return read_value_core<uint32_t>(); }
    uint64_t read_uint64() { return read_value_core<uint64_t>(); }
    int8_t   read_int8()   { return read_value_core<int8_t>(); }
    int16_t  read_int16()  { return read_value_core<int16_t>(); }
    int32_t  read_int32()  { return read_value_core<int32_t>(); }
    int64_t  read_int64()  { return read_value_core<int64_t>(); }

    // A bool is one byte; any nonzero byte reads as true.
    bool   read_bool()   { return read_uint8() != 0; }
    char   read_char()   { return read_value_core<char>(); }
    float  read_float()  { return read_value_core<float>(); }
    double read_double() { return read_value_core<double>(); }

    // Read a length prefix (see read_length_core).
    size_t read_length() {
        return static_cast<size_t>(read_length_core());
    }

    // Read a length-prefixed string of at most 'limit' bytes.
    //
    // If the encoded length exceeds the limit, raise_string_too_long()
    // is called on the device and an empty string is returned; the
    // string body is not consumed in that case. The limit is checked
    // at 64-bit width, before narrowing to size_t, so an oversized
    // length cannot wrap around and slip under the limit.
    StringType read_string_limit(uint64_t limit) {
        uint64_t encoded = read_length_core();
        if (encoded > limit) {
            input_->raise_string_too_long();
            encoded = 0;
        }
        const size_t len = static_cast<size_t>(encoded);
        StringType result(len, ' ');
        // Nothing to read for an empty string, and indexing element 0
        // of an empty string is best avoided for non-std string types.
        if (len > 0) input_->read_bytes_into(&(result[0]), len);
        return result;
    }

    // Read a length-prefixed string with the default 16MB limit.
    StringType read_string() {
        return read_string_limit(0x1000000);
    }
};
///////////////////////////////////////////////////////////////
//
// Class BaseBuffer
//
// You must supply a BaseBufferConfig which must define these:
//
//   using string_type = std::string;  // or compatible
//   void *basebuffer_malloc(size_t size);
//   void basebuffer_free(void *data);
//   void clear_error_flags();
//   void raise_eof_on_read();
//   void raise_string_too_long();
//   void raise_integer_truncated();
//
///////////////////////////////////////////////////////////////
template<class BaseBufferConfig>
class BaseBuffer : public BaseBufferConfig {
private:
2026-02-22 20:59:02 -05:00
// True if we own this buffer.
bool owned_;
// True if we're not allowed to expand this buffer.
bool fixed_size_;
// Start and end of the allocated block.
char *buf_lo_;
char *buf_hi_;
// The write and read cursors.
char *write_cursor_;
char *read_cursor_;
// Number of bytes read before buffer was last aligned.
int64_t pre_read_count_;
2026-02-22 22:46:54 -05:00
public:
using string_type = typename BaseBufferConfig::string_type;
private:
2026-02-22 22:46:54 -05:00
using DS = DataSerializer<BaseBuffer<BaseBufferConfig>>;
using DD = DataDeserializer<BaseBuffer<BaseBufferConfig>, string_type>;
void init(bool fixed, bool owned, char *buf, int64_t size) {
2026-02-22 20:59:02 -05:00
BaseBufferConfig::clear_error_flags();
owned_ = owned;
fixed_size_ = fixed;
buf_lo_ = buf;
buf_hi_ = buf_lo_ + size;
read_cursor_ = buf_lo_;
write_cursor_ = buf_lo_;
pre_read_count_ = 0;
}
public:
2026-02-22 22:46:54 -05:00
// Construct an empty buffer.
//
BaseBuffer() {
init(false, true, 0, 0);
}
// Construct an empty buffer, preallocate the specified amount of space.
//
BaseBuffer(int64_t size, bool fixed) {
assert(size >= 0);
2026-02-22 20:59:02 -05:00
init(fixed, true, (char *)BaseBufferConfig::basebuffer_malloc(size), size);
}
// Construct a streambuffer that reads from an external block of bytes.
//
BaseBuffer(std::string_view data) {
init(true, false, const_cast<char *>(data.data()), data.size());
write_cursor_ = buf_hi_;
}
2024-09-04 23:14:39 -04:00
// Modify an existing streambuffer to read from an external block of bytes.
//
void open(std::string_view data) {
2026-02-22 20:59:02 -05:00
if (owned_ && (buf_lo_ != 0)) BaseBufferConfig::basebuffer_free(buf_lo_);
2024-09-04 23:14:39 -04:00
init(true, false, const_cast<char *>(data.data()), data.size());
write_cursor_ = buf_hi_;
}
// Destructor. Frees the buffer, if any.
//
~BaseBuffer() {
2026-02-22 20:59:02 -05:00
if (owned_ && (buf_lo_ != 0)) BaseBufferConfig::basebuffer_free(buf_lo_);
}
// Return the total number of bytes ever read.
//
int64_t total_reads() const {
return (read_cursor_ - buf_lo_) + pre_read_count_;
}
// Return the total number of bytes ever written.
//
int64_t total_writes() const {
return (write_cursor_ - buf_lo_) + pre_read_count_;
}
// Return the total bytes in the buffer.
//
int64_t fill() const {
return write_cursor_ - read_cursor_;
}
// Checks to see if the buffer is empty.
//
bool empty() const {
return write_cursor_ == read_cursor_;
}
// Return the contents as a string_view.
//
std::string_view view() const {
return std::string_view(read_cursor_, write_cursor_ - read_cursor_);
}
// Make the specified amount of space in the buffer for writing.
//
char *make_space(int64_t bytes) {
int64_t available = buf_hi_ - write_cursor_;
if (available < bytes) make_space_slow(bytes);
return write_cursor_;
}
// Used after calling make_space then filling the space.
//
void wrote_space(int64_t bytes) {
int64_t available = buf_hi_ - write_cursor_;
assert(bytes >= 0);
assert(available >= bytes);
write_cursor_ += bytes;
}
// Rewind the read cursor to a previous position.
//
void unread_to(int64_t rd_count) {
assert(rd_count >= pre_read_count_);
assert(rd_count <= total_reads());
read_cursor_ = buf_lo_ + (rd_count - pre_read_count_);
}
// Rewind the write cursor to a previous position.
//
void unwrite_to(int64_t wr_count) {
assert(wr_count >= total_reads());
assert(wr_count <= total_writes());
write_cursor_ = buf_lo_ + (wr_count - pre_read_count_);
}
// Discard all data. Reset total read and write counts.
2024-09-04 23:14:39 -04:00
// May release the allocated buffer, if it is large.
//
void clear() {
2024-09-04 23:14:39 -04:00
if (!owned_) {
open("");
} else {
if ((!fixed_size_) && (buf_lo_ != nullptr) && ((buf_hi_ - buf_lo_) > 100000)) {
2026-02-22 20:59:02 -05:00
BaseBufferConfig::basebuffer_free(buf_lo_);
2024-09-04 23:14:39 -04:00
buf_lo_ = nullptr;
buf_hi_ = nullptr;
}
read_cursor_ = buf_lo_;
write_cursor_ = buf_lo_;
pre_read_count_ = 0;
}
}
// Write block of bytes into the buffer.
//
void write_bytes(const char *data, size_t size) {
make_space(size);
memcpy(write_cursor_, data, size);
write_cursor_ += size;
}
void write_bytes(std::string_view s) {
write_bytes(s.data(), s.size());
}
2026-02-22 22:46:54 -05:00
// Write methods — delegate to DataSerializer.
//
void write_uint8(uint64_t data) { DS(this).write_uint8(data); }
void write_uint16(uint64_t data) { DS(this).write_uint16(data); }
void write_uint32(uint64_t data) { DS(this).write_uint32(data); }
void write_uint64(uint64_t data) { DS(this).write_uint64(data); }
void write_int8(int64_t data) { DS(this).write_int8(data); }
void write_int16(int64_t data) { DS(this).write_int16(data); }
void write_int32(int64_t data) { DS(this).write_int32(data); }
void write_int64(int64_t data) { DS(this).write_int64(data); }
void write_bool(bool b) { DS(this).write_bool(b); }
void write_char(char c) { DS(this).write_char(c); }
void write_float(float arg) { DS(this).write_float(arg); }
void write_double(double arg) { DS(this).write_double(arg); }
void write_length(size_t len) { DS(this).write_length(len); }
void write_string(std::string_view s) { DS(this).write_string(s); }
2026-02-09 13:54:00 -05:00
// Write a LuaValueType.
//
2026-02-09 13:54:00 -05:00
void write_simple_dynamic_tag(LuaValueType tag) {
write_uint8(uint8_t(tag));
}
// Write a BaseLuaValue value.
//
// This works regardless of what kind of data is present in the
// BaseLuaValue.
//
template<class STRING>
void write_simple_dynamic(const BaseLuaValue<STRING> &sd) {
write_simple_dynamic_tag(sd.type);
switch(sd.type) {
2026-02-09 13:54:00 -05:00
case LuaValueType::STRING: write_string(sd.s); break;
case LuaValueType::TOKEN: write_string(sd.s); break;
case LuaValueType::NUMBER: write_double(sd.x); break;
case LuaValueType::BOOLEAN: write_bool(sd.x == 1.0); break;
case LuaValueType::VECTOR: write_double(sd.x); write_double(sd.y); write_double(sd.z); break;
default: assert(false);
}
}
2026-02-22 22:46:54 -05:00
// Read bytes into a caller-supplied buffer.
//
// This is the primitive read operation used by DataDeserializer.
// If there aren't enough bytes, calls raise_eof_on_read and
// fills the buffer with zeros.
//
void read_bytes_into(char *data, size_t size) {
int64_t avail = write_cursor_ - read_cursor_;
if (avail < int64_t(size)) {
BaseBufferConfig::raise_eof_on_read();
memset(data, 0, size);
return;
}
memcpy(data, read_cursor_, size);
read_cursor_ += size;
}
// Read a block of bytes from the buffer.
//
// Caution: the pointer returned is a pointer to the stream's buffer.
// It is only valid until you mutate the buffer. If the bytes aren't
// there, calls 'raise_eof_on_read', and then returns nullptr.
//
const char *read_bytes(int64_t bytes) {
int64_t avail = write_cursor_ - read_cursor_;
if (avail < bytes) {
2026-02-22 20:59:02 -05:00
BaseBufferConfig::raise_eof_on_read();
return nullptr;
}
char *data = read_cursor_;
read_cursor_ += bytes;
return data;
}
2026-02-22 22:46:54 -05:00
// Read methods — delegate to DataDeserializer.
//
uint8_t read_uint8() { return DD(this).read_uint8(); }
uint16_t read_uint16() { return DD(this).read_uint16(); }
uint32_t read_uint32() { return DD(this).read_uint32(); }
uint64_t read_uint64() { return DD(this).read_uint64(); }
int8_t read_int8() { return DD(this).read_int8(); }
int16_t read_int16() { return DD(this).read_int16(); }
int32_t read_int32() { return DD(this).read_int32(); }
int64_t read_int64() { return DD(this).read_int64(); }
bool read_bool() { return DD(this).read_bool(); }
char read_char() { return DD(this).read_char(); }
float read_float() { return DD(this).read_float(); }
double read_double() { return DD(this).read_double(); }
size_t read_length() { return DD(this).read_length(); }
string_type read_string_limit(uint64_t limit) { return DD(this).read_string_limit(limit); }
string_type read_string() { return DD(this).read_string(); }
// Read a string as a string_view.
//
2026-02-22 22:46:54 -05:00
// This is BaseBuffer-specific — it returns a view directly into
// the buffer, avoiding a copy.
//
std::string_view read_string_view_limit(uint64_t limit) {
size_t length = read_length();
if (length > limit) {
2026-02-22 20:59:02 -05:00
BaseBufferConfig::raise_string_too_long();
return std::string_view();
}
int64_t avail = write_cursor_ - read_cursor_;
if (avail < int64_t(length)) {
2026-02-22 20:59:02 -05:00
BaseBufferConfig::raise_eof_on_read();
return std::string_view();
}
std::string_view result(read_cursor_, length);
read_cursor_ += length;
return result;
}
// Read a string as a string_view.
//
std::string_view read_string_view() {
return read_string_view_limit(0x1000000);
}
2026-02-09 13:54:00 -05:00
// Read a LuaValueType
//
2026-02-09 13:54:00 -05:00
LuaValueType read_simple_dynamic_tag() {
return LuaValueType(read_uint8());
}
// Read a BaseLuaValue
//
template<class STRING>
void read_simple_dynamic(BaseLuaValue<STRING> *result) {
2026-02-09 13:54:00 -05:00
LuaValueType type = read_simple_dynamic_tag();
switch (type) {
2026-02-09 13:54:00 -05:00
case LuaValueType::STRING: result->set_string(read_string()); break;
case LuaValueType::TOKEN: result->set_token(read_string()); break;
case LuaValueType::NUMBER: result->set_number(read_double()); break;
case LuaValueType::BOOLEAN: result->set_boolean(read_bool()); break;
case LuaValueType::VECTOR: {
double x=read_double();
double y=read_double();
double z=read_double();
result->set_vector(x,y,z);
break;
}
default: result->set_uninitialized(); break;
}
}
// Attempt to do a "readline". If there is no newline in
// the buffer, returns empty string. If there is a newline,
// returns a block of text that ends in newline.
//
string_type readline() {
char *p = read_cursor_;
while ((p < write_cursor_) && (*p != '\n')) p++;
if (p == write_cursor_) {
return string_type();
} else {
p++;
string_type result(read_cursor_, p - read_cursor_);
read_cursor_ = p;
return result;
}
}
// Overwrite values previously written to the buffer.
//
// See the comment at the top of this file for an explanation.
//
void overwrite_int8(int64_t write_count_after, int64_t v) { overwrite_int_core<int8_t, int64_t>(write_count_after, v); }
void overwrite_int16(int64_t write_count_after, int64_t v) { overwrite_int_core<int16_t, int64_t>(write_count_after, v); }
void overwrite_int32(int64_t write_count_after, int64_t v) { overwrite_int_core<int32_t, int64_t>(write_count_after, v); }
void overwrite_int64(int64_t write_count_after, int64_t v) { overwrite_int_core<int64_t, int64_t>(write_count_after, v); }
void overwrite_uint8(int64_t write_count_after, uint64_t v) { overwrite_int_core<uint8_t, uint64_t>(write_count_after, v); }
void overwrite_uint16(int64_t write_count_after, uint64_t v) { overwrite_int_core<uint16_t, uint64_t>(write_count_after, v); }
void overwrite_uint32(int64_t write_count_after, uint64_t v) { overwrite_int_core<uint32_t, uint64_t>(write_count_after, v); }
void overwrite_uint64(int64_t write_count_after, uint64_t v) { overwrite_int_core<uint64_t, uint64_t>(write_count_after, v); }
// This is for unit testing.
//
bool layout_is(int64_t a, int64_t b, int64_t c) {
if (read_cursor_ - buf_lo_ != a) return false;
if (write_cursor_ - read_cursor_ != b) return false;
if (buf_hi_ - write_cursor_ != c) return false;
return true;
}
private:
void make_space_slow(int64_t bytes) {
assert(owned_ && "We don't own this buffer, can't grow it");
// Decide whether the current buffer is big enough.
int64_t data_size = (write_cursor_ - read_cursor_);
int64_t existing_size = (buf_hi_ - buf_lo_);
int64_t desired_size = 8192 + ((data_size + bytes) * 2);
// Update some simple things.
pre_read_count_ += (read_cursor_ - buf_lo_);
// Move the data to the beginning of the buffer, or to
// the beginning of a new buffer.
if (fixed_size_) {
assert((data_size + bytes <= existing_size) && "Not enough space in fixed-size buffer");
if (data_size > 0) memcpy(buf_lo_, read_cursor_, data_size);
} else if (existing_size >= desired_size) {
if (data_size > 0) memcpy(buf_lo_, read_cursor_, data_size);
} else {
2026-02-22 20:59:02 -05:00
char *nbuf = (char *)BaseBufferConfig::basebuffer_malloc(desired_size);
if (data_size > 0) memcpy(nbuf, read_cursor_, data_size);
2026-02-22 20:59:02 -05:00
if (buf_lo_ != nullptr) BaseBufferConfig::basebuffer_free(buf_lo_);
buf_lo_ = nbuf;
buf_hi_ = nbuf + desired_size;
}
// Update the pointers to the data region.
read_cursor_ = buf_lo_;
write_cursor_ = buf_lo_ + data_size;
}
template<class T, class XT>
void overwrite_int_core(int64_t write_count_after, XT vv) {
T v = vv;
assert(XT(v) == vv);
int64_t write_count_before = write_count_after - sizeof(v);
assert(write_count_before >= total_reads());
assert(write_count_after <= total_writes());
void *target = buf_lo_ + (write_count_before - pre_read_count_);
memcpy(target, &v, sizeof(v));
}
};