Files
integration/luprex/core/cpp/streambuffer.hpp

424 lines
15 KiB
C++
Raw Normal View History

//////////////////////////////////////////////////////////////
//
// STREAMBUFFER
//
// Serves as a buffer for buffered I/O operations. Has rather sophisticated
// methods to help serialize and deserialize data.
//
// The semantics of this class contain a lot of subtlety! Please read the
// documentation carefully.
//
// TELLING LINUX TO READ A FILE DESCRIPTOR INTO A STREAMBUFFER
//
// It is possible to read from a linux file descriptor, directly into a stream
// buffer. You should do this, it's very efficient. Here is how you do it:
//
// // With linux read, you have to pick an arbitrary buffer size.
// const int bufsize = 16384;
//
// // Allocate transient space in the streambuffer.
// char *space = streambuffer.alloc_space(bufsize);
//
// // Call the linux 'read' function.
// ssize_t bytes_read = read(fd, space, bufsize);
//
// // Append the bytes read to the streambuffer.
// streambuffer.wrote_space(bytes_read);
//
// Now, let's dig into the semantics of this. The method 'alloc_space' MUST be
// followed by 'wrote_space'. It is an error to invoke these methods unless you
// do them in that sequence. Together, these two methods count as a single
// 'write' operation into the StreamBuffer.
//
// 'alloc_space' allocates a block of bytes within the StreamBuffer. The
// pointer returned here is only valid until the 'wrote_space' operation. The
// method 'wrote_space' tells the StreamBuffer that the space has been populated
// with the specified amount of data. The data is then officially appended to
// the StreamBuffer. Again, the two methods 'alloc_space' followed by
// 'wrote_space' together count as a single write operation.
//
// THE OVERWRITE_INT METHODS:
//
// These overwrite methods are meant to help deal with this situation: you want
// to write a length followed by some data, but you don't know the length until
// after you've written the data. The workaround: write a dummy length, then
// write the data, and then overwrite the previously-written length with the
// correct length. This is the construction that accomplishes this:
//
// // Write the dummy length, this will get overwritten.
// streambuffer.write_int32(0);
//
// // Write the data, and calculate its length in bytes.
// int64_t write_count_1 = streambuffer.total_writes();
// write_data(streambuffer);
// int64_t write_count_2 = streambuffer.total_writes();
// int64_t data_len = write_count_2 - write_count_1;
//
// // Overwrite the previously-written dummy length.
// streambuffer.overwrite_int32(write_count_1, data_len);
//
// Almost all of this is self-explanatory, but the last line is interesting. In
// order to know what part of the buffer to overwrite, overwrite_int uses
// write_count_1 as a pointer into the buffer - it points immediately to the
// right of the integer to overwrite.
//
// OVERWRITE_INT LIMITS
//
// If you use write_int to write an integer into the buffer, you are allowed to
// overwrite that integer UNTIL you do a read from the buffer. Once you do a
// read, it is no longer legal to overwrite ints that you wrote BEFORE the read.
//
// WRITE_STRING STORES THE STRING LENGTH, WRITE_BYTES DOES NOT
//
// write_string writes a string into the buffer and prepends a length. The
// encoding of the length field is designed to be efficient for short strings
// but still capable of encoding long lengths.
//
// write_bytes doesn't store the data length in the buffer. It's just a raw
// write of bytes.
//
// STREAM EXCEPTIONS
//
// If you do a read_int64, but the buffer doesn't contain the necessary 8 bytes,
// it throws a StreamEof exception. In general, during reading, the following
// common situations generate StreamEof or StreamCorruption exceptions:
//
// * not enough bytes to satisfy a 'read' call: StreamEof
// * call verify_eof, but the buffer is not empty: StreamCorruption
// * call read_string, but the string is unreasonably long: StreamCorruption
//
// Exceptions are only generated when reading from a stream that contains bad
// data. Any other error generates a full-blown abort. For example, if you try
// to write to a stream that's not open for writing, that's an abort, not an
// exception. Write operations never generate exceptions.
//
// Sometimes, it is convenient to throw StreamCorruption yourself, if you detect
// that the data you've read from a stream is invalid. This can make error
// handling a little cleaner.
//
// READ BYTES POINTER VALIDITY
//
// When you call read_bytes, it returns a pointer to a block of bytes. This
// pointer only remains valid until you do a 'write' into the stream.
//
// NESTED DECODING
//
// Here is an interesting construct:
//
// // Read a message from the stream.
// size_t len = streambuffer.read_int32();
// const char *bytes = streambuffer.read_bytes(len);
//
// // Construct another stream object to decode the message.
// StreamBuffer substream(bytes, len);
// decode(substream);
//
// This is perfectly valid and a potentially convenient way to parse the
// contents of a message.
//
// UNREADING BYTES
//
// It's possible to 'unread' bytes that you've already read from a stream. This
// makes it possible to read those same bytes again.
//
// A common situation where this might be useful is: you're decoding a message,
// but you discover halfway through the process of decoding the message that you
// haven't received the whole message yet. In that case, it may be desirable to
// unread the partial message, so that you can wait for the rest of the message
// to be received.
//
// Here is the construction that accomplishes this:
//
// // Get the stream's read count before parsing the message.
// int64_t read_count_before = streambuffer.total_reads();
//
// // Parse the message, but if there's an EOF, deal with it:
// try {
// // Parse the message.
// int32_t value1 = streambuffer.read_int32();
// std::string value2 = streambuffer.read_string_limit(maxlen);
// int64_t value3 = streambuffer.read_int64();
//
// // Great! I got the whole message.
// execute_message(value1, value2, value3);
// } catch (StreamEof) {
// // I ran out of bytes. Unread the message.
// streambuffer.unread_to(read_count_before);
// }
//
// UNREAD LIMITS
//
// If you read bytes from a stream, that data can be 'unread' until you do a
// write. After a write, it is no longer possible to 'unread' data that you
// read before the write.
//
// STREAMBUFFERS THAT DON'T OWN THEIR OWN MEMORY
//
// If you create a streambuffer using this constructor:
//
// StreamBuffer(const char *data, int64_t len);
//
// This StreamBuffer reads from an external (unowned) block of bytes, which is
// not copied! The StreamBuffer saves the pointer that you passed in. This
// pointer must remain valid until you're done with the StreamBuffer.
//
// A StreamBuffer that reads from an external block of bytes is read-only.
// Attempts to write to this buffer will be caught and will cause an abort. The
// total_writes for such a buffer returns the 'len' value that you initialized
// the buffer with.
//
// USING A STREAMBUFFER TO READ AN ENTIRE FILE
//
// If you wish to read an entire file and store the file contents in a
// StreamBuffer, you should probe the size of the file, then allocate a
// StreamBuffer of the correct size using this constructor:
//
// StreamBuffer(int64_t size, bool fixed_size);
//
// Then, you can use 'alloc_space' and 'wrote_space' to read the file into the
// buffer in a single read call.
//
// USING A STREAMBUFFER AS A LUA_WRITER OR LUA_READER
//
// You can use a streambuffer as a lua_Writer, as follows:
//
// lua_dump(L, StreamBuffer::lua_writer, stream.lua_writer_ud());
//
// Anything written to the lua_writer gets appended to the streambuffer, the
// same as if it had been written using write_bytes.
//
// You can use a streambuffer as a lua_Reader, as follows:
//
// lua_load(L, StreamBuffer::lua_reader, stream.lua_reader_ud(nbytes), ...)
//
// The exact semantics of the lua_reader are tricky, so be careful:
// lua_reader_ud calls 'read_bytes' immediately, and it stores the bytes in a
// "cache of bytes for lua." Then, when the lua_reader gets invoked, the reader
// returns the entire contents of the cache, and it clears the cache. Here are
// some consequences of this design:
//
// 1. The number of bytes read from the stream is always exactly equal to
// nbytes, even if lua never calls the lua_reader.
//
// 2. If the stream doesn't contain nbytes, a StreamEof exception gets thrown
// from lua_reader_ud, not from the lua_Reader. This is good, because it
// means exceptions don't get thrown from inside the lua runtime.
//
//////////////////////////////////////////////////////////////
#ifndef STREAMBUFFER_HPP
#define STREAMBUFFER_HPP
#include "luastack.hpp"
2021-07-18 17:48:39 -04:00
#include "util.hpp"
#include <cstdint>
#include <string>
#include <sstream>
#include <cassert>
2021-07-18 17:48:39 -04:00
#include <utility>
// Base class for the exceptions thrown while reading from a stream.
//
// Catch this type by reference to handle every stream read failure in one
// place. what() returns a pointer to a string literal describing the failure.
class StreamException
{
public:
    // The class is polymorphic, so it needs a virtual destructor: destroying
    // a derived exception through a base pointer must be well-defined.
    virtual ~StreamException() = default;

    // Short, human-readable description of the failure.
    virtual char const *what() const { return "General stream exception"; }
};

// Thrown when a read needs more bytes than the stream currently contains.
class StreamEof : public StreamException
{
public:
    char const *what() const override { return "Stream ran out of data"; }
};

// Thrown when the data read from a stream is invalid.
class StreamCorruption : public StreamException
{
public:
    char const *what() const override { return "Stream contained invalid data"; }
};
// A byte buffer with separate read and write cursors, used for buffered I/O
// and for serializing / deserializing values.
//
// The detailed rules (overwrite limits, unread limits, pointer validity,
// unowned buffers, lua reader/writer behavior) are described in the comment
// at the top of this file.
//
// NOTE(review): this class declares a destructor and stores raw buffer
// pointers together with an 'owned_' flag, but it declares no copy
// operations, so the compiler-generated copy duplicates those pointers.
// Confirm that StreamBuffers are never copied, or delete the copy operations.
//
// NOTE(review): the file header describes 'alloc_space' and 'wrote_space',
// but neither is declared in this class. Confirm which of the two is stale.
class StreamBuffer {
public:
    // Creates an empty buffer.
    StreamBuffer();

    // Creates an empty buffer with 'size' bytes preallocated.
    // 'fixed_size' presumably controls whether the buffer may grow later
    // (see fixed_size_) - TODO confirm against the implementation.
    StreamBuffer(int64_t size, bool fixed_size);

    // Creates a buffer that reads from the external block [s, s + len).
    // See "STREAMBUFFERS THAT DON'T OWN THEIR OWN MEMORY" in the file header.
    StreamBuffer(const char *s, int64_t len);

    // Destroys the StreamBuffer.
    ~StreamBuffer();

    // Total number of bytes ever read from this buffer.
    int64_t total_reads() const;

    // Total number of bytes ever written to this buffer.
    int64_t total_writes() const;

    // Amount of data currently inside the buffer.
    int64_t fill() const;

    // Discards all data and resets the total read and write counts.
    // Frees up as much space as possible.
    void clear();

    // Appends 'len' raw bytes to the buffer.
    //
    // Caution: no length prefix is written, only the bytes themselves.
    //
    void write_bytes(const char *bytes, int64_t len);

    // Reads a block of 'bytes' bytes and returns a pointer to it.
    //
    // Throws StreamEof if that many bytes aren't present.
    //
    const char *read_bytes(int64_t bytes);

    // Writers for integers and floats.
    //
    // The integral parameters are deliberately 64 bits wide so that the
    // implementation can check at runtime that the value is in range for the
    // width being written.
    //
    void write_int8(int64_t v);
    void write_int16(int64_t v);
    void write_int32(int64_t v);
    void write_int64(int64_t v);
    void write_uint8(uint64_t v);
    void write_uint16(uint64_t v);
    void write_uint32(uint64_t v);
    void write_uint64(uint64_t v);
    void write_float(float f);
    void write_double(double d);

    // Readers for fixed-size integers and floats.
    //
    // May throw StreamEof if the required number of bytes aren't present.
    //
    int8_t read_int8();
    int16_t read_int16();
    int32_t read_int32();
    int64_t read_int64();

    // The unsigned readers read the signed value of the same width and
    // convert it to the unsigned type.
    uint8_t read_uint8() { return static_cast<uint8_t>(read_int8()); }
    uint16_t read_uint16() { return static_cast<uint16_t>(read_int16()); }
    uint32_t read_uint32() { return static_cast<uint32_t>(read_int32()); }
    uint64_t read_uint64() { return static_cast<uint64_t>(read_int64()); }

    float read_float();
    double read_double();

    // Writers for other types.
    //
    // Strings are stored with a leading length field; reading a string reads
    // that length first and then the corresponding number of bytes.
    //
    // A bool is stored as an int8 holding 1 (true) or 0 (false).
    void write_bool(bool b) {
        const int64_t encoded = b ? 1 : 0;
        write_int8(encoded);
    }
    void write_hashvalue(const util::HashValue &hv);
    void write_string(const std::string &s);

    // Readers for other types.
    //
    // Throw StreamEof if the required number of bytes aren't present.
    // read_string_limit throws StreamCorruption when the stored length
    // exceeds 'max_allowed'.
    //
    // Any nonzero int8 reads back as true.
    bool read_bool() { return read_int8() != 0; }
    util::HashValue read_hashvalue();
    std::string read_string();
    std::string read_string_limit(int64_t max_allowed);

    // Overwrite an integer that was previously written to the buffer.
    //
    // 'write_count_after' is the total_writes() value taken right after the
    // integer was written, i.e. the position just to the right of it. See the
    // comment at the top of this file for the full explanation and limits.
    //
    void overwrite_int8(int64_t write_count_after, int64_t v);
    void overwrite_int16(int64_t write_count_after, int64_t v);
    void overwrite_int32(int64_t write_count_after, int64_t v);
    void overwrite_int64(int64_t write_count_after, int64_t v);
    void overwrite_uint8(int64_t write_count_after, uint64_t v);
    void overwrite_uint16(int64_t write_count_after, uint64_t v);
    void overwrite_uint32(int64_t write_count_after, uint64_t v);
    void overwrite_uint64(int64_t write_count_after, uint64_t v);

    // Reports whether the buffer is empty.
    bool at_eof();

    // Throws StreamCorruption unless the buffer is empty.
    void verify_eof();

    // Moves the read cursor back to an earlier position.
    void unread_to(int64_t total_reads);

    // Moves the write cursor back to an earlier position.
    void unwrite_to(int64_t total_writes);

    // Copies the entire contents of this streambuffer into 'sb'.
    void copy_into(StreamBuffer *sb);

    // Compares the contents of this streambuffer with those of 'sb'.
    bool contents_equal(const StreamBuffer *sb) const;

    // Computes a noncryptographic but good hash of the buffer contents.
    util::HashValue hash() const;

    // lua_Writer support: the callback and the userdata to pass with it.
    static int lua_writer(lua_State *L, const void *p, size_t sz, void *ud);
    void *lua_writer_ud();

    // lua_Reader support: the callback and the userdata to pass with it.
    // See "USING A STREAMBUFFER AS A LUA_WRITER OR LUA_READER" in the file
    // header for when the bytes are actually consumed.
    static const char *lua_reader(lua_State *L, void *data, size_t *size);
    void *lua_reader_ud(int64_t bytes);

private:
    // Low and high ends of the allocated block.
    char *buf_lo_;
    char *buf_hi_;

    // Current write and read positions.
    char *write_cursor_;
    char *read_cursor_;

    // Number of bytes read before the buffer was last aligned.
    int64_t pre_read_count_;

    // True if this object owns the buffer.
    bool owned_;

    // True if the buffer is not allowed to expand.
    bool fixed_size_;

    // Value handed back by the lua reader.
    const char *lua_reader_data_;
    int64_t lua_reader_size_;

    // (Re)initializes this object around a new buffer.
    void init(bool fixed, bool owned, char *buf, int64_t size);

    // Ensures at least 'bytes' bytes can be written at the write cursor.
    // The common case (enough room already) is handled inline; otherwise the
    // work is delegated to make_space_slow.
    void make_space(int64_t bytes) {
        const int64_t room = buf_hi_ - write_cursor_;
        if (room >= bytes) {
            return;
        }
        make_space_slow(bytes);
    }
    void make_space_slow(int64_t bytes);

    // Throws StreamEof unless at least 'bytes' bytes lie between the read
    // cursor and the write cursor.
    //
    // NOTE(review): a negative 'bytes' passes this check whenever
    // write_cursor_ >= read_cursor_; confirm that callers reject negative
    // counts before advancing the read cursor.
    void check_available(int64_t bytes) {
        const int64_t unread = write_cursor_ - read_cursor_;
        if (unread >= bytes) {
            return;
        }
        throw StreamEof();
    }

    // Shared implementation behind the overwrite_int functions.
    char *get_overwrite(int64_t size, int64_t write_count_after);

    // Unit-testing hooks.
    bool layout_is(int64_t a, int64_t b, int64_t c);
    friend int lfn_unittests_streambuffer(lua_State *L);
};
#endif // STREAMBUFFER_HPP