// integration/luprex/cpp/core/streambuffer.hpp

//////////////////////////////////////////////////////////////
//
// STREAMBUFFER
//
// Serves as a buffer for buffered I/O operations. Has rather sophisticated
// methods to help serialize and deserialize data.
//
// The semantics of this class contain a lot of subtlety! Please read the
// documentation carefully.
//
// TELLING LINUX TO READ A FILE DESCRIPTOR INTO A STREAMBUFFER
//
// It is possible to read from a linux file descriptor, directly into a stream
// buffer. You should do this, it's very efficient. Here is how you do it:
//
// // With linux read, you have to pick an arbitrary buffer size.
// const int bufsize = 16384;
//
// // Allocate transient space in the streambuffer.
// char *space = streambuffer.make_space(bufsize);
//
// // Call the linux 'read' function.
// ssize_t bytes_read = read(fd, space, bufsize);
//
// // Append the bytes read to the streambuffer.
// streambuffer.wrote_space(bytes_read);
//
// The make_space operation allocates an array of bytes where the data can be
// written, and returns a pointer to that array of bytes. The read operation
// fills some or all of the allocated bytes. Finally, the wrote_space operation
// notifies the StreamBuffer that some of the bytes have been filled with data.
// These bytes are appended to the StreamBuffer.
//
// The pointer returned by 'make_space' is only valid until you mutate the
// StreamBuffer. Therefore, you should call 'make_space', then immediately fill
// the bytes. It is imperative that 'wrote_space' be the first mutator after
// 'make_space.' You should think of 'make_space' followed by 'wrote_space' as
// a single two-phase operation.
//
// THE OVERWRITE_INT METHODS:
//
// These overwrite methods are meant to help deal with this situation: you want
// to write a length followed by some data, but you don't know the length until
// after you've written the data. The workaround: write a dummy length, then
// write the data, and then overwrite the previously-written length with the
// correct length. This is the construction that accomplishes this:
//
// // Write the dummy length, this will get overwritten.
// streambuffer.write_int32(0);
//
// // Write the data, and calculate its length in bytes.
// int64_t write_count_1 = streambuffer.total_writes();
// write_data(streambuffer);
// int64_t write_count_2 = streambuffer.total_writes();
// int64_t data_len = write_count_2 - write_count_1;
//
// // Overwrite the previously-written dummy length.
// streambuffer.overwrite_int32(write_count_1, data_len);
//
// Almost all of this is self-explanatory, but the last line is interesting. In
// order to know what part of the buffer to overwrite, overwrite_int uses
// write_count_1 as a pointer into the buffer - it points immediately to the
// right of the integer to overwrite.
//
// OVERWRITE_INT LIMITS
//
// If you use write_int to write an integer into the buffer, you are allowed to
// overwrite that integer UNTIL you do a read from the buffer. Once you do a
// read, it is no longer legal to overwrite ints that you wrote BEFORE the read.
//
// WRITE_STRING STORES THE STRING LENGTH, WRITE_BYTES DOES NOT
//
// write_string writes a string into the buffer and prepends a length. The
// encoding of the length field is designed to be efficient for short strings
// but still capable of encoding long lengths.
//
// write_bytes doesn't store the data length in the buffer. It's just a raw
// write of bytes.
//
// STREAM EXCEPTIONS
//
// If you do a read_int64, but the buffer doesn't contain the necessary 8 bytes,
// it throws a StreamEof exception. In general, during reading, the following
// common situations generate StreamEof or StreamCorruption exceptions:
//
// * not enough bytes to satisfy a 'read' call: StreamEof
// * call read_eof, but the buffer is not empty: StreamCorruption
// * call read_string, but the string is unreasonably long: StreamCorruption
//
// Exceptions are only generated when reading from a stream that contains bad
// data. Any other error generates a full-blown abort. For example, if you try
// to write to a stream that's not open for writing, that's an abort, not an
// exception. Write operations never generate exceptions.
//
// Sometimes, it is convenient to throw StreamCorruption yourself, if you detect
// that the data you've read from a stream is invalid. This can make error
// handling a little cleaner.
//
// READ BYTES POINTER VALIDITY
//
// When you call read_bytes, it returns a pointer to a block of bytes. This
// pointer only remains valid until you do a 'write' into the stream.
//
// UNREADING BYTES
//
// It's possible to 'unread' bytes that you've already read from a stream. This
// makes it possible to read those same bytes again.
//
// A common situation where this might be useful is: you're decoding a message,
// but you discover halfway through the process of decoding the message that you
// haven't received the whole message yet. In that case, it may be desirable to
// unread the partial message, so that you can wait for the rest of the message
// to be received.
//
// Here is the construction that accomplishes this:
//
// // Get the stream's read count before parsing the message.
// int64_t read_count_before = streambuffer.total_reads();
//
// // Parse the message, but if there's an EOF, deal with it:
// try {
//     // Parse the message.
//     int32_t value1 = streambuffer.read_int32();
//     eng::string value2 = streambuffer.read_string(maxlen);
//     int64_t value3 = streambuffer.read_int64();
//
//     // Great! I got the whole message.
//     execute_message(value1, value2, value3);
// } catch (const StreamEof &) {
//     // I ran out of bytes. Unread the message.
//     streambuffer.unread_to(read_count_before);
// }
//
// UNREAD LIMITS
//
// If you read bytes from a stream, that data can be 'unread' until you do a
// write. After a write, it is no longer possible to 'unread' data that you
// read before the write.
//
// STREAMBUFFERS THAT DON'T OWN THEIR OWN MEMORY
//
// If you create a streambuffer using this constructor:
//
// StreamBuffer(const char *s, int64_t len);
//
// This StreamBuffer reads from an external (unowned) block of bytes, which is
// not copied! The StreamBuffer saves the pointer that you passed in. This
// pointer must remain valid until you're done with the StreamBuffer.
//
// A StreamBuffer that reads from an external block of bytes is read-only.
// Attempts to write to this buffer will be caught and will cause an abort. The
// total_writes for such a buffer returns the 'len' value that you initialized
// the buffer with.
//
// NESTED DECODING
//
// Here is an interesting construct:
//
// // Read a message from the stream.
// int64_t len = streambuffer.read_int32();
// const char *bytes = streambuffer.read_bytes(len);
//
// // Construct another stream object to decode the message.
// StreamBuffer substream(bytes, len);
// decode(substream);
//
// This is perfectly valid and a potentially convenient way to parse the
// contents of a message. Note that the substream contains a pointer to
// the parent stream's buffer, and therefore, data corruption will occur
// if you mutate the parent stream while reading the substream.
//
// USING A STREAMBUFFER TO READ AN ENTIRE FILE
//
// If you wish to read an entire file and store the file contents in a
// StreamBuffer, you should probe the size of the file, then allocate a
// StreamBuffer of the correct size using this constructor:
//
// StreamBuffer(int64_t size, bool fixed_size);
//
// Then, you can use 'make_space' and 'wrote_space' to read the file into the
// buffer in a single read call.
//
// USING A STREAMBUFFER AS A LUA_WRITER OR LUA_READER
//
// You can use a streambuffer as a lua_Writer, as follows:
//
// lua_dump(L, StreamBuffer::lua_writer, stream.lua_writer_ud());
//
// Anything written to the lua_writer gets appended to the streambuffer, the
// same as if it had been written using write_bytes.
//
// You can use a streambuffer as a lua_Reader, as follows:
//
// lua_load(L, StreamBuffer::lua_reader, stream.lua_reader_ud(nbytes), ...);
//
// The exact semantics of the lua_reader are tricky, so be careful:
// lua_reader_ud calls 'read_bytes' immediately, and it stores the bytes in a
// "cache of bytes for lua." Then, when the lua_reader gets invoked, the reader
// returns the entire contents of the cache, and it clears the cache. Here are
// some consequences of this design:
//
// 1. The number of bytes read from the stream is always exactly equal to
// nbytes, even if lua never calls the lua_reader.
//
// 2. If the stream doesn't contain nbytes, a StreamEof exception gets thrown
// from lua_reader_ud, not from the lua_Reader. This is good, because it
// means exceptions don't get thrown from inside the lua runtime.
//
//////////////////////////////////////////////////////////////
#ifndef STREAMBUFFER_HPP
#define STREAMBUFFER_HPP
#include "wrap-string.hpp"
#include "wrap-sstream.hpp"
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <memory>
#include <ostream>
#include <string_view>
#include <utility>
#include "base-writer.hpp"
#include "luastack.hpp"
#include "util.hpp"
// Exception types thrown while reading from a stream.
// Root of the stream exception hierarchy. what() returns a short, static
// description of the failure; derived exceptions supply a more specific one.
class StreamException : public eng::nevernew
{
public:
    virtual const char *what() const
    {
        return "General stream exception";
    }
};
class StreamEof : public StreamException
{
public:
virtual char const *what() const { return "Stream ran out of data"; }
};
class StreamCorruption : public StreamException
{
public:
virtual char const *what() const { return "Stream contained invalid data"; }
};
// The stream buffer class.
// A byte buffer with a read cursor and a write cursor, used for buffered I/O
// and for serializing / deserializing data. The comment at the top of this
// file describes the semantics in detail (pointer validity, the limits on
// unread and overwrite, and buffers that don't own their memory).
//
// NOTE(review): the fixed-width integer and string read/write methods used in
// the examples at the top of this file are not declared in this class;
// presumably they are supplied by BaseReader / BaseWriter - confirm.
class StreamBuffer : public eng::nevernew, public BaseReader<StreamBuffer>, public BaseWriter<StreamBuffer> {
public:
    using read_string_type = eng::string;

    // Construct an empty buffer.
    StreamBuffer();

    // Construct an empty buffer, preallocate the specified amount of space.
    StreamBuffer(int64_t size, bool fixed_size);

    // Construct a streambuffer that reads from an external block of bytes.
    StreamBuffer(const char *s, int64_t len);

    // Construct a streambuffer that reads from an external block of bytes.
    StreamBuffer(std::string_view data);

    // Delete a StreamBuffer.
    ~StreamBuffer();

    // Not copyable. The class was already implicitly non-copyable because of
    // the unique_ptr member; spelling it out documents the intent, since the
    // buffer may own its allocation (see owned_).
    StreamBuffer(const StreamBuffer &) = delete;
    StreamBuffer &operator=(const StreamBuffer &) = delete;

    // Get the total number of bytes ever read from this buffer.
    int64_t total_reads() const;

    // Get the total number of bytes ever written to this buffer.
    int64_t total_writes() const;

    // Amount of data inside the buffer.
    int64_t fill() const;

    // Get a pointer to the data.
    const char *data() const;

    // Get entire contents as a string_view
    std::string_view view() const;

    // Discard all data. Reset total read and write counts.
    // Frees up as much space as possible.
    void clear();

    // Attempt to do a "readline". If there is no newline in
    // the buffer, returns empty string. If there is a newline,
    // returns a block of text that ends in newline.
    eng::string readline();

    // Write block of bytes into the buffer.
    //
    // Caution: this function doesn't write the length!
    // It just writes the bytes.
    //
    void write_bytes(const char *bytes, int64_t len);
    void write_bytes(std::string_view s);

    // Read a block of bytes from the buffer.
    //
    // Caution: the pointer returned is a pointer to the stream's buffer. It is
    // only valid until you mutate the buffer. Throws StreamEof if the specified
    // number of bytes aren't present.
    //
    const char *read_bytes(int64_t bytes);

    // Copy bytes from the StreamBuffer into an external buffer.
    //
    void read_bytes_into(char *target, int64_t len);

    // Read a string as a string_view.
    //
    // The overload without an argument uses a limit of 0x1000000 (2^24).
    //
    std::string_view read_string_view_limit(uint64_t limit);
    std::string_view read_string_view() { return read_string_view_limit(0x1000000); }

    // Read and write larger types.
    //
    // Throws StreamEof if the specified number of bytes aren't present.
    // Read string with a length limit will throw 'StreamCorruption' if the
    // length is too long.
    //
    void write_xyz(const util::XYZ &xyz);
    void write_dxyz(const util::DXYZ &xyz);
    util::XYZ read_xyz();
    util::DXYZ read_dxyz();
    void write_hashvalue(const util::HashValue &hv);
    util::HashValue read_hashvalue();

    // Read the entire contents of the buffer as a string.
    //
    eng::string read_entire_contents();

    // Overwrite values previously written to the buffer.
    //
    // See the comment at the top of this file for an explanation.
    //
    void overwrite_int8(int64_t write_count_after, int64_t v);
    void overwrite_int16(int64_t write_count_after, int64_t v);
    void overwrite_int32(int64_t write_count_after, int64_t v);
    void overwrite_int64(int64_t write_count_after, int64_t v);
    void overwrite_uint8(int64_t write_count_after, uint64_t v);
    void overwrite_uint16(int64_t write_count_after, uint64_t v);
    void overwrite_uint32(int64_t write_count_after, uint64_t v);
    void overwrite_uint64(int64_t write_count_after, uint64_t v);

    // This function checks to see if the buffer is empty.
    bool empty();

    // Verify that the buffer is empty, if not, throw StreamCorruption.
    void verify_empty();

    // Make sure the specified number of bytes are available to read.
    // Throws StreamEof if fewer than 'bytes' bytes lie between the read
    // cursor and the write cursor. Does not modify the buffer.
    void check_available(int64_t bytes) const {
        const int64_t unread_bytes = write_cursor_ - read_cursor_;
        if (unread_bytes < bytes) {
            throw StreamEof();
        }
    }

    // Rewind the read cursor to a previous position.
    void unread_to(int64_t total_reads);

    // Rewind the write cursor to a previous position.
    void unwrite_to(int64_t total_writes);

    // Copy the entire contents of this streambuffer into another one.
    void copy_into(StreamBuffer *sb);

    // Transfer the entire contents of this streambuffer into another one.
    void transfer_into(StreamBuffer *sb);

    // Compare the contents of this streambuffer to another one.
    bool contents_equal(const StreamBuffer *sb) const;

    // Calculate a noncryptographic but good hash of what's in the buffer.
    util::HashValue hash() const;

    // Use the stream buffer as a lua_Writer.
    static int lua_writer(lua_State *L, const void* p, size_t sz, void* ud);
    void *lua_writer_ud();

    // Use the stream buffer as a lua_Reader.
    static const char *lua_reader(lua_State *L, void *data, size_t *size);
    void *lua_reader_ud(int64_t bytes);

    // Get an ostream that writes into the StreamBuffer.
    std::ostream &ostream();

    // Throw a StreamCorruption exception. These never return.
    // NOTE(review): presumably these are the error hooks called by
    // BaseReader - confirm against base-writer.hpp.
    [[noreturn]] void raise_truncated() { throw StreamCorruption(); }
    [[noreturn]] void raise_string_too_long() { throw StreamCorruption(); }

    // This is always false, because this module throws exceptions
    // when reading beyond EOF.
    bool read_beyond_eof() const { return false; }

private:
    // Start and end of the allocated block.
    char *buf_lo_;
    char *buf_hi_;

    // The write and read cursors.
    char *write_cursor_;
    char *read_cursor_;

    // Number of bytes read before buffer was last aligned.
    int64_t pre_read_count_;

    // True if we own this buffer.
    bool owned_;

    // True if we're not allowed to expand this buffer.
    bool fixed_size_;

    // Lua reader return value.
    const char *lua_reader_data_;
    int64_t lua_reader_size_;

    // The ostream. Only allocated on demand.
    std::unique_ptr<std::ostream> ostream_;

    // Initialize with a new buffer.
    void init(bool fixed, bool owned, char *buf, int64_t size);

    // Make the specified amount of space in the buffer for writing.
    // Return a pointer to the space.
    //
    // Fast path: if enough room already exists between the write cursor and
    // the end of the block, nothing is done; otherwise make_space_slow is
    // asked to provide it. Either way the current write cursor is returned.
    char *make_space(int64_t bytes) {
        const int64_t room = buf_hi_ - write_cursor_;
        if (room < bytes) {
            make_space_slow(bytes);
        }
        return write_cursor_;
    }
    void make_space_slow(int64_t bytes);
    void wrote_space(int64_t bytes);

    // Implementation for the overwrite_int functions.
    char *get_overwrite(int64_t size, int64_t write_count_after);

    // This is for unit testing.
    bool layout_is(int64_t a, int64_t b, int64_t c);
    friend int lfn_unittests_streambuffer(lua_State *L);
};
#endif // STREAMBUFFER_HPP