integration/luprex/cpp/core/streambuffer.hpp

//////////////////////////////////////////////////////////////
//
// STREAMBUFFER
//
// Serves as a buffer for buffered I/O operations.  Has rather sophisticated
// methods to help serialize and deserialize data.
//
// The semantics of this class contain a lot of subtlety!  Please read the
// documentation carefully.
//
// TELLING LINUX TO READ A FILE DESCRIPTOR INTO A STREAMBUFFER
//
// It is possible to read from a linux file descriptor, directly into a stream
// buffer.  You should do this, it's very efficient.  Here is how you do it:
//
//     // With linux read, you have to pick an arbitrary buffer size.
//     const int bufsize = 16384;
//
//     // Allocate transient space in the streambuffer.
//     char *space = streambuffer.make_space(bufsize);
//
//     // Call the linux 'read' function.
//     ssize_t bytes_read = read(fd, space, bufsize);
//
//     // Append the bytes read to the streambuffer.
//     streambuffer.wrote_space(bytes_read);
//
// The make_space operation allocates an array of bytes where the data can be
// written, and returns a pointer to that array of bytes.  The read operation
// fills some or all of the allocated bytes. Finally, the wrote_space operation
// notifies the StreamBuffer that some of the bytes have been filled with data.
// These bytes are appended to the StreamBuffer.
//
// The pointer returned by 'make_space' is only valid until you mutate the
// StreamBuffer. Therefore, you should call 'make_space', then immediately fill
// the bytes.  It is imperative that 'wrote_space' be the first mutator after
// 'make_space.'  You should think of 'make_space' followed by 'wrote_space' as
// a single two-phase operation.
//
// THE OVERWRITE_INT METHODS:
//
// These overwrite methods are meant to help deal with this situation: you want
// to write a length followed by some data, but you don't know the length until
// after you've written the data.  The workaround: write a dummy length, then
// write the data, and then overwrite the previously-written length with the
// correct length. This is the construction that accomplishes this:
//
//     // Write the dummy length, this will get overwritten.
//     streambuffer.write_int32(0);
//
//     // Write the data, and calculate its length in bytes.
//     int64_t write_count_1 = streambuffer.total_writes();
//     write_data(streambuffer);
//     int64_t write_count_2 = streambuffer.total_writes();
//     int64_t data_len = write_count_2 - write_count_1;
//
//     // Overwrite the previously-written dummy length.
//     streambuffer.overwrite_int32(write_count_1, data_len);
//
// Almost all of this is self-explanatory, but the last line is interesting. In
// order to know what part of the buffer to overwrite, overwrite_int uses
// write_count_1 as a pointer into the buffer - it points immediately to the
// right of the integer to overwrite.
//
// OVERWRITE_INT LIMITS
//
// If you use write_int to write an integer into the buffer, you are allowed to
// overwrite that integer UNTIL you do a read from the buffer.  Once you do a
// read, it is no longer legal to overwrite ints that you wrote BEFORE the read.
//
// WRITE_STRING STORES THE STRING LENGTH, WRITE_BYTES DOES NOT
//
// write_string writes a string into the buffer and prepends a length. The
// encoding of the length field is designed to be efficient for short strings
// but still capable of encoding long lengths.
//
// write_bytes doesn't store the data length in the buffer.  It's just a raw
// write of bytes.
//
// STREAM EXCEPTIONS
//
// If you do a read_int64, but the buffer doesn't contain the necessary 8 bytes,
// it throws a StreamEofOnRead exception. In general, during reading, the
// following common situations generate exceptions derived from StreamException:
//
//   * not enough bytes to satisfy a 'read' call: StreamEofOnRead
//   * call read_eof, but the buffer is not empty: StreamCorruption
//   * call read_string, but the string is unreasonably long: StreamStringTooLong
//
// Exceptions are generated when reading from a stream that contains bad data.
// The one write-side exception is StreamIntegerTruncated, which reports an
// integer that was truncated while being written.  Any other error generates a
// full-blown abort.  For example, if you try to write to a stream that's not
// open for writing, that's an abort, not an exception.
//
// Sometimes, it is convenient to throw StreamCorruption yourself, if you detect
// that the data you've read from a stream is invalid.  This can make error
// handling a little cleaner.
//
// READ BYTES POINTER VALIDITY
//
// When you call read_bytes, it returns a pointer to a block of bytes. This
// pointer only remains valid until you do a 'write' into the stream.
//
// UNREADING BYTES
//
// It's possible to 'unread' bytes that you've already read from a stream. This
// makes it possible to read those same bytes again.
//
// A common situation where this might be useful is: you're decoding a message,
// but you discover halfway through the process of decoding the message that you
// haven't received the whole message yet.  In that case, it may be desirable to
// unread the partial message, so that you can wait for the rest of the message
// to be received.
//
// Here is the construction that accomplishes this:
//
//     // Get the stream's read count before parsing the message.
//     size_t read_count_before = streambuffer.total_reads();
//
//     // Parse the message, but if there's an EOF, deal with it:
//     try {
//         // Parse the message.
//         int32_t value1 = streambuffer.read_int32();
//         eng::string value2 = streambuffer.read_string(maxlen);
//         int64_t value3 = streambuffer.read_int64();
//
//         // Great! I got the whole message.
//         execute_message(value1, value2, value3);
//     } catch (const StreamEofOnRead &) {
//         // I ran out of bytes.  Unread the message.
//         streambuffer.unread(read_count_before);
//     }
//
// UNREAD LIMITS
//
// If you read bytes from a stream, that data can be 'unread' until you do a
// write.  After a write, it is no longer possible to 'unread' data that you
// read before the write.
//
// STREAMBUFFERS THAT DON'T OWN THEIR OWN MEMORY
//
// If you create a streambuffer using this constructor:
//
//     StreamBuffer(const char *data, uint64_t len);
//
// This StreamBuffer reads from an external (unowned) block of bytes, which is
// not copied! The StreamBuffer saves the pointer that you passed in.  This
// pointer must remain valid until you're done with the StreamBuffer.
//
// A StreamBuffer that reads from an external block of bytes is read-only.
// Attempts to write to this buffer will be caught and will cause an abort. The
// total_writes for such a buffer returns the 'len' value that you initialized
// the buffer with.
//
// NESTED DECODING
//
// Here is an interesting construct:
//
//     // Read a message from the stream.
//     size_t len = streambuffer.read_int32();
//     const char *bytes = streambuffer.read_bytes(len);
//
//     // Construct another stream object to decode the message.
//     StreamBuffer substream(bytes, len);
//     decode(substream);
//
// This is perfectly valid and a potentially convenient way to parse the
// contents of a message.  Note that the substream contains a pointer to
// the parent stream's buffer, and therefore, data corruption will occur
// if you mutate the parent stream while reading the substream.
//
// USING A STREAMBUFFER TO READ AN ENTIRE FILE
//
// If you wish to read an entire file and store the file contents in a
// StreamBuffer, you should probe the size of the file, then allocate a
// StreamBuffer of the correct size using this constructor:
//
//     StreamBuffer(int64_t size);
//
// Then, you can use 'make_space' and 'wrote_space' to read the file into the
// buffer in a single read call.
//
// USING A STREAMBUFFER AS A LUA_WRITER OR LUA_READER
//
// You can use a streambuffer as a lua_Writer, as follows:
//
//       lua_dump(L, lua_writer_into_streambuffer, &sb);
//
// Anything written to the lua_writer gets appended to the streambuffer, the
// same as if it had been written using write_bytes.
//
// You can't use streambuffer as a lua_Reader directly, but you can get a
// string_view out of it and then use that to construct a lua_Reader, as
// follows:
//
//      LuaStringViewReader svr(mystreambuffer.view());
//      lua_load (L, svr.lua_reader(), svr.lua_reader_userdata());
//
//////////////////////////////////////////////////////////////


#ifndef STREAMBUFFER_HPP
#define STREAMBUFFER_HPP

#include "wrap-string.hpp"
#include "wrap-sstream.hpp"

#include <utility>
#include <cstdint>
#include <cassert>

#include "base-buffer.hpp"
#include "luastack.hpp"
#include "util.hpp"

// Root of the stream exception hierarchy.  Every other exception class in this
// header derives from it, so catching StreamException by reference catches
// them all.
class StreamException : public eng::nevernew
{
public:
    // Returns a static, human-readable description of the failure.
    virtual const char *what() const
    {
        return "General stream exception";
    }
};

// Thrown by StreamBufferConfig::raise_eof_on_read(): a read needed more data
// than the stream had left.
class StreamEofOnRead : public StreamException
{
public:
    // Returns a static, human-readable description of the failure.
    virtual const char *what() const
    {
        return "Stream ran out of data";
    }
};

// Thrown by StreamBufferConfig::raise_string_too_long(): the stream held a
// string that was longer than allowed.
class StreamStringTooLong : public StreamException
{
public:
    // Returns a static, human-readable description of the failure.
    virtual const char *what() const
    {
        return "Stream contained a string that was too long";
    }
};

// Thrown by StreamBufferConfig::raise_integer_truncated(): an integer lost
// information while being written to a stream.
class StreamIntegerTruncated : public StreamException
{
public:
    // Returns a static, human-readable description of the failure.
    virtual const char *what() const
    {
        return "You truncated an integer when writing to a stream";
    }
};

// Signals that the contents of a stream are invalid.  Nothing in this header
// throws it; as the file header notes, code that decodes a stream may throw it
// directly when the data it has read turns out to be invalid.
class StreamCorruption : public StreamException
{
public:
    // Returns a static, human-readable description of the failure.
    virtual const char *what() const
    {
        return "Stream Corruption";
    }
};

// Lua value representation used by stream buffers: BaseLuaValue instantiated
// with eng::string.  Handed to BaseBuffer as StreamBufferConfig::luavalue_type.
using LuaValue = BaseLuaValue<eng::string>;

// Policy class supplied to BaseBuffer as its template argument (see
// StreamBuffer).  It names the concrete types the buffer works with, routes
// allocation through eng::malloc / eng::free, and reports failures by throwing
// the StreamException subclasses declared in this header.
class StreamBufferConfig {
public:
    // Concrete types used by the buffer.
    using string_type   = eng::string;
    using luavalue_type = LuaValue;
    using fvector_type  = util::XYZ;
    using dvector_type  = util::DXYZ;

    // Allocation hook: obtains 'size' bytes from eng::malloc.
    void *basebuffer_malloc(size_t size)
    {
        return eng::malloc(size);
    }

    // Deallocation hook: hands 'p' back to eng::free.
    void basebuffer_free(void *p)
    {
        eng::free(p);
    }

    // No-op: this configuration stores no error flags; failures are reported
    // by throwing instead.
    void clear_error_flags()
    {
    }

    // Failure hooks.  Each one throws the matching exception by value.
    void raise_eof_on_read()
    {
        throw StreamEofOnRead();
    }

    void raise_string_too_long()
    {
        throw StreamStringTooLong();
    }

    void raise_integer_truncated()
    {
        throw StreamIntegerTruncated();
    }
};

// The concrete buffer type: BaseBuffer configured with StreamBufferConfig,
// extended with helpers for serializing util::HashValue.
class StreamBuffer : public eng::nevernew, public BaseBuffer<StreamBufferConfig> {
public:
    // Inherit every BaseBuffer constructor unchanged.
    using BaseBuffer::BaseBuffer;

    // Reads a hash value stored as two consecutive uint64 fields, in the same
    // order write_hashvalue emits them.  The two reads are kept as separate
    // statements so that their order is fixed.
    // NOTE(review): running out of data presumably surfaces through
    // StreamBufferConfig::raise_eof_on_read -- confirm in base-buffer.hpp.
    util::HashValue read_hashvalue() {
        uint64_t first_half = read_uint64();
        uint64_t second_half = read_uint64();
        return util::HashValue(first_half, second_half);
    }

    // Writes a hash value as two consecutive uint64 fields: h.first, then
    // h.second.
    void write_hashvalue(const util::HashValue &h) {
        write_uint64(h.first);
        write_uint64(h.second);
    }
};

// Use a streambuffer as a lua_writer.
//
// Intended to be handed to lua_dump together with a pointer to the destination
// StreamBuffer as the userdata argument:
//
//     lua_dump(L, lua_writer_into_streambuffer, &sb);
//
//   L     - the Lua state performing the dump.
//   bytes - the chunk of data being written.
//   sz    - the number of bytes in the chunk.
//   sb    - userdata pointer; expected to point at a StreamBuffer.
//
// Per the file header, the bytes are appended to the streambuffer as if they
// had been written with write_bytes.
// NOTE(review): the return value presumably follows the lua_Writer convention
// (0 on success); the definition is not in this header, so confirm there.
int lua_writer_into_streambuffer(lua_State *L, const void* bytes, size_t sz, void* sb);

#endif // STREAMBUFFER_HPP