//////////////////////////////////////////////////////////////
//
// STREAMBUFFER
//
// Serves as a buffer for buffered I/O operations. Has rather sophisticated
// methods to help serialize and deserialize data.
//
// The semantics of this class contain a lot of subtlety! Please read the
// documentation carefully.
//
// TELLING LINUX TO READ A FILE DESCRIPTOR INTO A STREAMBUFFER
//
// It is possible to read from a Linux file descriptor directly into a stream
// buffer. You should do this, it's very efficient. Here is how you do it:
//
//   // With Linux read, you have to pick an arbitrary buffer size.
//   const int bufsize = 16384;
//
//   // Allocate transient space in the streambuffer.
//   char *space = streambuffer.make_space(bufsize);
//
//   // Call the Linux 'read' function.
//   ssize_t bytes_read = read(fd, space, bufsize);
//
//   // Append the bytes read to the streambuffer.
//   streambuffer.wrote_space(bytes_read);
//
// The make_space operation allocates an array of bytes where the data can be
// written, and returns a pointer to that array of bytes. The read operation
// fills some or all of the allocated bytes. Finally, the wrote_space operation
// notifies the StreamBuffer that some of the bytes have been filled with data.
// These bytes are appended to the StreamBuffer.
//
// The pointer returned by 'make_space' is only valid until you mutate the
// StreamBuffer. Therefore, you should call 'make_space', then immediately fill
// the bytes. It is imperative that 'wrote_space' be the first mutator after
// 'make_space'. You should think of 'make_space' followed by 'wrote_space' as
// a single two-phase operation.
//
// THE OVERWRITE_INT METHODS:
//
// These overwrite methods are meant to help deal with this situation: you want
// to write a length followed by some data, but you don't know the length until
// after you've written the data.
// The workaround: write a dummy length, then
// write the data, and then overwrite the previously-written length with the
// correct length. This is the construction that accomplishes this:
//
//   // Write the dummy length, this will get overwritten.
//   streambuffer.write_int32(0);
//
//   // Write the data, and calculate its length in bytes.
//   int64_t write_count_1 = streambuffer.total_writes();
//   write_data(stream);
//   int64_t write_count_2 = streambuffer.total_writes();
//   int64_t data_len = write_count_2 - write_count_1;
//
//   // Overwrite the previously-written dummy length.
//   streambuffer.overwrite_int32(write_count_1, data_len);
//
// Almost all of this is self-explanatory, but the last line is interesting. In
// order to know what part of the buffer to overwrite, overwrite_int uses
// write_count_1 as a pointer into the buffer - it points immediately to the
// right of the integer to overwrite.
//
// OVERWRITE_INT LIMITS
//
// If you use write_int to write an integer into the buffer, you are allowed to
// overwrite that integer UNTIL you do a read from the buffer. Once you do a
// read, it is no longer legal to overwrite ints that you wrote BEFORE the read.
//
// WRITE_STRING STORES THE STRING LENGTH, WRITE_BYTES DOES NOT
//
// write_string writes a string into the buffer and prepends a length. The
// encoding of the length field is designed to be efficient for short strings
// but still capable of encoding long lengths.
//
// write_bytes doesn't store the data length in the buffer. It's just a raw
// write of bytes.
//
// STREAM EXCEPTIONS
//
// If you do a read_int64, but the buffer doesn't contain the necessary 8 bytes,
// it throws a StreamEof exception.
// In general, during reading, the following
// common situations generate StreamEof or StreamCorruption exceptions:
//
//   * not enough bytes to satisfy a 'read' call: StreamEof
//   * call verify_empty, but the buffer is not empty: StreamCorruption
//   * call read_string_limit, but the string is unreasonably long:
//     StreamCorruption
//
// Exceptions are only generated when reading from a stream that contains bad
// data. Any other error generates a full-blown abort. For example, if you try
// to write to a stream that's not open for writing, that's an abort, not an
// exception. Write operations never generate exceptions.
//
// Sometimes, it is convenient to throw StreamCorruption yourself, if you detect
// that the data you've read from a stream is invalid. This can make error
// handling a little cleaner.
//
// READ BYTES POINTER VALIDITY
//
// When you call read_bytes, it returns a pointer to a block of bytes. This
// pointer only remains valid until you do a 'write' into the stream.
//
// UNREADING BYTES
//
// It's possible to 'unread' bytes that you've already read from a stream. This
// makes it possible to read those same bytes again.
//
// A common situation where this might be useful is: you're decoding a message,
// but you discover halfway through the process of decoding the message that you
// haven't received the whole message yet. In that case, it may be desirable to
// unread the partial message, so that you can wait for the rest of the message
// to be received.
//
// Here is the construction that accomplishes this:
//
//   // Get the stream's read count before parsing the message.
//   int64_t read_count_before = streambuffer.total_reads();
//
//   // Parse the message, but if there's an EOF, deal with it:
//   try {
//     // Parse the message.
//     int32_t value1 = streambuffer.read_int32();
//     std::string value2 = streambuffer.read_string_limit(maxlen);
//     int64_t value3 = streambuffer.read_int64();
//
//     // Great! I got the whole message.
//     execute_message(value1, value2, value3);
//   } catch (const StreamEof &) {
//     // I ran out of bytes. Unread the message.
//     streambuffer.unread_to(read_count_before);
//   }
//
// UNREAD LIMITS
//
// If you read bytes from a stream, that data can be 'unread' until you do a
// write. After a write, it is no longer possible to 'unread' data that you
// read before the write.
//
// STREAMBUFFERS THAT DON'T OWN THEIR OWN MEMORY
//
// If you create a streambuffer using this constructor:
//
//   StreamBuffer(const char *s, int64_t len);
//
// then the StreamBuffer reads from an external (unowned) block of bytes, which
// is not copied! The StreamBuffer saves the pointer that you passed in. This
// pointer must remain valid until you're done with the StreamBuffer.
//
// A StreamBuffer that reads from an external block of bytes is read-only.
// Attempts to write to this buffer will be caught and will cause an abort. The
// total_writes for such a buffer returns the 'len' value that you initialized
// the buffer with.
//
// NESTED DECODING
//
// Here is an interesting construct:
//
//   // Read a message from the stream.
//   int64_t len = streambuffer.read_int32();
//   const char *bytes = streambuffer.read_bytes(len);
//
//   // Construct another stream object to decode the message.
//   StreamBuffer substream(bytes, len);
//   decode(substream);
//
// This is perfectly valid and a potentially convenient way to parse the
// contents of a message. Note that the substream contains a pointer to
// the parent stream's buffer, and therefore, data corruption will occur
// if you mutate the parent stream while reading the substream.
//
// USING A STREAMBUFFER TO READ AN ENTIRE FILE
//
// If you wish to read an entire file and store the file contents in a
// StreamBuffer, you should probe the size of the file, then allocate a
// StreamBuffer of the correct size using this constructor:
//
//   StreamBuffer(int64_t size, bool fixed_size);
//
// Then, you can use 'make_space' and 'wrote_space' to read the file into the
// buffer in a single read call.
//
// USING A STREAMBUFFER AS A LUA_WRITER OR LUA_READER
//
// You can use a streambuffer as a lua_Writer, as follows:
//
//   lua_dump(L, StreamBuffer::lua_writer, stream.lua_writer_ud());
//
// Anything written to the lua_writer gets appended to the streambuffer, the
// same as if it had been written using write_bytes.
//
// You can use a streambuffer as a lua_Reader, as follows:
//
//   lua_load(L, StreamBuffer::lua_reader, stream.lua_reader_ud(nbytes), ...)
//
// The exact semantics of the lua_reader are tricky, so be careful:
// lua_reader_ud calls 'read_bytes' immediately, and it stores the bytes in a
// "cache of bytes for lua." Then, when the lua_reader gets invoked, the reader
// returns the entire contents of the cache, and it clears the cache. Here are
// some consequences of this design:
//
//   1. The number of bytes read from the stream is always exactly equal to
//      nbytes, even if lua never calls the lua_reader.
//
//   2. If the stream doesn't contain nbytes, a StreamEof exception gets thrown
//      from lua_reader_ud, not from the lua_Reader. This is good, because it
//      means exceptions don't get thrown from inside the lua runtime.
// ////////////////////////////////////////////////////////////// #ifndef STREAMBUFFER_HPP #define STREAMBUFFER_HPP #include "wrap-string.hpp" #include "wrap-sstream.hpp" #include "wrap-utility.hpp" #include "luastack.hpp" #include "util.hpp" #include #include class StreamException { public: virtual char const *what() const { return "General stream exception"; } }; class StreamEof : public StreamException { public: virtual char const *what() const { return "Stream ran out of data"; } }; class StreamCorruption : public StreamException { public: virtual char const *what() const { return "Stream contained invalid data"; } }; class StreamBuffer { public: // Construct an empty buffer. StreamBuffer(); // Construct an empty buffer, preallocate the specified amount of space. StreamBuffer(int64_t size, bool fixed_size); // Construct a streambuffer that reads from an external block of bytes. StreamBuffer(const char *s, int64_t len); // Construct a streambuffer that reads from an external block of bytes. StreamBuffer(const std::string &data); // Delete a StreamBuffer. ~StreamBuffer(); // Get the total number of bytes ever read from this buffer. int64_t total_reads() const; // Get the total number of bytes ever written to this buffer. int64_t total_writes() const; // Amount of data inside the buffer. int64_t fill() const; // Get a pointer to the data. const char *data() const; // Discard all data. Reset total read and write counts. // Frees up as much space as possible. void clear(); // Attempt to do a "readline". If there is no newline in // the buffer, returns empty string. If there is a newline, // returns a block of text that ends in newline. std::string readline(); // Write block of bytes into the buffer. // // Caution: this function doesn't write the length! // It just writes the bytes. // void write_bytes(const char *bytes, int64_t len); void write_bytes(const std::string &bytes); // Read a block of bytes from the buffer. 
// // Caution: the pointer returned is a pointer to the stream's buffer. It is // only valid until you mutate the buffer. Throws StreamEof if the specified // number of bytes aren't present. // const char *read_bytes(int64_t bytes); // Write integers and floats into the buffer. // // Note that integral parameters are all 64 bits. That's so that I can do // runtime error checking to verify that the numbers are all in-range. // void write_int8(int64_t v); void write_int16(int64_t v); void write_int32(int64_t v); void write_int64(int64_t v); void write_uint8(uint64_t v); void write_uint16(uint64_t v); void write_uint32(uint64_t v); void write_uint64(uint64_t v); void write_float(float f); void write_double(double d); // Read fixed-size integers from the buffer. // // May throw StreamEof if the specified number of bytes aren't present. // int8_t read_int8(); int16_t read_int16(); int32_t read_int32(); int64_t read_int64(); uint8_t read_uint8() { return read_int8(); } uint16_t read_uint16() { return read_int16(); } uint32_t read_uint32() { return read_int32(); } uint64_t read_uint64() { return read_int64(); } float read_float(); double read_double(); // Write other types into the buffer. // // Note that strings are preceded by a length field. Reading // a string works by reading the length field, and then reading // the correct number of bytes. // void write_bool(bool b) { write_int8(b ? 1 : 0); } void write_hashvalue(const util::HashValue &hv); void write_string(const std::string &s); // Read other types from the buffer. // // Throws StreamEof if the specified number of bytes aren't present. // Read string with a length limit will throw 'StreamCorruption' if the // length is too long. // bool read_bool() { return read_int8(); } util::HashValue read_hashvalue(); std::string read_string(); std::string read_string_limit(int64_t max_allowed); // Read the entire contents of the buffer as a string. 
// std::string read_entire_contents(); // Overwrite values previously written to the buffer. // // See the comment at the top of this file for an explanation. // void overwrite_int8(int64_t write_count_after, int64_t v); void overwrite_int16(int64_t write_count_after, int64_t v); void overwrite_int32(int64_t write_count_after, int64_t v); void overwrite_int64(int64_t write_count_after, int64_t v); void overwrite_uint8(int64_t write_count_after, uint64_t v); void overwrite_uint16(int64_t write_count_after, uint64_t v); void overwrite_uint32(int64_t write_count_after, uint64_t v); void overwrite_uint64(int64_t write_count_after, uint64_t v); // This function checks to see if the buffer is empty. bool empty(); // Verify that the buffer is empty, if not, throw StreamCorruption. void verify_empty(); // Make sure the specified number of bytes are available to read. void check_available(int64_t bytes) { int64_t avail = write_cursor_ - read_cursor_; if (avail < bytes) { throw StreamEof(); } } // Rewind the read cursor to a previous position. void unread_to(int64_t total_reads); // Rewind the write cursor to a previous position. void unwrite_to(int64_t total_writes); // Copy the entire contents of this streambuffer into another one. void copy_into(StreamBuffer *sb); // Transfer the entire contents of this streambuffer into another one. void transfer_into(StreamBuffer *sb); // Compare the contents of this streambuffer to another one. bool contents_equal(const StreamBuffer *sb) const; // Calculate a noncryptographic but good hash of what's in the buffer. util::HashValue hash() const; // Use the stream buffer as a lua_Writer. static int lua_writer(lua_State *L, const void* p, size_t sz, void* ud); void *lua_writer_ud(); // Use the stream buffer as a lua_Reader. static const char *lua_reader(lua_State *L, void *data, size_t *size); void *lua_reader_ud(int64_t bytes); // Get an ostream that writes into the StreamBuffer. 
std::ostream &ostream(); private: // Start and end of the allocated block. char *buf_lo_; char *buf_hi_; // The write and read cursors. char *write_cursor_; char *read_cursor_; // Number of bytes read before buffer was last aligned. int64_t pre_read_count_; // True if we own this buffer. bool owned_; // True if we're not allowed to expand this buffer. bool fixed_size_; // Lua reader return value. const char *lua_reader_data_; int64_t lua_reader_size_; // The ostream. Only allocated on demand. std::unique_ptr ostream_; // Initialize with a new buffer. void init(bool fixed, bool owned, char *buf, int64_t size); // Make the specified amount of space in the buffer for writing. // Return a pointer to the space. char *make_space(int64_t bytes) { int64_t available = buf_hi_ - write_cursor_; if (available < bytes) make_space_slow(bytes); return write_cursor_; } void make_space_slow(int64_t bytes); void wrote_space(int64_t bytes); // Implementation for the overwrite_int functions. char *get_overwrite(int64_t size, int64_t write_count_after); // This is for unit testing. bool layout_is(int64_t a, int64_t b, int64_t c); friend int lfn_unittests_streambuffer(lua_State *L); }; #endif // STREAMBUFFER_HPP