From 74f7686b85767bd76ce1ed93bd4bd66bf5c514eb Mon Sep 17 00:00:00 2001 From: jyelon Date: Wed, 5 Apr 2023 15:06:52 -0400 Subject: [PATCH] Implemented a serialize and a deserialize routine --- luprex/Makefile | 1 + luprex/cpp/core/serializelua.cpp | 370 ++++++++++++++++++++++--------- luprex/cpp/core/streambuffer.cpp | 4 +- luprex/cpp/core/streambuffer.hpp | 4 +- luprex/cpp/core/util.hpp | 2 +- 5 files changed, 276 insertions(+), 105 deletions(-) diff --git a/luprex/Makefile b/luprex/Makefile index 7c484fca..a4412cb2 100644 --- a/luprex/Makefile +++ b/luprex/Makefile @@ -108,6 +108,7 @@ OBJ_CORE=\ obj/core/lpxclient.o\ obj/core/eng-tests.o\ obj/core/printbuffer.o\ + obj/core/serializelua.o\ OBJ_DRV=\ obj/drv/drvutil.o\ diff --git a/luprex/cpp/core/serializelua.cpp b/luprex/cpp/core/serializelua.cpp index 633c8c67..4c690346 100644 --- a/luprex/cpp/core/serializelua.cpp +++ b/luprex/cpp/core/serializelua.cpp @@ -7,22 +7,121 @@ enum PackCodes { LUA_PK_ENDTABLE }; +class DeserializeError { /* Thrown internally to abort deserialization; error_ is set before every throw. */ +}; + class Deserializer { LuaVar id_to_value_; LuaStack LS_; - int next_id_; eng::string &error_; StreamBuffer *sb_; + int next_id_; + void deserialize_table_r(LuaSlot target) { + LuaVar key, val; + LuaStack LS(LS_.state(), key, val); + LS.newtable(target); + LS.rawset(id_to_value_, next_id_++, target); /* Register the table under its id before reading its contents, so back-references from inside the table (cycles) can resolve. */ + bool hasmeta = sb_->read_bool(); + if (hasmeta) { + uint8_t vtype = sb_->read_uint8(); + deserialize_r(vtype, val); + if (!LS.istable(val)) { + error_ = "serialized data contains invalid metatable"; + throw DeserializeError(); + } + LS.setmetatable(target, val); + } + while (true) { + uint8_t ktype = sb_->read_uint8(); + if (ktype == LUA_PK_ENDTABLE) { + break; + } + deserialize_r(ktype, key); + uint8_t vtype = sb_->read_uint8(); + deserialize_r(vtype, val); + LS.rawset(target, key, val); /* NOTE(review): a nil or NaN key decoded from malformed input would presumably make rawset raise a Lua error here -- confirm, and reject such keys explicitly if so. */ + } + } + + void deserialize_r(uint8_t kind, LuaSlot target) { + switch (kind) { + case LUA_TNIL: { + LS_.set(target, LuaNil); + return; + } + case LUA_PK_TRUE: { + LS_.set(target, true); + return; 
+ } + case LUA_PK_FALSE: { + LS_.set(target, false); + return; + } + case LUA_TLIGHTUSERDATA: { + LS_.set(target, LuaToken(sb_->read_uint64())); + return; + } + case LUA_TNUMBER: { + LS_.set(target, sb_->read_double()); + return; + } + case LUA_TSTRING: { + LS_.set(target, sb_->read_string()); + LS_.rawset(id_to_value_, next_id_++, target); /* Strings get ids too, mirroring Serializer, which emits a back-reference for a repeated string. */ + return; + } + case LUA_TT_GENERAL: { + deserialize_table_r(target); + return; + } + case LUA_TT_GLOBALENV: { + LS_.getglobaltable(target); + return; + } + case LUA_TT_TANGIBLE: { + LS_.maketan(target, sb_->read_int64()); + return; + } + case LUA_TT_CLASS: { + LS_.makeclass(target, sb_->read_string()); + return; + } + case LUA_PK_REFERENCE: { + int32_t key = sb_->read_int32(); + LS_.rawget(target, id_to_value_, key); + if (LS_.isnil(target)) { + error_ = "invalid backward reference in serialized data"; + throw DeserializeError(); + } + return; + } + default: { + error_ = util::ss("unrecognized type token in serialized data: ", static_cast<int>(kind)); /* cast so the token is reported as a number, not as a raw character */ + throw DeserializeError(); + } + } + } + +public: Deserializer(LuaStack &LS0, LuaSlot val, StreamBuffer *sb, eng::string &error) : - LS_(LS0.state(), lookup_, value_to_id_), error_(error) { - next_id_ = 1; + LS_(LS0.state(), id_to_value_), error_(error), sb_(sb), next_id_(1) { LS_.newtable(id_to_value_); + int top = lua_gettop(LS_.state()); try { + uint16_t v = sb_->read_uint16(); /* magic number that Serializer writes first */ + if (v != 0xD096) { + error_ = "This is not a serialized data structure"; + throw DeserializeError(); /* share the cleanup path below instead of returning before LS_.result() */ + } uint8_t b = sb_->read_uint8(); deserialize_r(b, val); - } catch (StreamEof e) { + } catch (const StreamEof &e) { error_ = "EOF reached while deserializing data"; + lua_settop(LS_.state(), top); + LS_.set(val, LuaNil); + } catch (const DeserializeError &) { /* error_ was already set at the throw site */ + lua_settop(LS_.state(), top); LS_.set(val, LuaNil); } LS_.result(); @@ -39,22 +138,23 @@ class Serializer { LuaVar lookup_; LuaVar value_to_id_; LuaStack LS_; - int next_id_; eng::string &error_; StreamBuffer *sb_; + int next_id_; - void 
serialize_table_r(LuaSlot tab) { LuaVar key, val; LuaStack SLS(LS_.state(), key, val); + sb_->write_uint8(LUA_TT_GENERAL); SLS.getmetatable(val, tab); - if (!LS.isnil(val)) { - error_ = "Cannot serialize metatables"; - SLS.result(); - return; + if (SLS.isnil(val)) { + sb_->write_bool(false); + } else { + sb_->write_bool(true); + serialize_r(val); /* The metatable is serialized like any other value. NOTE(review): error_ is not checked here as it is after keys and values below, so iteration continues after a failure -- confirm that is intended. */ } - sb_->pack_uint8(LUA_TT_GENERAL); SLS.set(key, LuaNil); - while (LS.next(tab, key, val)) { + while (SLS.next(tab, key, val)) { serialize_r(key); if (!error_.empty()) { SLS.result(); @@ -66,107 +166,109 @@ class Serializer { return; } } - sb_->pack_uint8(LUA_PK_ENDTABLE); + sb_->write_uint8(LUA_PK_ENDTABLE); SLS.result(); } void serialize_r(LuaSlot val) { int tt = LS_.xtype(val); switch (tt) { - case LUA_TNIL: { - sb_->pack_uint8(LUA_TNIL); - return; - } - case LUA_TBOOLEAN: { - if (LS_.ckboolean(val)) { - sb_->pack_uint8(LUA_PK_TRUE); - } else { - sb_->pack_uint8(LUA_PK_FALSE); + case LUA_TNIL: { + sb_->write_uint8(LUA_TNIL); + return; } - return; - } - case LUA_TLIGHTUSERDATA: { - sb_->pack_uint8(LUA_TLIGHTUSERDATA); - sb_->pack_uint64(LS_.cktoken(val).value); - return; - } - case LUA_TNUMBER: { - sb_->pack_uint8(LUA_TNUMBER); - sb_->pack_double(LS_.cknumber(val)); - return; - } - case LUA_TSTRING: { - LS_.rawget(lookup_, value_to_id_, val); - if (!LS_.isnil(lookup_)) { - sb_->pack_uint8(LUA_PK_REFERENCE); - sb_->pack_uint32(LS.ckint(lookup_)); - } else { - LS_.rawset(value_to_id_, val, next_id_++); - sb_->pack_uint8(LUA_TSTRING); - sb_->pack_string(LS_.ckstring(val)); + case LUA_TBOOLEAN: { + if (LS_.ckboolean(val)) { + sb_->write_uint8(LUA_PK_TRUE); + } else { + sb_->write_uint8(LUA_PK_FALSE); + } + return; } - return; - } - case LUA_TTABLE: { - // LS.xtype should never return LUA_TTABLE. 
- error_ = "Bad xtype in serialization"; - return; - } - case LUA_TFUNCTION: { - error_ = "Cannot serialize closures"; - return; - } - case LUA_TUSERDATA: { - error_ = "Cannot serialize userdata"; - return; - } - case LUA_TTHREAD: { - error_ = "Cannot serialize coroutines"; - return; - } - case LUA_TT_GENERAL: { - LS_.rawget(lookup_, value_to_id_, val); - if (!LS_.isnil(lookup_)) { - sb_->pack_uint8(LUA_PK_REFERENCE); - sb_->pack_uint32(LS.ckint(lookup_)); - } else { - LS_.rawset(value_to_id_, val, next_id_++); - serialize_table_r(val); + case LUA_TLIGHTUSERDATA: { + sb_->write_uint8(LUA_TLIGHTUSERDATA); + sb_->write_uint64(LS_.cktoken(val).value); + return; + } + case LUA_TNUMBER: { + sb_->write_uint8(LUA_TNUMBER); + sb_->write_double(LS_.cknumber(val)); + return; + } + case LUA_TSTRING: { + LS_.rawget(lookup_, value_to_id_, val); + if (!LS_.isnil(lookup_)) { + sb_->write_uint8(LUA_PK_REFERENCE); + sb_->write_int32(LS_.ckint(lookup_)); + } else { + LS_.rawset(value_to_id_, val, next_id_++); + sb_->write_uint8(LUA_TSTRING); + sb_->write_string(LS_.ckstring(val)); + } + return; + } + case LUA_TTABLE: { + // LS.xtype should never return LUA_TTABLE. 
+ error_ = "Bad xtype in serialization"; + return; + } + case LUA_TFUNCTION: { + error_ = "Cannot serialize closures"; + return; + } + case LUA_TUSERDATA: { + error_ = "Cannot serialize userdata"; + return; + } + case LUA_TTHREAD: { + error_ = "Cannot serialize coroutines"; + return; + } + case LUA_TT_GENERAL: { + LS_.rawget(lookup_, value_to_id_, val); + if (!LS_.isnil(lookup_)) { + sb_->write_uint8(LUA_PK_REFERENCE); + sb_->write_int32(LS_.ckint(lookup_)); + } else { + LS_.rawset(value_to_id_, val, next_id_++); + serialize_table_r(val); + } + return; + } + case LUA_TT_REGISTRY: { + error_ = "Pointer to registry found in serialization, shouldn't happen"; + return; + } + case LUA_TT_GLOBALENV: { + sb_->write_uint8(LUA_TT_GLOBALENV); + return; + } + case LUA_TT_TANGIBLE: { + sb_->write_uint8(LUA_TT_TANGIBLE); + sb_->write_int64(LS_.tanid(val)); + return; + } + case LUA_TT_TANGIBLEMETA: { + error_ = "Pointer to a tangible metatable found in serialization, shouldn't happen"; + return; + } + case LUA_TT_CLASS: { + sb_->write_uint8(LUA_TT_CLASS); + sb_->write_string(LS_.classname(val)); + return; + } + default: { + error_ = "Unrecognized xtype in serialization"; + return; } - return; - } - case LUA_TT_REGISTRY: { - error_ = "Pointer to registry found in serialization, shouldn't happen"; - return; - } - case LUA_TT_GLOBALENV: { - sb_->pack_uint8(LUA_TT_GLOBALENV); - return; - } - case LUA_TT_TANGIBLE: { - sb_->pack_uint8(LUA_TT_TANGIBLE); - sb_->pack_int64(LS_.tanid(val)); - return; - } - case LUA_TT_TANGIBLEMETA: { - error_ = "Pointer to a tangible metatable found in serialization, shouldn't happen"; - return; - } - case LUA_TT_CLASS: { - sb_->pack_uint8(LUA_TT_CLASS); - sb_->pack_string(LS_.classname(val)); - return; - } - default: { - error_ = "Unrecognized xtype in serialization"; - return; } } +public: Serializer(LuaStack &LS0, LuaSlot val, StreamBuffer *sb, eng::string &error) : - LS_(LS0.state(), lookup_, value_to_id_), error_(error) { - next_id_ = 1; + 
LS_(LS0.state(), lookup_, value_to_id_), error_(error), sb_(sb), next_id_(1) { LS_.newtable(value_to_id_); + sb_->write_uint16(0xD096); /* magic number; Deserializer rejects input that does not start with it */ serialize_r(val); LS_.result(); } @@ -181,9 +283,77 @@ eng::string serialize_lua(LuaStack &LS0, LuaSlot val, StreamBuffer *sb) { eng::string deserialize_lua(LuaStack &LS0, LuaSlot val, StreamBuffer *sb) { eng::string error; Deserializer dsz(LS0, val, sb, error); - if (!error_.empty()) { + if (!error.empty()) { LS0.set(val, LuaNil); } return error; } +LuaDefine(table_serialize, "value", + "|Serialize any lua value, returning a string" + "|" + "|Converts lua values into a binary string. The string" + "|can then be deserialized to produce a copy of the value." + "|" + "|Supports these atomic types:" + "|" + "| nil, string, number, boolean, token" + "|" + "|Does not support these types at all:" + "|" + "| function, thread, userdata" + "|" + "|Supports simple tables. When it finds a simple table, it recurses" + "|into the table making a deep copy. Metatables are also supported." + "|" + "|Cycles are supported: if you have two tables which point to each other," + "|they can be serialized, and the deserialized copy will contain two" + "|tables that point to each other." + "|" + "|Duplicate tables and duplicate strings are handled efficiently." + "|For example, if the data contains two copies of a 10KB string, the" + "|serialized data will only contain one copy of the string." + "|" + "|Tangibles are tables, but they are handled specially. When the" + "|serialize routine finds a tangible, it just stores the tangible's ID." + "|The deserialized copy will then contain the same tangible by ID." + "|" + "|Classes are also tables, but they too are handled specially. When" + "|the serialize routine finds a class, it stores the class name, and" + "|the deserialized copy will then contain the same class." 
+ "|" + "|The global environment table is also treated specially: if you" + "|serialize it, it will not recurse into it, instead, the deserialized" + "|copy will just contain a pointer to the global environment." + "|") { + LuaArg value; + LuaRet str; + LuaStack LS(L, value, str); + StreamBuffer sb; + eng::string error = serialize_lua(LS, value, &sb); + if (!error.empty()) { + luaL_error(L, "%s", error.c_str()); /* NOTE(review): luaL_error never returns; if Lua unwinds with longjmp, sb and error are presumably not destroyed -- confirm against the project's Lua build. */ + return LS.result(); + } + LS.set(str, sb.view()); + return LS.result(); +} + +LuaDefine(table_deserialize, "binary", + "|Deserialize a serialized block, returning a value" + "|" + "|See doc(table.serialize) for more information about what" + "|kind of data can be serialized and how it is deserialized." + "|") { + LuaArg str; + LuaRet value; + LuaStack LS(L, value, str); + std::string_view s = LS.ckstringview(str); + StreamBuffer sb(s); + eng::string error = deserialize_lua(LS, value, &sb); + if (!error.empty()) { + luaL_error(L, "%s", error.c_str()); + return LS.result(); + } + return LS.result(); +} \ No newline at end of file diff --git a/luprex/cpp/core/streambuffer.cpp b/luprex/cpp/core/streambuffer.cpp index af208806..f9497b6b 100644 --- a/luprex/cpp/core/streambuffer.cpp +++ b/luprex/cpp/core/streambuffer.cpp @@ -34,8 +34,8 @@ StreamBuffer::StreamBuffer(const char *s, int64_t size) { write_cursor_ = buf_hi_; } -StreamBuffer::StreamBuffer(const eng::string &src) { - init(true, false, const_cast(src.c_str()), src.size()); +StreamBuffer::StreamBuffer(std::string_view data) { + init(true, false, const_cast(data.data()), data.size()); write_cursor_ = buf_hi_; } diff --git a/luprex/cpp/core/streambuffer.hpp b/luprex/cpp/core/streambuffer.hpp index 96166103..9b3243ba 100644 --- a/luprex/cpp/core/streambuffer.hpp +++ b/luprex/cpp/core/streambuffer.hpp @@ -252,8 +252,8 @@ public: StreamBuffer(const char *s, int64_t len); // Construct a streambuffer that reads from an external block of bytes. 
- StreamBuffer(const eng::string &data); - + StreamBuffer(std::string_view data); + // Delete a StreamBuffer. ~StreamBuffer(); diff --git a/luprex/cpp/core/util.hpp b/luprex/cpp/core/util.hpp index 438c008f..7485f475 100644 --- a/luprex/cpp/core/util.hpp +++ b/luprex/cpp/core/util.hpp @@ -354,7 +354,7 @@ inline eng::string ss(const ARGS & ... args) { // // This is a variant of ostringstream in which it is possible // to get the contents without copying. To get the contents -// without copying, use oss.size() and oss.c_str() +// without copying, use oss.view(). // class ostringstream : public eng::ostringstream { class rstringbuf : public std::basic_stringbuf {