/////////////////////////////////////////////////////////////////////// // // NAMESPACE SV // // * Operate on string_view or just characters. // * Do not allocate memory. // * Do not copy strings. // // NAMESPACE UTIL // // * General purpose utility functions. // * Sort of a catch-all. // /////////////////////////////////////////////////////////////////////// #ifndef UTIL_HPP #define UTIL_HPP #include "wrap-string.hpp" #include "wrap-set.hpp" #include "wrap-map.hpp" #include "wrap-vector.hpp" #include "wrap-sstream.hpp" #include #include #include #include #include #include #include // #include #include #include "spookyv2.hpp" namespace sv { // Bring this into our namespace. using string_view = std::string_view; // Test character class, ignoring current locale and unicode issues. inline bool ascii_isupper(char c) { return (c >= 'A') && (c <= 'Z'); } inline bool ascii_islower(char c) { return (c >= 'a') && (c <= 'z'); } inline bool ascii_isdigit(char c) { return (c >= '0') && (c <= '9'); } inline bool ascii_isalpha(char c) { return ascii_isupper(c) || ascii_islower(c); } inline bool ascii_isualpha(char c) { return ascii_isalpha(c) || (c == '_'); } inline bool ascii_isalnum(char c) { return ascii_isalpha(c) || ascii_isdigit(c); } inline bool ascii_isualnum(char c) { return ascii_isalpha(c) || ascii_isdigit(c) || (c == '_'); } inline bool ascii_isspace(char c) { return (c==' ')||(c=='\t')||(c=='\r')||(c=='\n')||(c=='\f')||(c=='\v'); } // Check for the null string_view // // Note that the null string view is an empty string, // but not every empty string is the null string view. // inline bool isnull(string_view v) { return v.data() == nullptr; } // Return true if the two strings are equal, ignoring case. // bool case_insensitive_eq(std::string_view s1, std::string_view s2); // Check if numbers can be parsed as int64/double bool valid_double(string_view v); bool valid_int64(string_view v); bool valid_hex64(string_view v); // Check if a hostname is a valid DNS (ascii) hostname. bool valid_hostname(string_view v); // Convert strings to numbers. Returns errval on failure. // // The integer parser accepts a sequence of digits, // with or without a + or - sign. The hex parser // does not allow a + or - sign. For both the int64 // and hex64 parser, it is a failure if the number // does not fit in 64 bits. The double parser does // not accept the strings 'nan' or 'inf'. // double to_double(string_view v, double errval = std::numeric_limits::quiet_NaN()); int64_t to_int64(string_view v, int64_t errval = std::numeric_limits::max()); uint64_t to_hex64(string_view v, uint64_t errval = std::numeric_limits::max()); // Trim whitspace from a string_view. string_view ltrim(string_view v); string_view rtrim(string_view v); string_view trim(string_view v); // Trim specific character (all occurrences) from a string_view. string_view ltrim(string_view v, char c); string_view rtrim(string_view v, char c); string_view trim(string_view v, char c); // Return true if the string has the specified prefix or suffix. bool has_prefix(string_view s, string_view prefix); bool has_suffix(string_view s, string_view suffix); // Return the length of the common prefix of A and B. int common_prefix_length(string_view a, string_view b); // Return true if the string is a lua identifier. bool is_lua_id(string_view s); // Return true if the string is a valid lua classname. bool is_lua_classname(string_view s); // Return true if the line of code is a lua comment. bool is_lua_comment(string_view s); // Return true if the line is entirely whitespace. bool is_whitespace(string_view s); // Get the function name from a lua function prototype. // Returns empty string if the prototype is malformed or // is not a lua function prototype at all. string_view lua_function_proto_name(string_view s); // Return the first character, but if the view is empty, // return zero. inline char zfront(string_view &s) { return s.empty() ? char(0) : s.front(); } // Read whitespace from a string_view. // string_view read_space(string_view &source); // Read from a string_view until separator is reached. // // If the separator appears in the source, returns everything // before the separator, and updates the source to everything // after the separator. // // If the separator doesn't appear in the source, returns // the entire source, and replaces source with the null string_view. // string_view read_to_sep(string_view &source, char sep); // Read from a string_view until newline is reached. // // If there's a line-break in the source (newline or CRLF), // returns the text before the line-break, and updates the // source to the text after the line-break. // // If there's no line-break in the source, returns the entire source, // and updates source to the null string_view. // string_view read_to_line(string_view &source); // Read a prefix string from a string_view. // // Returns false if the string view doesn't start with // the specified prefix. // bool read_prefix(string_view &source, string_view prefix); // Read from a string_view until whitespace is reached. // // If there's any whitespace in the source, returns the text // before the whitespace, and update the source to the text // after the whitespace. // // If there's no whitespace in the source, returns the entire // source, and updates the source to the null string_view. // string_view read_to_space(string_view &source); // Read up to nbytes from a string_view. // string_view read_nbytes(string_view &source, int nbytes); // Read an identifier from a string_view // // If there's no valid identifier, returns empty string. // Underscores are not allowed in the identifier. // string_view read_simple_identifier(string_view &source); // Read an identifier from a string_view // // If there's no valid identifier, returns empty string. // Lua identifiers are allowed to have underscores. // string_view read_lua_identifier(string_view &source); // Read a number from a string view // // This is basically a regex pattern matching routine // hardwired with the regex for numbers. You must // specify which of the following parts of the regex // are allowed or not: // // * plus sign // * minus sign // * decimal point // * scientific notation exponents // // Returns the number as a string_view. There is // no guarantee that the number is small enough to // fit into any particular number of bits. This // always uses base 10. // std::string_view read_number(string_view &source, bool plus, bool minus, bool dec, bool exp); // Read an ascii character from a string. // // Returns -1 if the string is empty. // int32_t read_ascii_char(string_view &source); // Read a UTF8 codepoint from a string_view. // // See documentation in unicode-stuff.hpp // int32_t read_codepoint_utf8(string_view &source); // Return true if the string is valid utf-8. // // See documentation in unicode-stuff.hpp // bool valid_utf8(string_view s); // Check if a UTF8 string contains a substring. // // Eventually, we're going to have a case-insensitive version of this, // but it's really hard to write! // bool contains_substring_utf8(string_view haystack, string_view needle); // Return true if the number conforms to the spec. // See read_number for more information. // bool valid_number(string_view v, bool plus, bool minus, bool dec, bool exp); } // namespace sv namespace util { enum MessageType { MSG_NULL, MSG_DIFF, MSG_ACK, MSG_INVOKE, }; // Note: IdVector is weird in that it deliberately uses std::vector // instead of eng::vector. This is because we want plane scans // to not touch the engine heap. // using IdVector = std::vector; using StringVec = eng::vector; using StringPair = std::pair; using StringSet = eng::set; using LuaSourceVec = eng::vector; using LuaSourcePtr = std::unique_ptr; using HashValue = std::pair; using SharedStdString = std::shared_ptr; using SharedStdStringVec = std::vector; // Ascii uppercase and lowercase. eng::string ascii_tolower(std::string_view c); eng::string ascii_toupper(std::string_view c); // Output a string to a stream using Lua string escaping and quoting. void quote_string(const eng::string &str, std::ostream *os); // base64 encode. void base64_encode(std::string_view v, std::ostream *oss); // base64 decode. // // Returns true if the base64 was 'clean' base64, as // opposed to base64 with extraneous characters. // bool base64_decode(std::string_view v, std::ostream *oss); // ID vector quick create. IdVector id_vector_create(int64_t id1=-1, int64_t id2=-1, int64_t id3=-1, int64_t id4=-1); // Print an ID vector to a stream. void print_id_vector(const IdVector &idv, std::ostream *os); void print_id_vector(const std::vector &idv, std::ostream *os); // ID vector debug string. eng::string id_vector_debug_string(const IdVector &idv); // Unions and sorts two ID vectors. IdVector sort_union_id_vectors(const IdVector &v1, const IdVector &v2); // Get a 128-bit hashvalue for a string. HashValue hash_string(std::string_view str); // Get a 128-bit hashvalue for a string, with a previous value. HashValue hash_string(HashValue prev, std::string_view str); // Get a 128-bit hashvalue for an ID vector. HashValue hash_id_vector(const IdVector &idv); // Convert a 128-bit hash to a hexadecimal string. eng::string hash_to_hex(const HashValue &hash); // Hash four integers together to 64 bits. // This is a good hash, but not cryptographically good. uint64_t hash_ints(uint64_t n1, uint64_t n2, uint64_t n3, uint64_t n4); // Hash a single 64-bit integer. // This is a good hash, but not cryptographically good. // Published by David Stafford in his article 'Better Bit Mixing'. inline uint64_t hash_int(uint64_t x) { x = (x ^ (x >> 30)) * UINT64_C(0xbf58476d1ce4e5b9); x = (x ^ (x >> 27)) * UINT64_C(0x94d049bb133111eb); x = x ^ (x >> 31); return x; } // Convert a 64-bit hash value into a floating point number between 0 and 1. double hash_to_double(uint64_t hash); // Split a string into multiple strings StringVec split(const eng::string &s, char sep); // Split a string into multiple strings using \r or \n StringVec split_lines(const eng::string &s); // Split a string into multiple lines using |, remove any leading blank line. StringVec split_docstring(const eng::string &s); // Join multiple strings into one string eng::string join(const StringVec &strs, eng::string sep); // Return N repetitions of string A eng::string repeat_string(const eng::string &a, int n); // String to lowercase/uppercase. Ascii only, no unicode. eng::string tolower(eng::string input); eng::string toupper(eng::string input); // Convert a codepoint number into a utf8 string. // If the codepoint is invalid, returns empty string. eng::string get_codepoint_utf8(int32_t cp); // Write a codepoint in utf8 to a stream. // If the codepoint is invalid, writes nothing and returns false. bool write_codepoint_utf8(int32_t cp, std::ostream *out); // Calculate distance between two points double distance_squared(double x1, double y1, double x2, double y2); // Make a LuaSourceVec with one element, for unit testing. LuaSourcePtr make_lua_source(const eng::string &code); // Remove items from a vector that are nullptr. template void remove_nullptrs(T &vec) { auto iter = std::partition(vec.begin(), vec.end(), [] (const auto &x) { return x != nullptr; }); vec.erase(iter, vec.end()); } // Remove items from a vector that are marked for deletion. template void remove_marked_items(T &vec) { auto iter = std::partition(vec.begin(), vec.end(), [] (const auto &x) { return !x.marked_for_deletion(); }); vec.erase(iter, vec.end()); } // An XYZ coordinate, general purpose. template struct NumXYZ { using Number = NUMBER; Number x, y, z; NumXYZ() { x=0; y=0; z=0; } NumXYZ(Number ix, Number iy, Number iz) { x=ix; y=iy; z=iz; } void operator =(const NumXYZ &other) { x = other.x; y = other.y; z = other.z; } void operator =(const NumXYZ &other) { x = other.x; y = other.y; z = other.z; } void operator =(Number n) { x = n; y = n; z = n; } bool operator ==(const NumXYZ &o) const { return x==o.x && y == o.y && z==o.z; } bool operator !=(const NumXYZ &o) const { return x!=o.x || y != o.y || z!=o.z; } NumXYZ operator -(const NumXYZ &o) const { return NumXYZ(x-o.x, y-o.y, z-o.z); } NumXYZ operator +(const NumXYZ &o) const { return NumXYZ(x+o.x, y+o.y, z+o.z); } NumXYZ operator *(float scale) const { return NumXYZ(x*scale, y*scale, z*scale); } template const NumXYZ convert() const { NumXYZ r; r.x=ONUMBER(x); r.y=ONUMBER(y); r.z=ONUMBER(z); return r; } eng::string debug_string() const { eng::ostringstream oss; oss << "(" << x << "," << y << "," << z << ")"; return oss.str(); } }; using XYZ=NumXYZ; using DXYZ=NumXYZ; // util::ostringstream // // This is a variant of ostringstream in which it is possible // to get the contents without copying. To get the contents // without copying, use oss.view(). // class ostringstream : public eng::ostringstream { class rstringbuf : public std::basic_stringbuf { public: char *eback() const { return std::streambuf::eback(); } char *pptr() const { return std::streambuf::pptr(); } }; rstringbuf rstringbuf_; public: ostringstream() { std::basic_ostream::rdbuf(&rstringbuf_); } char *data() const { return rstringbuf_.eback(); } size_t size() const { return rstringbuf_.pptr() - rstringbuf_.eback(); } std::string_view view() const { return std::string_view(data(), size()); } eng::string str() const { return rstringbuf_.str(); } }; // send_to_stream: send all arguments to the specified stream. inline void send_to_stream(std::ostream &os) {} template inline void send_to_stream(std::ostream &os, const ARG &arg, const REST & ... rest) { os << arg; send_to_stream(os, rest...); } // ss: convert all arguments to a string by sending them to a stringstream. template inline eng::string ss(const ARGS & ... args) { eng::ostringstream oss; send_to_stream(oss, args...); return oss.str(); } // dprintf / dprint // // Send a debugging message to somewhere that it can be seen. This routine // initially just sends output to stderr. But it can be hooked to send output // somewhere else, like to a debug output window. // // The hook function must be a function that accepts a single line of text. The // hook function will always be passed one line, consisting of printable // characters only. There will be no control characters. The newline is // implied. // void dprintview(std::string_view view); void dprintf(const char *format, ...); void hook_dprint(void (*func)(const char *oneline, size_t size)); template inline void dprint(const ARGS & ... args) { util::ostringstream oss; send_to_stream(oss, args...); dprintview(oss.view()); } // A better API than std::setfill, std::hex, std::setw, std::setprecision // // Usage examples: // std::cout << util::hex.width(5).fill('0').val(123) // std::cout << util::dec.fill('$').precision(val(123) // // The reason that other API is bad is that it can leave std::cout // in an unpredictable state. This API always leaves the stream clean. // template class FormattedNumber { public: VALUE value_; bool hex_; int width_; char fill_; int precision_; constexpr FormattedNumber(VALUE v, bool h, int w, char f, int p) : value_(v), hex_(h), width_(w), fill_(f), precision_(p) {} constexpr FormattedNumber width(int w) const { return FormattedNumber(value_, hex_, w, fill_, precision_); } constexpr FormattedNumber fill(char f) const { return FormattedNumber(value_, hex_, width_, f, precision_); } constexpr FormattedNumber precision(int p) const { return FormattedNumber(value_, hex_, width_, fill_, p); } template constexpr FormattedNumber val(NVALUE v) const { return FormattedNumber(v, hex_, width_, fill_, precision_); } }; constexpr auto hex = FormattedNumber(0, true, 0, '0', 6); constexpr auto hex8 = FormattedNumber(0, true, 2, '0', 6); constexpr auto hex16 = FormattedNumber(0, true, 4, '0', 6); constexpr auto hex32 = FormattedNumber(0, true, 8, '0', 6); constexpr auto hex64 = FormattedNumber(0, true, 16, '0', 6); constexpr auto dec = FormattedNumber(0, false, 0, ' ', 6); } // namespace util template inline std::ostream &operator<<(std::ostream &oss, util::FormattedNumber n) { if (n.hex_) oss << std::hex; else oss << std::dec; oss << std::setprecision(n.precision_) << std::setfill(n.fill_) << std::setw(n.width_) << n.value_; oss << std::dec << std::setfill(' ') << std::setprecision(6); return oss; } inline std::ostream &operator<<(std::ostream &oss, const util::XYZ &xyz) { oss << xyz.x << "," << xyz.y << "," << xyz.z; return oss; } inline std::ostream &operator<<(std::ostream &oss, const util::DXYZ &xyz) { oss << xyz.x << "," << xyz.y << "," << xyz.z; return oss; } #endif // UTIL_HPP