396 lines
13 KiB
C++
396 lines
13 KiB
C++
///////////////////////////////////////////////////////////////////////
|
|
//
|
|
// NAMESPACE SV
|
|
//
|
|
// * Operate on string_view or just characters.
|
|
// * Do not allocate memory.
|
|
// * Do not copy strings.
|
|
//
|
|
// NAMESPACE UTIL
|
|
//
|
|
// * General purpose utility functions.
|
|
// * Sort of a catch-all.
|
|
//
|
|
///////////////////////////////////////////////////////////////////////
|
|
|
|
#ifndef UTIL_HPP
|
|
#define UTIL_HPP
|
|
|
|
#include "wrap-string.hpp"
|
|
#include "wrap-set.hpp"
|
|
#include "wrap-map.hpp"
|
|
#include "wrap-vector.hpp"
|
|
#include "wrap-sstream.hpp"
|
|
#include <ostream>
|
|
#include <memory>
|
|
#include <utility>
|
|
#include <algorithm>
|
|
#include <string_view>
|
|
#include <cstdint>
|
|
#include <limits>
|
|
#include <iomanip>
|
|
|
|
#include "luastack.hpp"
|
|
#include "spookyv2.hpp"
|
|
|
|
namespace sv {
|
|
|
|
// Bring this into our namespace.
|
|
using string_view = std::string_view;
|
|
|
|
// Test character class, ignoring current locale and unicode issues.
|
|
// True only for the ASCII capital letters 'A' through 'Z'.
inline bool ascii_isupper(char c) {
    if (c < 'A') return false;
    return c <= 'Z';
}
|
|
// True only for the ASCII small letters 'a' through 'z'.
inline bool ascii_islower(char c) {
    if (c < 'a') return false;
    return c <= 'z';
}
|
|
// True only for the ASCII decimal digits '0' through '9'.
inline bool ascii_isdigit(char c) {
    if (c < '0') return false;
    return c <= '9';
}
|
|
// True for an ASCII letter of either case.
inline bool ascii_isalpha(char c) {
    if (ascii_isupper(c)) return true;
    return ascii_islower(c);
}
|
|
// True for an ASCII letter or an ASCII decimal digit.
inline bool ascii_isalnum(char c) {
    if (ascii_isalpha(c)) return true;
    return ascii_isdigit(c);
}
|
|
// True for the six ASCII whitespace characters: space, tab, carriage
// return, newline, form feed and vertical tab.
inline bool ascii_isspace(char c) {
    switch (c) {
        case ' ':
        case '\t':
        case '\r':
        case '\n':
        case '\f':
        case '\v':
            return true;
        default:
            return false;
    }
}
|
|
|
|
// Check for the null string_view
|
|
//
|
|
// Note that the null string view is an empty string,
|
|
// but not every empty string is the null string view.
|
|
//
|
|
inline bool isnull(string_view v) { return v.data() == nullptr; }
|
|
|
|
// Return true if the two strings are equal, ignoring case.
|
|
//
|
|
bool case_insensitive_eq(std::string_view s1, std::string_view s2);
|
|
|
|
// Check whether a string can be parsed as a double, int64 or hex64
|
|
bool valid_double(string_view v);
|
|
bool valid_int64(string_view v);
|
|
bool valid_hex64(string_view v);
|
|
|
|
// Convert strings to numbers. Returns errval on failure.
|
|
//
|
|
// The integer parser accepts a sequence of digits,
|
|
// with or without a + or - sign. The hex parser
|
|
// does not allow a + or - sign. For both the int64
|
|
// and hex64 parser, it is a failure if the number
|
|
// does not fit in 64 bits. The double parser does
|
|
// not accept the strings 'nan' or 'inf'.
|
|
//
|
|
double to_double(string_view v, double errval = std::numeric_limits<double>::quiet_NaN());
|
|
int64_t to_int64(string_view v, int64_t errval = std::numeric_limits<int64_t>::max());
|
|
uint64_t to_hex64(string_view v, uint64_t errval = std::numeric_limits<uint64_t>::max());
|
|
|
|
// Trim whitespace from a string_view.
|
|
string_view ltrim(string_view v);
|
|
string_view rtrim(string_view v);
|
|
string_view trim(string_view v);
|
|
|
|
// Trim specific character (all occurrences) from a string_view.
|
|
string_view ltrim(string_view v, char c);
|
|
string_view rtrim(string_view v, char c);
|
|
string_view trim(string_view v, char c);
|
|
|
|
// Return true if the string has the specified prefix or suffix.
|
|
bool has_prefix(string_view s, string_view prefix);
|
|
bool has_suffix(string_view s, string_view suffix);
|
|
|
|
// Return the length of the common prefix of A and B.
|
|
int common_prefix_length(string_view a, string_view b);
|
|
|
|
// Return true if the string is a lua identifier.
|
|
bool is_lua_id(string_view s);
|
|
|
|
// Return true if the line of code is a lua comment.
|
|
bool is_lua_comment(string_view s);
|
|
|
|
// Return the first character, but if the view is empty,
|
|
// return zero.
|
|
inline char zfront(string_view &s) {
|
|
return s.empty() ? char(0) : s.front();
|
|
}
|
|
|
|
// Read from a string_view until separator is reached.
|
|
//
|
|
// If the separator appears in the source, returns everything
|
|
// before the separator, and updates the source to everything
|
|
// after the separator.
|
|
//
|
|
// If the separator doesn't appear in the source, returns
|
|
// the entire source, and replaces source with the null string_view.
|
|
//
|
|
string_view read_to_sep(string_view &source, char sep);
|
|
|
|
// Read from a string_view until newline is reached.
|
|
//
|
|
// If there's a line-break in the source (newline or CRLF),
|
|
// returns the text before the line-break, and updates the
|
|
// source to the text after the line-break.
|
|
//
|
|
// If there's no line-break in the source, returns the entire source,
|
|
// and updates source to the null string_view.
|
|
//
|
|
string_view read_to_line(string_view &source);
|
|
|
|
// Read a prefix string from a string_view.
|
|
//
|
|
// Returns false if the string view doesn't start with
|
|
// the specified prefix.
|
|
//
|
|
bool read_prefix(string_view &source, string_view prefix);
|
|
|
|
// Read from a string_view until whitespace is reached.
|
|
//
|
|
// If there's any whitespace in the source, returns the text
|
|
// before the whitespace, and updates the source to the text
|
|
// after the whitespace.
|
|
//
|
|
// If there's no whitespace in the source, returns the entire
|
|
// source, and updates the source to the null string_view.
|
|
//
|
|
string_view read_to_space(string_view &source);
|
|
|
|
// Read up to nbytes from a string_view.
|
|
//
|
|
string_view read_nbytes(string_view &source, int nbytes);
|
|
|
|
// Read an ascii identifier from a string_view
|
|
//
|
|
// If there's no valid identifier, returns empty string.
|
|
//
|
|
string_view read_ascii_identifier(string_view &source);
|
|
|
|
// Read a number from a string view
|
|
//
|
|
// This is basically a regex pattern matching routine
|
|
// hardwired with the regex for numbers. You must
|
|
// specify which of the following parts of the regex
|
|
// are allowed or not:
|
|
//
|
|
// * plus sign
|
|
// * minus sign
|
|
// * decimal point
|
|
// * scientific notation exponents
|
|
//
|
|
// Returns the number as a string_view. There is
|
|
// no guarantee that the number is small enough to
|
|
// fit into any particular number of bits. This
|
|
// always uses base 10.
|
|
//
|
|
std::string_view read_number(string_view &source, bool plus, bool minus, bool dec, bool exp);
|
|
|
|
// Read an ascii character from a string.
|
|
//
|
|
// Returns -1 if the string is empty.
|
|
//
|
|
int32_t read_ascii_char(string_view &source);
|
|
|
|
// Read a UTF8 codepoint from a string_view.
|
|
//
|
|
// If the next thing in the string_view isn't a valid
|
|
// codepoint, returns -1 and doesn't update the view.
|
|
//
|
|
int32_t read_codepoint_utf8(string_view &source);
|
|
|
|
// Return true if the string is valid utf-8.
|
|
bool valid_utf8(string_view s);
|
|
|
|
// Return true if the number conforms to the spec.
|
|
// See read_number for more information.
|
|
//
|
|
bool valid_number(string_view v, bool plus, bool minus, bool dec, bool exp);
|
|
|
|
} // namespace sv
|
|
|
|
namespace util {
|
|
|
|
// The kinds of world. Enumerators are numbered consecutively from zero,
// in declaration order.
enum WorldType {
    WORLD_TYPE_STANDALONE = 0,
    WORLD_TYPE_C_SYNC     = 1,
    WORLD_TYPE_S_SYNC     = 2,
    WORLD_TYPE_MASTER     = 3,
};
|
|
|
|
// The kinds of message. Enumerators are numbered consecutively from
// zero, in declaration order; MSG_NULL is the zero value.
enum MessageType {
    MSG_NULL   = 0,
    MSG_DIFF   = 1,
    MSG_ACK    = 2,
    MSG_INVOKE = 3,
};
|
|
|
|
using StringVec = eng::vector<eng::string>;
|
|
using StringPair = std::pair<eng::string, eng::string>;
|
|
using StringSet = eng::set<eng::string>;
|
|
using LuaSourceVec = eng::vector<StringPair>;
|
|
using LuaSourcePtr = std::unique_ptr<LuaSourceVec>;
|
|
using HashValue = std::pair<uint64_t, uint64_t>;
|
|
using IdVector = eng::vector<int64_t>;
|
|
|
|
// Ascii uppercase and lowercase.
|
|
eng::string ascii_tolower(std::string_view c);
|
|
eng::string ascii_toupper(std::string_view c);
|
|
|
|
// Return seconds elapsed, for profiling purposes.
|
|
double profiling_clock();
|
|
|
|
// Output a string to a stream using Lua string escaping and quoting.
|
|
void quote_string(const eng::string &str, std::ostream *os);
|
|
|
|
// base64 encode.
|
|
void base64_encode(std::string_view v, std::ostream *oss);
|
|
|
|
// base64 decode.
|
|
//
|
|
// Returns true if the base64 was 'clean' base64, as
|
|
// opposed to base64 with extraneous characters.
|
|
//
|
|
bool base64_decode(std::string_view v, std::ostream *oss);
|
|
|
|
// ID vector quick create.
|
|
IdVector id_vector_create(int64_t id1=-1, int64_t id2=-1, int64_t id3=-1, int64_t id4=-1);
|
|
|
|
// ID vector debug string.
|
|
eng::string id_vector_debug_string(const IdVector &idv);
|
|
|
|
// Unions and sorts two ID vectors.
|
|
IdVector sort_union_id_vectors(const IdVector &v1, const IdVector &v2);
|
|
|
|
// Get a 128-bit hashvalue for a string.
|
|
HashValue hash_string(const eng::string &str);
|
|
|
|
// Get a 128-bit hashvalue for an ID vector.
|
|
HashValue hash_id_vector(const IdVector &idv);
|
|
|
|
// Convert a 128-bit hash to a hexadecimal string.
|
|
eng::string hash_to_hex(const HashValue &hash);
|
|
|
|
// Hash four integers together to 64 bits.
|
|
// This is a good hash, but not cryptographically good.
|
|
uint64_t hash_ints(uint64_t n1, uint64_t n2, uint64_t n3, uint64_t n4);
|
|
|
|
// Convert a 64-bit hash value into a floating point number between 0 and 1.
|
|
double hash_to_double(uint64_t hash);
|
|
|
|
// Split a string into multiple strings
|
|
StringVec split(const eng::string &s, char sep);
|
|
|
|
// Split a string into multiple strings using \r or \n
|
|
StringVec split_lines(const eng::string &s);
|
|
|
|
// Split a string into multiple lines using |, remove any leading blank line.
|
|
StringVec split_docstring(const eng::string &s);
|
|
|
|
// Join multiple strings into one string
|
|
eng::string join(const StringVec &strs, eng::string sep);
|
|
|
|
// Return N repetitions of string A
|
|
eng::string repeat_string(const eng::string &a, int n);
|
|
|
|
// String to lowercase/uppercase. Ascii only, no unicode.
|
|
eng::string tolower(eng::string input);
|
|
eng::string toupper(eng::string input);
|
|
|
|
// Convert a codepoint number into a utf8 string.
|
|
// If the codepoint is invalid, returns empty string.
|
|
eng::string get_codepoint_utf8(int32_t cp);
|
|
|
|
// Write a codepoint in utf8 to a stream.
|
|
// If the codepoint is invalid, writes nothing and returns false.
|
|
bool write_codepoint_utf8(int32_t cp, std::ostream *out);
|
|
|
|
// Calculate the squared distance between the points (x1,y1) and (x2,y2)
|
|
double distance_squared(double x1, double y1, double x2, double y2);
|
|
|
|
// Make a LuaSourceVec with one element, for unit testing.
|
|
LuaSourcePtr make_lua_source(const eng::string &code);
|
|
|
|
// Remove items from a vector that are nullptr.
//
// T is any sequence container offering begin(), end() and range
// erase(), whose elements can be compared against nullptr (raw or
// smart pointers). The surviving elements keep their original
// relative order.
//
template<class T>
void remove_nullptrs(T &vec) {
    // Erase-remove idiom: std::remove_if is stable, whereas
    // std::partition may reorder the elements that are kept.
    auto new_end = std::remove_if(vec.begin(), vec.end(),
                                  [] (const auto &x) { return !(x != nullptr); });
    vec.erase(new_end, vec.end());
}
|
|
|
|
// Remove items from a vector that are marked for deletion.
//
// T is any sequence container offering begin(), end() and range
// erase(), whose elements provide a const marked_for_deletion()
// member. Elements for which it returns true are erased; the others
// keep their original relative order.
//
template<class T>
void remove_marked_items(T &vec) {
    // Erase-remove idiom: std::remove_if is stable, whereas
    // std::partition may reorder the elements that are kept.
    auto new_end = std::remove_if(vec.begin(), vec.end(),
                                  [] (const auto &x) { return static_cast<bool>(x.marked_for_deletion()); });
    vec.erase(new_end, vec.end());
}
|
|
|
|
// An XYZ coordinate, general purpose.
|
|
struct XYZ {
|
|
float x, y, z;
|
|
XYZ() { x=0; y=0; z=0; }
|
|
XYZ(float ix, float iy, float iz) { x=ix; y=iy; z=iz; }
|
|
bool operator ==(const XYZ &o) const { return x==o.x && y == o.y && z==o.z; }
|
|
bool operator !=(const XYZ &o) const { return x!=o.x || y != o.y || z!=o.z; }
|
|
eng::string debug_string() const;
|
|
};
|
|
|
|
// A stream buffer that discards everything written to it.
//
// Attach it to a std::ostream to obtain a stream that swallows its
// output:
//
//   NullStreamBuffer nb;
//   std::ostream devnull(&nb);
//
class NullStreamBuffer : public std::streambuf
{
public:
    // Invoked by the stream when a character has to be written and
    // there is no room in the put area; this class never sets one up,
    // so every write ends up here. Nothing is stored.
    //
    // The return value must not be eof, because eof is how overflow()
    // reports failure. not_eof() leaves ordinary characters unchanged
    // and maps an eof argument to a non-eof value.
    int_type overflow(int_type c = traits_type::eof()) override {
        return traits_type::not_eof(c);
    }
};
|
|
|
|
// send_to_stream: insert every argument into the stream, left to right.
// Called with only a stream, it does nothing.
inline void send_to_stream(std::ostream &os) {}

template <class ARG, class... REST>
inline void send_to_stream(std::ostream &os, const ARG &arg, const REST & ... rest) {
    os << arg;
    // Comma fold: one separate insertion per remaining argument, in order.
    ((void)(os << rest), ...);
}
|
|
|
|
// ss: convert all arguments to a string by sending them to a stringstream.
|
|
template <class... ARGS>
|
|
inline eng::string ss(const ARGS & ... args) {
|
|
eng::ostringstream oss;
|
|
send_to_stream(oss, args...);
|
|
return oss.str();
|
|
}
|
|
|
|
// A better API than std::setfill, std::hex, std::setw, std::setprecision
//
// Usage examples:
//   std::cout << util::hex.width(5).fill('0').val(123)
//   std::cout << util::dec.fill('$').precision(3).val(1.5)
//
// The reason that other API is bad is that it can leave std::cout
// in an unpredictable state.  This API always leaves the stream clean.
//
// A FormattedNumber bundles a value with the formatting to apply when
// it is printed. Each setter returns a modified copy, so calls can be
// chained starting from a preset.
//
template <class VALUE>
class FormattedNumber {
public:
    VALUE value_;     // the number to print
    bool hex_;        // true: hexadecimal base, false: decimal base
    int width_;       // minimum field width
    char fill_;       // padding character used to reach the width
    int precision_;   // stream precision used while printing

    constexpr FormattedNumber(VALUE v, bool h, int w, char f, int p)
        : value_(v), hex_(h), width_(w), fill_(f), precision_(p) {}

    // Copies of this object with one formatting field replaced.
    constexpr FormattedNumber width(int w) const { return FormattedNumber(value_, hex_, w, fill_, precision_); }
    constexpr FormattedNumber fill(char f) const { return FormattedNumber(value_, hex_, width_, f, precision_); }
    constexpr FormattedNumber precision(int p) const { return FormattedNumber(value_, hex_, width_, fill_, p); }

    // Attach the value to print, keeping the formatting fields.
    //
    // The result carries the value in its own (promoted) type rather
    // than converting it to VALUE, so 64-bit integers and floating
    // point numbers are not truncated to the preset's value type.
    // Unary plus applies the usual integer promotions, so char and
    // other small integer types are still stored as int.
    template <class NVALUE>
    constexpr auto val(NVALUE v) const -> FormattedNumber<decltype(+v)> {
        return FormattedNumber<decltype(+v)>(+v, hex_, width_, fill_, precision_);
    }
};
|
|
|
|
// Ready-made formatting presets. Adjust them with .width(), .fill() or
// .precision() and finish with .val(x) to attach the number to print.
//
//   hex                  hexadecimal, '0' fill, no minimum width
//   hex8/16/32/64        hexadecimal, '0' fill, width 2/4/8/16
//   dec                  decimal, ' ' fill, no minimum width
//
// Declared 'inline' so that every translation unit including this
// header refers to the same object instead of a private copy.
inline constexpr auto hex = FormattedNumber<int>(0, true, 0, '0', 6);
inline constexpr auto hex8 = FormattedNumber<int>(0, true, 2, '0', 6);
inline constexpr auto hex16 = FormattedNumber<int>(0, true, 4, '0', 6);
inline constexpr auto hex32 = FormattedNumber<int>(0, true, 8, '0', 6);
inline constexpr auto hex64 = FormattedNumber<int>(0, true, 16, '0', 6);
inline constexpr auto dec = FormattedNumber<int>(0, false, 0, ' ', 6);
|
|
|
|
} // namespace util
|
|
|
|
template<class VALUE>
|
|
inline std::ostream &operator<<(std::ostream &oss, util::FormattedNumber<VALUE> n) {
|
|
if (n.hex_) oss << std::hex;
|
|
else oss << std::dec;
|
|
oss << std::setprecision(n.precision_) << std::setfill(n.fill_) << std::setw(n.width_) << n.value_;
|
|
oss << std::dec << std::setfill(' ') << std::setprecision(6);
|
|
return oss;
|
|
}
|
|
|
|
|
|
#endif // UTIL_HPP
|