Files
integration/luprex/core/cpp/util.hpp

396 lines
13 KiB
C++

///////////////////////////////////////////////////////////////////////
//
// NAMESPACE SV
//
// * Operate on string_view or just characters.
// * Do not allocate memory.
// * Do not copy strings.
//
// NAMESPACE UTIL
//
// * General purpose utility functions.
// * Sort of a catch-all.
//
///////////////////////////////////////////////////////////////////////
#ifndef UTIL_HPP
#define UTIL_HPP
#include "wrap-string.hpp"
#include "wrap-set.hpp"
#include "wrap-map.hpp"
#include "wrap-vector.hpp"
#include "wrap-sstream.hpp"
#include <ostream>
#include <memory>
#include <utility>
#include <algorithm>
#include <string_view>
#include <cstdint>
#include <limits>
#include <iomanip>
#include "luastack.hpp"
#include "spookyv2.hpp"
namespace sv {
// Bring this into our namespace.
using string_view = std::string_view;
// ASCII character classification.
//
// These look only at the plain ASCII ranges: the current locale is
// never consulted and unicode is not considered.
inline bool ascii_isupper(char c) { return ('A' <= c) && (c <= 'Z'); }
inline bool ascii_islower(char c) { return ('a' <= c) && (c <= 'z'); }
inline bool ascii_isdigit(char c) { return ('0' <= c) && (c <= '9'); }
inline bool ascii_isalpha(char c) { return ascii_islower(c) || ascii_isupper(c); }
inline bool ascii_isalnum(char c) { return ascii_isdigit(c) || ascii_isalpha(c); }
// Whitespace means: space, tab, carriage return, newline, form feed, vertical tab.
inline bool ascii_isspace(char c) {
    switch (c) {
        case ' ':
        case '\t':
        case '\r':
        case '\n':
        case '\f':
        case '\v':
            return true;
        default:
            return false;
    }
}
// Detect the null string_view, i.e. a view whose data pointer is nullptr.
//
// The null view is always empty, but the converse does not hold:
// an empty view that points at real storage is not null.
//
inline bool isnull(string_view v) {
    return nullptr == v.data();
}
// Return true if the two strings are equal, ignoring case.
//
bool case_insensitive_eq(std::string_view s1, std::string_view s2);
// Check whether a string can be parsed as a double, int64, or hex64.
bool valid_double(string_view v);
bool valid_int64(string_view v);
bool valid_hex64(string_view v);
// Convert strings to numbers. Returns errval on failure.
//
// The integer parser accepts a sequence of digits,
// with or without a + or - sign. The hex parser
// does not allow a + or - sign. For both the int64
// and hex64 parser, it is a failure if the number
// does not fit in 64 bits. The double parser does
// not accept the strings 'nan' or 'inf'.
//
double to_double(string_view v, double errval = std::numeric_limits<double>::quiet_NaN());
int64_t to_int64(string_view v, int64_t errval = std::numeric_limits<int64_t>::max());
uint64_t to_hex64(string_view v, uint64_t errval = std::numeric_limits<uint64_t>::max());
// Trim whitespace from a string_view.
string_view ltrim(string_view v);
string_view rtrim(string_view v);
string_view trim(string_view v);
// Trim specific character (all occurrences) from a string_view.
string_view ltrim(string_view v, char c);
string_view rtrim(string_view v, char c);
string_view trim(string_view v, char c);
// Return true if the string has the specified prefix or suffix.
bool has_prefix(string_view s, string_view prefix);
bool has_suffix(string_view s, string_view suffix);
// Return the length of the common prefix of A and B.
int common_prefix_length(string_view a, string_view b);
// Return true if the string is a lua identifier.
bool is_lua_id(string_view s);
// Return true if the line of code is a lua comment.
bool is_lua_comment(string_view s);
// Peek at the first character of a view without consuming it.
//
// Returns char(0) when the view is empty, so callers never need to
// test for emptiness before looking at the front.
//
// The view is taken by value (it is never modified), which lets this
// be called on const views and temporaries as well as on lvalues.
//
inline char zfront(string_view s) {
    if (s.empty()) {
        return char(0);
    }
    return s.front();
}
// Read from a string_view until separator is reached.
//
// If the separator appears in the source, returns everything
// before the separator, and updates the source to everything
// after the separator.
//
// If the separator doesn't appear in the source, returns
// the entire source, and replaces source with the null string_view.
//
string_view read_to_sep(string_view &source, char sep);
// Read from a string_view until newline is reached.
//
// If there's a line-break in the source (newline or CRLF),
// returns the text before the line-break, and updates the
// source to the text after the line-break.
//
// If there's no line-break in the source, returns the entire source,
// and updates source to the null string_view.
//
string_view read_to_line(string_view &source);
// Read a prefix string from a string_view.
//
// Returns false if the string view doesn't start with
// the specified prefix.
//
bool read_prefix(string_view &source, string_view prefix);
// Read from a string_view until whitespace is reached.
//
// If there's any whitespace in the source, returns the text
// before the whitespace, and updates the source to the text
// after the whitespace.
//
// If there's no whitespace in the source, returns the entire
// source, and updates the source to the null string_view.
//
string_view read_to_space(string_view &source);
// Read up to nbytes from a string_view.
//
string_view read_nbytes(string_view &source, int nbytes);
// Read an ascii identifier from a string_view
//
// If there's no valid identifier, returns empty string.
//
string_view read_ascii_identifier(string_view &source);
// Read a number from a string view
//
// This is basically a regex pattern matching routine
// hardwired with the regex for numbers. You must
// specify which of the following parts of the regex
// are allowed or not:
//
// * plus sign
// * minus sign
// * decimal point
// * scientific notation exponents
//
// Returns the number as a string_view. There is
// no guarantee that the number is small enough to
// fit into any particular number of bits. This
// always uses base 10.
//
std::string_view read_number(string_view &source, bool plus, bool minus, bool dec, bool exp);
// Read an ascii character from a string.
//
// Returns -1 if the string is empty.
//
int32_t read_ascii_char(string_view &source);
// Read a UTF8 codepoint from a string_view.
//
// If the next thing in the string_view isn't a valid
// codepoint, returns -1 and doesn't update the view.
//
int32_t read_codepoint_utf8(string_view &source);
// Return true if the string is valid utf-8.
bool valid_utf8(string_view s);
// Return true if the number conforms to the spec.
// See read_number for more information.
//
bool valid_number(string_view v, bool plus, bool minus, bool dec, bool exp);
} // namespace sv
namespace util {
// Kind of world.  Enumerators are numbered consecutively from zero.
// NOTE(review): the C_SYNC / S_SYNC names presumably mean client-side and
// server-side synchronized worlds -- confirm against the users of this enum.
enum WorldType {
    WORLD_TYPE_STANDALONE = 0,
    WORLD_TYPE_C_SYNC     = 1,
    WORLD_TYPE_S_SYNC     = 2,
    WORLD_TYPE_MASTER     = 3,
};
// Kind of message.  Enumerators are numbered consecutively from zero,
// with MSG_NULL being the zero value.
// NOTE(review): the meaning of each message kind is not visible here --
// confirm against the code that sends and receives them.
enum MessageType {
    MSG_NULL   = 0,
    MSG_DIFF   = 1,
    MSG_ACK    = 2,
    MSG_INVOKE = 3,
};
// Shorthand names for container types used throughout the utilities.
//
// A vector of strings.
using StringVec = eng::vector<eng::string>;
// A pair of strings.
using StringPair = std::pair<eng::string, eng::string>;
// A set of strings.
using StringSet = eng::set<eng::string>;
// A vector of string pairs.
// NOTE(review): presumably each pair is (name, source code) -- confirm.
using LuaSourceVec = eng::vector<StringPair>;
// Uniquely-owning pointer to a LuaSourceVec.
using LuaSourcePtr = std::unique_ptr<LuaSourceVec>;
// Two 64-bit words, used by the hashing functions as a 128-bit hash value.
using HashValue = std::pair<uint64_t, uint64_t>;
// A vector of 64-bit integer IDs.
using IdVector = eng::vector<int64_t>;
// Ascii uppercase and lowercase.
eng::string ascii_tolower(std::string_view c);
eng::string ascii_toupper(std::string_view c);
// Return seconds elapsed, for profiling purposes.
double profiling_clock();
// Output a string to a stream using Lua string escaping and quoting.
void quote_string(const eng::string &str, std::ostream *os);
// base64 encode.
void base64_encode(std::string_view v, std::ostream *oss);
// base64 decode.
//
// Returns true if the base64 was 'clean' base64, as
// opposed to base64 with extraneous characters.
//
bool base64_decode(std::string_view v, std::ostream *oss);
// ID vector quick create.
IdVector id_vector_create(int64_t id1=-1, int64_t id2=-1, int64_t id3=-1, int64_t id4=-1);
// ID vector debug string.
eng::string id_vector_debug_string(const IdVector &idv);
// Unions and sorts two ID vectors.
IdVector sort_union_id_vectors(const IdVector &v1, const IdVector &v2);
// Get a 128-bit hashvalue for a string.
HashValue hash_string(const eng::string &str);
// Get a 128-bit hashvalue for an ID vector.
HashValue hash_id_vector(const IdVector &idv);
// Convert a 128-bit hash to a hexadecimal string.
eng::string hash_to_hex(const HashValue &hash);
// Hash four integers together to 64 bits.
// This is a good hash, but not cryptographically good.
uint64_t hash_ints(uint64_t n1, uint64_t n2, uint64_t n3, uint64_t n4);
// Convert a 64-bit hash value into a floating point number between 0 and 1.
double hash_to_double(uint64_t hash);
// Split a string into multiple strings
StringVec split(const eng::string &s, char sep);
// Split a string into multiple strings using \r or \n
StringVec split_lines(const eng::string &s);
// Split a string into multiple lines using |, remove any leading blank line.
StringVec split_docstring(const eng::string &s);
// Join multiple strings into one string
eng::string join(const StringVec &strs, eng::string sep);
// Return N repetitions of string A
eng::string repeat_string(const eng::string &a, int n);
// String to lowercase/uppercase. Ascii only, no unicode.
eng::string tolower(eng::string input);
eng::string toupper(eng::string input);
// Convert a codepoint number into a utf8 string.
// If the codepoint is invalid, returns empty string.
eng::string get_codepoint_utf8(int32_t cp);
// Write a codepoint in utf8 to a stream.
// If the codepoint is invalid, writes nothing and returns false.
bool write_codepoint_utf8(int32_t cp, std::ostream *out);
// Calculate distance between two points
double distance_squared(double x1, double y1, double x2, double y2);
// Make a LuaSourceVec with one element, for unit testing.
LuaSourcePtr make_lua_source(const eng::string &code);
// Drop every element that compares equal to nullptr.
//
// T must offer begin(), end() and range erase().  The survivors are
// gathered at the front with std::partition, so their relative order
// is not guaranteed to be preserved.
template<class T>
void remove_nullptrs(T &vec) {
    auto is_present = [] (const auto &elem) { return elem != nullptr; };
    auto first_null = std::partition(vec.begin(), vec.end(), is_present);
    vec.erase(first_null, vec.end());
}
// Drop every element whose marked_for_deletion() returns true.
//
// T must offer begin(), end() and range erase().  The survivors are
// gathered at the front with std::partition, so their relative order
// is not guaranteed to be preserved.
template<class T>
void remove_marked_items(T &vec) {
    auto is_kept = [] (const auto &elem) { return !elem.marked_for_deletion(); };
    auto first_marked = std::partition(vec.begin(), vec.end(), is_kept);
    vec.erase(first_marked, vec.end());
}
// General-purpose triple of single-precision coordinates.
//
// Default construction yields (0, 0, 0).  Comparison is exact,
// component-wise float comparison with no tolerance.
struct XYZ {
    float x, y, z;

    // All components zero.
    XYZ() : x(0), y(0), z(0) {}
    // Explicit components.
    XYZ(float ix, float iy, float iz) : x(ix), y(iy), z(iz) {}

    bool operator ==(const XYZ &o) const {
        return (x == o.x) && (y == o.y) && (z == o.z);
    }
    // True whenever at least one component differs.
    bool operator !=(const XYZ &o) const {
        return !((x == o.x) && (y == o.y) && (z == o.z));
    }

    // Declaration only; no body is given here.
    eng::string debug_string() const;
};
// A stream buffer that discards everything written to it.
//
// It has no put area, so every character written through it ends up
// in overflow(), which simply reports success.
class NullStreamBuffer : public std::streambuf
{
public:
    // Swallow the character and report success.
    //
    // Returning eof from overflow() means "write failed".  not_eof()
    // guarantees a non-eof result even when c itself is eof, so this
    // sink can never put the owning stream into a failed state.
    int_type overflow(int_type c) override {
        return traits_type::not_eof(c);
    }
};
// send_to_stream: write every argument to the given stream, in order,
// using operator<<.
//
// With no arguments, nothing is written.
inline void send_to_stream(std::ostream &) {}
template <class ARG, class... REST>
inline void send_to_stream(std::ostream &os, const ARG &arg, const REST & ... rest) {
    os << arg;
    // Comma fold: the remaining arguments are inserted left to right.
    (static_cast<void>(os << rest), ...);
}
// ss: build a string by streaming every argument, in order, into a
// string stream and returning the accumulated text.
//
// With no arguments, the result is whatever an untouched stream yields.
template <class... ARGS>
inline eng::string ss(const ARGS & ... args) {
    eng::ostringstream buffer;
    // Insert through the std::ostream interface so that operator<< is
    // resolved against std::ostream for every argument.
    std::ostream &out = buffer;
    static_cast<void>(out);
    (static_cast<void>(out << args), ...);
    return buffer.str();
}
// A better API than std::setfill, std::hex, std::setw, std::setprecision
//
// Usage examples:
//   std::cout << util::hex.width(5).fill('0').val(123)
//   std::cout << util::dec.fill('$').precision(3).val(1.25)
//
// The reason that other API is bad is that it can leave std::cout
// in an unpredictable state. This API always leaves the stream clean.
//
// A FormattedNumber is an immutable bundle of a value plus the
// formatting to print it with.  Each builder method returns a modified
// copy, so the predefined constants can be chained without being changed.
//
template <class VALUE>
class FormattedNumber {
public:
    VALUE value_;    // The number to print.
    bool hex_;       // True for hexadecimal output, false for decimal.
    int width_;      // Minimum field width.
    char fill_;      // Padding character used to reach the width.
    int precision_;  // Floating point precision.

    constexpr FormattedNumber(VALUE v, bool h, int w, char f, int p)
        : value_(v), hex_(h), width_(w), fill_(f), precision_(p) {}

    // Builders: each returns a copy with one formatting field replaced.
    constexpr FormattedNumber width(int w) const { return FormattedNumber(value_, hex_, w, fill_, precision_); }
    constexpr FormattedNumber fill(char f) const { return FormattedNumber(value_, hex_, width_, f, precision_); }
    constexpr FormattedNumber precision(int p) const { return FormattedNumber(value_, hex_, width_, fill_, p); }

    // Attach the value to print, keeping the formatting.
    //
    // The result carries the value in its own (promoted) type rather than
    // in VALUE, so 64-bit integers are not truncated and floating point
    // values keep their fraction.  Unary plus applies the usual integral
    // promotions, so char-sized and bool values become int and therefore
    // still print as numbers instead of as characters.
    template <class NVALUE>
    constexpr auto val(NVALUE v) const -> FormattedNumber<decltype(+v)> {
        return FormattedNumber<decltype(+v)>(+v, hex_, width_, fill_, precision_);
    }
};
constexpr auto hex = FormattedNumber<int>(0, true, 0, '0', 6);
constexpr auto hex8 = FormattedNumber<int>(0, true, 2, '0', 6);
constexpr auto hex16 = FormattedNumber<int>(0, true, 4, '0', 6);
constexpr auto hex32 = FormattedNumber<int>(0, true, 8, '0', 6);
constexpr auto hex64 = FormattedNumber<int>(0, true, 16, '0', 6);
constexpr auto dec = FormattedNumber<int>(0, false, 0, ' ', 6);
} // namespace util
// Stream insertion for util::FormattedNumber.
//
// Applies the requested base, precision, fill and width, writes the
// value, and then puts the stream back to decimal, space fill and
// precision 6 so the formatting does not leak into later output.
template<class VALUE>
inline std::ostream &operator<<(std::ostream &oss, util::FormattedNumber<VALUE> n) {
    const std::ios_base::fmtflags radix = n.hex_ ? std::ios_base::hex : std::ios_base::dec;
    oss.setf(radix, std::ios_base::basefield);
    oss.precision(n.precision_);
    oss.fill(n.fill_);
    oss.width(n.width_);
    oss << n.value_;
    // Back to the defaults.
    oss.setf(std::ios_base::dec, std::ios_base::basefield);
    oss.fill(' ');
    oss.precision(6);
    return oss;
}
#endif // UTIL_HPP