integration/luprex/core/cpp/util.hpp

///////////////////////////////////////////////////////////////////////
//
// NAMESPACE SV
//
//  * Operate on string_view or just characters.
//  * Do not allocate memory.
//  * Do not copy strings.
//
// NAMESPACE UTIL
//
//  * General purpose utility functions.
//  * Sort of a catch-all.
//
///////////////////////////////////////////////////////////////////////

#ifndef UTIL_HPP
#define UTIL_HPP

#include "wrap-string.hpp"
#include "wrap-set.hpp"
#include "wrap-map.hpp"
#include "wrap-vector.hpp"
#include "wrap-sstream.hpp"
#include <ostream>
#include <memory>
#include <utility>
#include <algorithm>
#include <string_view>
#include <cstdint>
#include <limits>
#include <iomanip>

#include "luastack.hpp"
#include "spookyv2.hpp"

namespace sv {

// Bring this into our namespace.
using string_view = std::string_view;

// Test character class, ignoring current locale and unicode issues.
// 'A'..'Z' only.
inline bool ascii_isupper(char c) {
    return !(c < 'A' || c > 'Z');
}
// 'a'..'z' only.
inline bool ascii_islower(char c) {
    return !(c < 'a' || c > 'z');
}
// '0'..'9' only.
inline bool ascii_isdigit(char c) {
    return !(c < '0' || c > '9');
}
// Ascii letters of either case.
inline bool ascii_isalpha(char c) {
    if (ascii_isupper(c)) return true;
    return ascii_islower(c);
}
// Ascii letters and decimal digits.
inline bool ascii_isalnum(char c) {
    if (ascii_isalpha(c)) return true;
    return ascii_isdigit(c);
}
// Space, tab, carriage return, newline, form feed and vertical tab.
inline bool ascii_isspace(char c) {
    switch (c) {
        case ' ':
        case '\t':
        case '\r':
        case '\n':
        case '\f':
        case '\v':
            return true;
        default:
            return false;
    }
}

// Detect the null string_view, i.e. one whose data pointer is nullptr.
//
// The null string_view is an empty string, but an empty
// string_view is not necessarily the null string_view.
//
inline bool isnull(string_view v) {
    const char *start = v.data();
    return start == nullptr;
}

// Return true if the two strings are equal, ignoring case.
//
bool case_insensitive_eq(std::string_view s1, std::string_view s2);

// Check if numbers can be parsed as int64/double
bool valid_double(string_view v);
bool valid_int64(string_view v);
bool valid_hex64(string_view v);

// Convert strings to numbers.  Returns errval on failure.
//
// The integer parser accepts a sequence of digits,
// with or without a + or - sign.   The hex parser
// does not allow a + or - sign.  For both the int64
// and hex64 parser, it is a failure if the number
// does not fit in 64 bits.  The double parser does
// not accept the strings 'nan' or 'inf'.
//
double to_double(string_view v, double errval = std::numeric_limits<double>::quiet_NaN());
int64_t to_int64(string_view v, int64_t errval = std::numeric_limits<int64_t>::max());
uint64_t to_hex64(string_view v, uint64_t errval = std::numeric_limits<uint64_t>::max());

// Trim whitespace from a string_view.
string_view ltrim(string_view v);
string_view rtrim(string_view v);
string_view trim(string_view v);

// Trim specific character (all occurrences) from a string_view.
string_view ltrim(string_view v, char c);
string_view rtrim(string_view v, char c);
string_view trim(string_view v, char c);

// Return true if the string has the specified prefix or suffix.
bool has_prefix(string_view s, string_view prefix);
bool has_suffix(string_view s, string_view suffix);

// Return the length of the common prefix of A and B.
int common_prefix_length(string_view a, string_view b);

// Return true if the string is a lua identifier.
bool is_lua_id(string_view s);

// Return true if the line of code is a lua comment.
bool is_lua_comment(string_view s);

// Return the first character of the view, or char(0) if the
// view is empty.
//
// The view is only inspected, never modified, so it is taken by
// value.  That lets const views and temporaries be passed in
// addition to ordinary lvalues.
//
inline char zfront(std::string_view s) {
    return s.empty() ? char(0) : s.front();
}

// Read from a string_view until separator is reached.
//
// If the separator appears in the source, returns everything
// before the separator, and updates the source to everything
// after the separator.
//
// If the separator doesn't appear in the source, returns
// the entire source, and replaces source with the null string_view.
//
string_view read_to_sep(string_view &source, char sep);

// Read from a string_view until newline is reached.
//
// If there's a line-break in the source (newline or CRLF),
// returns the text before the line-break, and updates the
// source to the text after the line-break.
//
// If there's no line-break in the source, returns the entire source,
// and updates source to the null string_view.
//
string_view read_to_line(string_view &source);

// Read a prefix string from a string_view.
//
// Returns false if the string view doesn't start with
// the specified prefix.
//
bool read_prefix(string_view &source, string_view prefix);

// Read from a string_view until whitespace is reached.
//
// If there's any whitespace in the source, returns the text
// before the whitespace, and updates the source to the text
// after the whitespace.
//
// If there's no whitespace in the source, returns the entire
// source, and updates the source to the null string_view.
//
string_view read_to_space(string_view &source);

// Read up to nbytes from a string_view.
//
string_view read_nbytes(string_view &source, int nbytes);

// Read an ascii identifier from a string_view
//
// If there's no valid identifier, returns empty string.
//
string_view read_ascii_identifier(string_view &source);

// Read a number from a string view
//
// This is basically a regex pattern matching routine
// hardwired with the regex for numbers.  You must
// specify which of the following parts of the regex
// are allowed or not:
//
//  * plus sign
//  * minus sign
//  * decimal point
//  * scientific notation exponents
//
// Returns the number as a string_view.  There is
// no guarantee that the number is small enough to
// fit into any particular number of bits.  This
// always uses base 10.
//
std::string_view read_number(string_view &source, bool plus, bool minus, bool dec, bool exp);

// Read an ascii character from a string.
//
// Returns -1 if the string is empty.
//
int32_t read_ascii_char(string_view &source);

// Read a UTF8 codepoint from a string_view.
//
// If the next thing in the string_view isn't a valid
// codepoint, returns -1 and doesn't update the view.
//
int32_t read_codepoint_utf8(string_view &source);

// Return true if the string is valid utf-8.
bool valid_utf8(string_view s);

// Return true if the number conforms to the spec.
// See read_number for more information.
//
bool valid_number(string_view v, bool plus, bool minus, bool dec, bool exp);

} // namespace sv

namespace util {

// Kinds of world.  The numeric values are written out explicitly;
// they are the same values the compiler would assign implicitly.
enum WorldType {
    WORLD_TYPE_STANDALONE = 0,
    WORLD_TYPE_C_SYNC     = 1,
    WORLD_TYPE_S_SYNC     = 2,
    WORLD_TYPE_MASTER     = 3,
};

// Kinds of message.  The numeric values are written out explicitly;
// they are the same values the compiler would assign implicitly.
enum MessageType {
    MSG_NULL   = 0,
    MSG_DIFF   = 1,
    MSG_ACK    = 2,
    MSG_INVOKE = 3,
};

// Shorthand names for the container and value types used by the
// declarations in this namespace.

// A vector of strings.
using StringVec = eng::vector<eng::string>;
// Two strings bundled together.
using StringPair = std::pair<eng::string, eng::string>;
// A set of strings.
using StringSet = eng::set<eng::string>;
// A vector of string pairs.
// NOTE(review): presumably each pair is (name, source code) -- confirm.
using LuaSourceVec = eng::vector<StringPair>;
// A uniquely-owned LuaSourceVec.
using LuaSourcePtr = std::unique_ptr<LuaSourceVec>;
// Two 64-bit words; the hash functions in this header treat the
// pair as a single 128-bit hash value.
using HashValue = std::pair<uint64_t, uint64_t>;
// A vector of 64-bit integer IDs.
using IdVector = eng::vector<int64_t>;

// Ascii uppercase and lowercase.
eng::string ascii_tolower(std::string_view c);
eng::string ascii_toupper(std::string_view c);

// Return seconds elapsed, for profiling purposes.
double profiling_clock();

// Output a string to a stream using Lua string escaping and quoting.
void quote_string(const eng::string &str, std::ostream *os);

// base64 encode.
void base64_encode(std::string_view v, std::ostream *oss);

// base64 decode.
//
// Returns true if the base64 was 'clean' base64, as
// opposed to base64 with extraneous characters.
//
bool base64_decode(std::string_view v, std::ostream *oss);

// ID vector quick create.
IdVector id_vector_create(int64_t id1=-1, int64_t id2=-1, int64_t id3=-1, int64_t id4=-1);

// ID vector debug string.
eng::string id_vector_debug_string(const IdVector &idv);

// Unions and sorts two ID vectors.
IdVector sort_union_id_vectors(const IdVector &v1, const IdVector &v2);

// Get a 128-bit hashvalue for a string.
HashValue hash_string(const eng::string &str);

// Get a 128-bit hashvalue for an ID vector.
HashValue hash_id_vector(const IdVector &idv);

// Convert a 128-bit hash to a hexadecimal string.
eng::string hash_to_hex(const HashValue &hash);

// Hash four integers together to 64 bits.
// This is a good hash, but not cryptographically good.
uint64_t hash_ints(uint64_t n1, uint64_t n2, uint64_t n3, uint64_t n4);

// Convert a 64-bit hash value into a floating point number between 0 and 1.
double hash_to_double(uint64_t hash);

// Split a string into multiple strings
StringVec split(const eng::string &s, char sep);

// Split a string into multiple strings using \r or \n
StringVec split_lines(const eng::string &s);

// Split a string into multiple lines using |, remove any leading blank line.
StringVec split_docstring(const eng::string &s);

// Join multiple strings into one string
eng::string join(const StringVec &strs, eng::string sep);

// Return N repetitions of string A
eng::string repeat_string(const eng::string &a, int n);

// String to lowercase/uppercase.  Ascii only, no unicode.
eng::string tolower(eng::string input);
eng::string toupper(eng::string input);

// Convert a codepoint number into a utf8 string.
// If the codepoint is invalid, returns empty string.
eng::string get_codepoint_utf8(int32_t cp);

// Write a codepoint in utf8 to a stream.
// If the codepoint is invalid, writes nothing and returns false.
bool write_codepoint_utf8(int32_t cp, std::ostream *out);

// Calculate distance between two points
double distance_squared(double x1, double y1, double x2, double y2);

// Make a LuaSourceVec with one element, for unit testing.
LuaSourcePtr make_lua_source(const eng::string &code);

// Remove items from a vector that are nullptr.
//
// The surviving items keep their original relative order.  This uses
// the erase/remove_if idiom rather than std::partition, because
// partition may reorder the survivors and the exact order it leaves
// behind is not specified.
//
// T must provide begin(), end() and erase(first, last), and its
// elements must be comparable against nullptr.
//
template<class T>
void remove_nullptrs(T &vec) {
    auto is_null = [] (const auto &x) { return !(x != nullptr); };
    vec.erase(std::remove_if(vec.begin(), vec.end(), is_null), vec.end());
}

// Remove items from a vector that are marked for deletion.
//
// An item is removed when its marked_for_deletion() member returns
// true.  The surviving items keep their original relative order.
// This uses the erase/remove_if idiom rather than std::partition,
// because partition may reorder the survivors and the exact order it
// leaves behind is not specified.
//
template<class T>
void remove_marked_items(T &vec) {
    auto is_marked = [] (const auto &x) { return x.marked_for_deletion(); };
    vec.erase(std::remove_if(vec.begin(), vec.end(), is_marked), vec.end());
}

// A general purpose coordinate triple with float components.
struct XYZ {
    float x, y, z;

    // Default construction sets all three components to zero.
    XYZ() : x(0), y(0), z(0) {}

    // Construct from explicit component values.
    XYZ(float ix, float iy, float iz) : x(ix), y(iy), z(iz) {}

    // Exact component-by-component comparison; no tolerance is applied.
    bool operator ==(const XYZ &o) const {
        if (x != o.x) return false;
        if (y != o.y) return false;
        return z == o.z;
    }
    bool operator !=(const XYZ &o) const {
        if (x != o.x) return true;
        if (y != o.y) return true;
        return z != o.z;
    }

    // Only declared here; the definition is not in this header.
    eng::string debug_string() const;
};

// A stream buffer that throws away every character written to it.
class NullStreamBuffer : public std::streambuf
{
public:
  // Invoked by the base class when a character cannot be placed in the
  // put area (this buffer never sets one up).  The character is dropped
  // and success is reported.
  //
  // not_eof() makes sure the return value is never EOF, because the
  // caller interprets an EOF return as a write failure.
  int_type overflow(int_type c) override { return traits_type::not_eof(c); }
};

// send_to_stream: insert every argument, in order, into the given stream.
//
// The overload without arguments does nothing; it exists so that an
// empty argument pack can be forwarded here.
inline void send_to_stream(std::ostream &os) {}
template <class ARG, class... REST>
inline void send_to_stream(std::ostream &os, const ARG &arg, const REST & ... rest) {
    os << arg;
    // Comma fold: one insertion per remaining argument, left to right.
    ((void)(os << rest), ...);
}

// ss: build a string from all arguments by inserting each of them, in
// order, into a string stream and returning what was accumulated.
template <class... ARGS>
inline eng::string ss(const ARGS & ... args) {
    eng::ostringstream buffer;
    std::ostream &sink = buffer;
    send_to_stream(sink, args...);
    return buffer.str();
}

// A better API than std::setfill, std::hex, std::setw, std::setprecision
//
// Usage examples:
//   std::cout << util::hex.width(5).fill('0').val(123)
//   std::cout << util::dec.fill('$').precision(3).val(1.5)
//
// The reason that other API is bad is that it can leave std::cout
// in an unpredictable state.  This API always leaves the stream clean.
//
// Every setter returns a modified copy and leaves the object it was
// called on untouched, so a chain can start from a shared constant.
//
template <class VALUE>
class FormattedNumber {
public:
    VALUE value_;     // The value to print.
    bool hex_;        // True selects hexadecimal, false selects decimal.
    int width_;       // Field width, as for std::setw.
    char fill_;       // Padding character, as for std::setfill.
    int precision_;   // Precision, as for std::setprecision.

    constexpr FormattedNumber(VALUE v, bool h, int w, char f, int p)
        : value_(v), hex_(h), width_(w), fill_(f), precision_(p) {}

    // Copies of this format with one setting replaced.
    constexpr FormattedNumber width(int w) const { return FormattedNumber(value_, hex_, w, fill_, precision_); }
    constexpr FormattedNumber fill(char f) const { return FormattedNumber(value_, hex_, width_, f, precision_); }
    constexpr FormattedNumber precision(int p) const { return FormattedNumber(value_, hex_, width_, fill_, p); }

    // Attach the value to print, keeping all format settings.
    //
    // The result is typed after the value that is attached, not after
    // this object's VALUE.  Otherwise a 64-bit integer or a double
    // attached to a FormattedNumber<int> would be truncated to int.
    //
    // decltype(+v) is the type of v after the usual promotions, so
    // char-sized integers (uint8_t and friends) become int and are
    // still printed as numbers rather than as characters.
    template <class NVALUE>
    constexpr auto val(NVALUE v) const -> FormattedNumber<decltype(+v)> {
        return FormattedNumber<decltype(+v)>(v, hex_, width_, fill_, precision_);
    }
};

// Ready-made formats to start a chain from.  hex has no minimum width;
// hex8/hex16/hex32/hex64 are zero-padded to 2/4/8/16 digits; dec is
// plain decimal padded with spaces.  Attach the number with .val(...).
//
// These are inline variables so that every translation unit including
// this header refers to the same object, instead of each one getting
// its own internal-linkage copy.
inline constexpr auto hex = FormattedNumber<int>(0, true, 0, '0', 6);
inline constexpr auto hex8 = FormattedNumber<int>(0, true, 2, '0', 6);
inline constexpr auto hex16 = FormattedNumber<int>(0, true, 4, '0', 6);
inline constexpr auto hex32 = FormattedNumber<int>(0, true, 8, '0', 6);
inline constexpr auto hex64 = FormattedNumber<int>(0, true, 16, '0', 6);
inline constexpr auto dec = FormattedNumber<int>(0, false, 0, ' ', 6);

} // namespace util

// Print a FormattedNumber.
//
// The base, precision, fill and width requested by the FormattedNumber
// apply to this one insertion only.  The stream's previous flags,
// precision and fill character are saved first and put back afterwards,
// so formatting chosen by the caller is not overwritten.  The width is
// not restored explicitly; inserting an arithmetic value already resets
// it to zero.
//
// NOTE(review): this overload is declared in the global namespace while
// FormattedNumber lives in namespace util, so argument-dependent lookup
// does not consider it for FormattedNumber<int>.  Consider moving it
// into namespace util.
//
template<class VALUE>
inline std::ostream &operator<<(std::ostream &oss, util::FormattedNumber<VALUE> n) {
    const std::ios_base::fmtflags saved_flags = oss.flags();
    const std::streamsize saved_precision = oss.precision();
    const char saved_fill = oss.fill();

    if (n.hex_) oss << std::hex;
    else oss << std::dec;
    oss << std::setprecision(n.precision_) << std::setfill(n.fill_) << std::setw(n.width_) << n.value_;

    oss.flags(saved_flags);
    oss.precision(saved_precision);
    oss.fill(saved_fill);
    return oss;
}


#endif // UTIL_HPP