Files
integration/luprex/core/cpp/util.cpp

695 lines
19 KiB
C++
Raw Normal View History

#include "wrap-string.hpp"
#include "wrap-vector.hpp"
#include "util.hpp"
#include "fast-float.hpp"
#include <algorithm>
2020-12-05 18:57:53 -05:00
#include <sys/types.h>
#include <sys/stat.h>
2021-07-21 16:10:29 -04:00
#include <iomanip>
#include <cassert>
#include <cstdlib>
#include <cmath>
#include <charconv>
2021-07-21 16:10:29 -04:00
2021-12-17 16:21:56 -05:00
#ifdef WIN32
#endif
2020-12-05 18:57:53 -05:00
#ifndef WIN32
2021-12-17 16:21:56 -05:00
#include <time.h>
2020-12-05 18:57:53 -05:00
#include <unistd.h>
#endif
2020-11-13 15:18:09 -05:00
namespace sv {
2021-08-03 11:25:12 -04:00
2022-05-20 17:12:58 -04:00
// Returns true when s1 and s2 have the same length and match byte-for-byte
// after every character accepted by ascii_isupper() is shifted to lowercase.
bool case_insensitive_eq(string_view s1, string_view s2) {
    if (s1.size() != s2.size()) return false;
    // Fold a single character: only ASCII uppercase letters are altered.
    const auto fold = [](char ch) -> char {
        if (ascii_isupper(ch)) ch += 'a'-'A';
        return ch;
    };
    const int count = int(s1.size());
    int idx = 0;
    while (idx < count) {
        if (fold(s1[idx]) != fold(s2[idx])) return false;
        ++idx;
    }
    return true;
}
// Returns true when the whole of `value` is a base-10 integer that fits in
// an int64_t. Trailing characters, an empty string, or overflow yield false.
bool valid_int64(string_view value) {
    const char *const first = value.data();
    const char *const stop = first + value.size();
    int64_t parsed;
    const auto outcome = std::from_chars(first, stop, parsed, 10);
    // Both conditions are required: no parse error, and nothing left over.
    return (outcome.ec == std::errc()) && (outcome.ptr == stop);
}
2022-04-25 13:43:11 -04:00
// Returns true when the whole of `value` is a base-16 number (no "0x"
// prefix, no sign) that fits in an unsigned 64-bit integer.
//
// The parse target is uint64_t so that this predicate agrees with
// to_hex64(): a signed target would reject values above 0x7fffffffffffffff
// and would accept a leading '-', neither of which to_hex64() does.
bool valid_hex64(string_view value) {
    uint64_t result;
    const char *last = value.data() + value.size();
    auto r = std::from_chars(value.data(), last, result, 16);
    if (r.ec != std::errc()) return false;
    // Reject input with unparsed trailing characters.
    if (r.ptr != last) return false;
    return true;
}
// Returns true when fast_float::from_chars parses the whole of `value`
// as a double without reporting an error.
bool valid_double(string_view value) {
    const char *const first = value.data();
    const char *const stop = first + value.size();
    double parsed;
    const auto outcome = fast_float::from_chars(first, stop, parsed);
    const bool no_error = (outcome.ec == std::errc());
    const bool consumed_all = (outcome.ptr == stop);
    return no_error && consumed_all;
}
// Parses `value` as a base-10 int64_t. Returns `errval` when parsing fails
// or when any characters remain after the number.
int64_t to_int64(string_view value, int64_t errval) {
    const char *const stop = value.data() + value.size();
    int64_t parsed;
    const auto outcome = std::from_chars(value.data(), stop, parsed, 10);
    const bool clean = (outcome.ec == std::errc()) && (outcome.ptr == stop);
    return clean ? parsed : errval;
}
2022-04-25 13:43:11 -04:00
// Parses `value` as a base-16 uint64_t (no "0x" prefix is consumed).
// Returns `errval` when parsing fails or when characters are left over.
uint64_t to_hex64(string_view value, uint64_t errval) {
    const char *const stop = value.data() + value.size();
    uint64_t parsed;
    const auto outcome = std::from_chars(value.data(), stop, parsed, 16);
    const bool clean = (outcome.ec == std::errc()) && (outcome.ptr == stop);
    return clean ? parsed : errval;
}
// Parses `value` as a double using fast_float::from_chars. Returns `errval`
// when parsing fails or when characters remain after the number.
double to_double(string_view value, double errval) {
    const char *const stop = value.data() + value.size();
    double parsed;
    const auto outcome = fast_float::from_chars(value.data(), stop, parsed);
    const bool clean = (outcome.ec == std::errc()) && (outcome.ptr == stop);
    return clean ? parsed : errval;
}
// Returns `v` with every leading character accepted by ascii_isspace()
// removed.
string_view ltrim(string_view v) {
    size_t skip = 0;
    while ((skip < v.size()) && ascii_isspace(v[skip])) {
        ++skip;
    }
    v.remove_prefix(skip);
    return v;
}
// Returns `v` with every trailing character accepted by ascii_isspace()
// removed.
string_view rtrim(string_view v) {
    size_t keep = v.size();
    while ((keep > 0) && ascii_isspace(v[keep - 1])) {
        --keep;
    }
    return v.substr(0, keep);
}
// Returns `v` with characters accepted by ascii_isspace() removed from both
// ends. The front is stripped first, then the back.
string_view trim(string_view v) {
    size_t skip = 0;
    while ((skip < v.size()) && ascii_isspace(v[skip])) {
        ++skip;
    }
    v.remove_prefix(skip);
    size_t keep = v.size();
    while ((keep > 0) && ascii_isspace(v[keep - 1])) {
        --keep;
    }
    return v.substr(0, keep);
}
// Returns `v` with every leading occurrence of the character `c` removed.
string_view ltrim(string_view v, char c) {
    const size_t first_kept = v.find_first_not_of(c);
    // npos means the view consists solely of `c` (or is empty): drop it all.
    v.remove_prefix(first_kept == string_view::npos ? v.size() : first_kept);
    return v;
}
// Returns `v` with every trailing occurrence of the character `c` removed.
string_view rtrim(string_view v, char c) {
    const size_t last_kept = v.find_last_not_of(c);
    // npos means nothing survives; otherwise keep up to and including last_kept.
    const size_t keep = (last_kept == string_view::npos) ? 0 : last_kept + 1;
    return v.substr(0, keep);
}
// Returns `v` with every leading and trailing occurrence of the character
// `c` removed. Occurrences in the interior are kept.
string_view trim(string_view v, char c) {
    const size_t first_kept = v.find_first_not_of(c);
    if (first_kept == string_view::npos) {
        // Only `c` characters (or nothing at all): the result is empty.
        v.remove_prefix(v.size());
        return v;
    }
    const size_t last_kept = v.find_last_not_of(c);
    return v.substr(first_kept, last_kept - first_kept + 1);
}
// Returns true when `s` begins with `prefix`. An empty prefix always matches.
bool has_prefix(string_view s, string_view prefix) {
    if (prefix.size() > s.size()) return false;
    return s.substr(0, prefix.size()) == prefix;
}
// Returns true when `s` ends with `suffix`. An empty suffix always matches.
bool has_suffix(string_view s, string_view suffix) {
    if (suffix.length() > s.length()) return false;
    const string_view tail = s.substr(s.length() - suffix.length());
    return tail == suffix;
}
// Returns the number of leading characters that `a` and `b` share.
int common_prefix_length(string_view a, string_view b) {
    const int limit = std::min(a.size(), b.size());
    int matched = 0;
    while ((matched < limit) && (a[matched] == b[matched])) {
        ++matched;
    }
    return matched;
}
// Returns true when `str` is non-empty, starts with an ASCII letter or '_',
// and continues with only ASCII letters, ASCII digits, or '_'.
// Reserved words are not checked here.
bool is_lua_id(string_view str) {
    if (str.empty()) return false;
    const char head = str[0];
    const bool head_ok = ascii_isalpha(head) || (head == '_');
    if (!head_ok) return false;
    for (char ch : str.substr(1)) {
        const bool body_ok = ascii_isalpha(ch) || ascii_isdigit(ch) || (ch == '_');
        if (!body_ok) return false;
    }
    return true;
}
// Returns true when the first characters of `s` after any leading spaces
// and tabs are "--".
bool is_lua_comment(string_view s) {
    const size_t first = s.find_first_not_of(" \t");
    // Nothing but spaces/tabs (or an empty string) cannot be a comment.
    if (first == string_view::npos) return false;
    return s.substr(first, 2) == "--";
}
// Splits off and returns the part of `source` before the first `sep`.
// `source` is advanced past the separator. When no separator is present the
// whole of `source` is returned and `source` becomes a default-constructed
// (null) view.
string_view read_to_sep(string_view &source, char sep) {
    const size_t cut = source.find(sep);
    if (cut == string_view::npos) {
        const string_view everything = source;
        source = string_view();
        return everything;
    }
    const string_view head = source.substr(0, cut);
    source.remove_prefix(cut + 1);
    return head;
}
// Splits off and returns the first line of `source`, without its '\n' and
// without a '\r' immediately before it. `source` is advanced past the '\n'.
// When there is no '\n' the remainder is returned (minus a trailing '\r')
// and `source` becomes a default-constructed (null) view.
string_view read_to_line(string_view &source) {
    const size_t newline = source.find('\n');
    string_view line;
    if (newline != string_view::npos) {
        line = source.substr(0, newline);
        source.remove_prefix(newline + 1);
    } else {
        line = source;
        source = string_view();
    }
    // Treat "\r\n" line endings the same as "\n".
    const bool ends_with_cr = (!line.empty()) && (line.back() == '\r');
    if (ends_with_cr) {
        line.remove_suffix(1);
    }
    return line;
}
// Splits off and returns the leading run of characters in `source` for
// which ascii_isspace() is false. `source` is then advanced past the
// whitespace that follows. When the word reaches the end of `source`,
// `source` becomes a default-constructed (null) view.
string_view read_to_space(string_view &source) {
    const size_t total = source.size();
    size_t word_end = 0;
    while ((word_end < total) && (!ascii_isspace(source[word_end]))) {
        ++word_end;
    }
    const string_view word = source.substr(0, word_end);
    if (word_end == total) {
        source = string_view();
        return word;
    }
    // source[word_end] is known to be whitespace; skip it and any that follow.
    size_t next = word_end + 1;
    while ((next < total) && (ascii_isspace(source[next]))) {
        ++next;
    }
    source.remove_prefix(next);
    return word;
}
2022-04-25 13:43:11 -04:00
// Splits off and returns the first `nbytes` bytes of `source`, advancing
// `source` past them. A negative count is treated as zero and a count
// larger than the view is reduced to its size.
string_view read_nbytes(string_view &source, int nbytes) {
    int take = (nbytes < 0) ? 0 : nbytes;
    if (take > int(source.size())) {
        take = source.size();
    }
    const string_view chunk = source.substr(0, take);
    source = source.substr(take);
    return chunk;
}
// Splits off and returns the leading identifier in `source`: one character
// accepted by sv::ascii_isalpha() followed by any number accepted by
// sv::ascii_isalnum(). Returns an empty view, and leaves `source` where it
// was, when `source` does not start with such a character.
string_view read_ascii_identifier(string_view &source) {
    const size_t limit = source.size();
    size_t len = 0;
    if ((limit > 0) && (sv::ascii_isalpha(source[0]))) {
        len = 1;
        while ((len < limit) && (sv::ascii_isalnum(source[len]))) {
            ++len;
        }
    }
    const string_view ident = source.substr(0, len);
    source = source.substr(len);
    return ident;
}
2022-04-25 13:43:11 -04:00
// Returns true when `s` is a sequence of well-formed UTF-8 encodings.
// Rejected: bad lead or continuation bytes, truncated sequences, code
// points above U+10FFFF, surrogates (U+D800..U+DFFF), and encodings that
// use more bytes than the code point requires.
bool valid_utf8(string_view s)
{
    const unsigned char *cursor = reinterpret_cast<const unsigned char *>(s.data());
    const unsigned char *const end = cursor + s.size();
    while (cursor != end) {
        const unsigned char lead = cursor[0];
        int width;
        unsigned int cp;
        // Classify the lead byte: it fixes the sequence length and
        // contributes the high bits of the code point.
        if (lead < 0x80) {
            width = 1;                  // 0xxxxxxx
            cp = lead;
        } else if ((lead >> 5) == 0x06) {
            width = 2;                  // 110xxxxx
            cp = lead & 0x1F;
        } else if ((lead >> 4) == 0x0E) {
            width = 3;                  // 1110xxxx
            cp = lead & 0x0F;
        } else if ((lead >> 3) == 0x1E) {
            width = 4;                  // 11110xxx
            cp = lead & 0x07;
        } else {
            return false;
        }
        if ((end - cursor) < width) {
            return false;               // sequence runs past the end
        }
        for (int k = 1; k < width; ++k) {
            const unsigned char cont = cursor[k];
            if ((cont >> 6) != 0x02) return false;   // must be 10xxxxxx
            cp = (cp << 6) | (cont & 0x3F);
        }
        if (cp > 0x10FFFF) return false;
        if ((cp >= 0xD800) && (cp <= 0xDFFF)) return false;
        // The shortest encoding for this code point must be the one used.
        const int shortest = (cp <= 0x007F) ? 1
                           : (cp <= 0x07FF) ? 2
                           : (cp <= 0xFFFF) ? 3
                           : 4;
        if (width != shortest) return false;
        cursor += width;
    }
    return true;
}
} // namespace sv
namespace util {
// Returns a copy of `s` in which every character accepted by
// sv::ascii_isupper() is shifted to its lowercase counterpart.
eng::string ascii_tolower(std::string_view s) {
    eng::string lowered(s);
    for (char &ch : lowered) {
        if (!sv::ascii_isupper(ch)) continue;
        ch += 'a' - 'A';
    }
    return lowered;
}
// Returns a copy of `s` in which every character accepted by
// sv::ascii_islower() is shifted to its uppercase counterpart.
eng::string ascii_toupper(std::string_view s) {
    eng::string raised(s);
    for (char &ch : raised) {
        if (!sv::ascii_islower(ch)) continue;
        ch += 'A' - 'a';
    }
    return raised;
}
// Writes `s` to `*os` as a quoted, escaped string literal.
//
// Quote choice: single quotes are used unless `s` contains a single quote
// and no double quote, in which case double quotes are used. Occurrences of
// the chosen quote character inside `s` are backslash-escaped.
//
// Escapes: a backslash becomes "\\" (without this the literal would be
// re-read with a different meaning), newline/tab/carriage return become
// \n, \t, \r, and every other byte outside 32..127 becomes a backslash
// followed by exactly three decimal digits. NOTE(review): the \ddd decimal
// form presumably targets Lua string-literal syntax; confirm with callers.
//
// The digits are produced by hand so the result does not depend on the
// stream's current numeric base and the stream's fill character is left
// untouched.
void quote_string(const eng::string &s, std::ostream *os) {
    bool anysq = false;
    bool anydq = false;
    for (char c : s) {
        if (c == '\'') anysq = true;
        if (c == '"') anydq = true;
    }
    const bool usesinglequote = (!anysq) || (anydq);
    const char quote = usesinglequote ? '\'' : '"';
    (*os) << quote;
    for (char c : s) {
        switch (c) {
        case '\\': (*os) << "\\\\"; break;
        case '\n': (*os) << "\\n"; break;
        case '\t': (*os) << "\\t"; break;
        case '\r': (*os) << "\\r"; break;
        case '"':
        case '\'':
            // Only the quote character actually in use needs escaping.
            if (c == quote) (*os) << '\\';
            (*os) << c;
            break;
        default: {
            // Work on the unsigned byte value so the classification does
            // not depend on whether plain char is signed on this platform.
            const unsigned int value = static_cast<unsigned char>(c);
            if ((value >= 32) && (value <= 127)) {
                (*os) << c;
            } else {
                const char digits[4] = {
                    char('0' + value / 100),
                    char('0' + (value / 10) % 10),
                    char('0' + value % 10),
                    '\0'};
                (*os) << '\\' << digits;
            }
            break;
        }
        }
    }
    (*os) << quote;
}
2021-08-13 17:02:35 -04:00
// Builds an IdVector from up to four ids, in argument order. Negative
// arguments are skipped.
IdVector id_vector_create(int64_t id1, int64_t id2, int64_t id3, int64_t id4) {
    IdVector ids;
    const int64_t candidates[] = {id1, id2, id3, id4};
    for (int64_t candidate : candidates) {
        if (candidate < 0) continue;
        ids.push_back(candidate);
    }
    return ids;
}
2021-08-03 11:25:12 -04:00
// Formats the ids in `idv` as a comma-separated list with no spaces.
eng::string id_vector_debug_string(const IdVector &idv) {
    eng::ostringstream oss;
    bool need_comma = false;
    for (int64_t id : idv) {
        if (need_comma) {
            oss << ",";
        }
        oss << id;
        need_comma = true;
    }
    return oss.str();
}
2021-07-30 13:22:23 -04:00
// Returns the ids that appear in `v1` or `v2`, sorted ascending with
// duplicates removed.
//
// Duplicates are removed with std::unique rather than by comparing against
// a sentinel "previous" value, so no id value (including -1) is treated
// specially.
IdVector sort_union_id_vectors(const IdVector &v1, const IdVector &v2) {
    IdVector result(v1.size() + v2.size());
    std::copy(v1.begin(), v1.end(), result.begin());
    std::copy(v2.begin(), v2.end(), result.begin() + v1.size());
    std::sort(result.begin(), result.end());
    // After sorting, equal ids are adjacent; keep the first of each run.
    result.resize(std::unique(result.begin(), result.end()) - result.begin());
    return result;
}
// Hashes the bytes of `s` with SpookyHash::ChainHash128, starting from two
// zero seeds, and returns the two 64-bit halves as a HashValue.
HashValue hash_string(const eng::string &s) {
    uint64_t lo = 0, hi = 0;
    SpookyHash::ChainHash128(s.c_str(), s.size(), &lo, &hi);
    const util::HashValue digest(lo, hi);
    return digest;
}
2021-08-09 12:54:32 -04:00
// Hashes the raw int64_t storage of `idv` with SpookyHash::ChainHash128,
// starting from two zero seeds, and returns the two halves as a HashValue.
HashValue hash_id_vector(const IdVector &idv) {
    uint64_t hash1 = 0;
    uint64_t hash2 = 0;
    // Indexing element 0 of an empty vector is undefined behaviour, so an
    // empty vector is hashed as a zero-length buffer instead.
    const int64_t *bytes = idv.empty() ? nullptr : &idv[0];
    SpookyHash::ChainHash128(bytes, idv.size() * sizeof(int64_t), &hash1, &hash2);
    return util::HashValue(hash1, hash2);
}
// Formats a HashValue as 32 lowercase hex digits: `first` then `second`,
// each zero-padded to 16 digits.
eng::string hash_to_hex(const HashValue &hv) {
    eng::ostringstream oss;
    // Base and fill persist on the stream; the width must be set per value.
    oss << std::hex << std::setfill('0');
    oss << std::setw(16) << hv.first;
    oss << std::setw(16) << hv.second;
    return oss.str();
}
// Rotates `x` left by `k` bits (taken modulo 64).
// Both shift counts are masked to 0..63 so that k == 0 (or any multiple of
// 64) returns `x` unchanged instead of shifting by 64, which is undefined.
static inline uint64_t Rot64(uint64_t x, int k)
{
    const unsigned int left = static_cast<unsigned int>(k) & 63u;
    const unsigned int right = (64u - left) & 63u;
    return (x << left) | (x >> right);
}
// Mixes four 64-bit integers into one 64-bit hash.
// Each input is XORed with a fixed constant, then eleven xor/rotate/add
// steps are applied across the four state words, and word 1 is returned.
// NOTE(review): the rotation schedule looks like SpookyHash's short-message
// final mix; confirm before relying on that equivalence.
uint64_t hash_ints(uint64_t a, uint64_t b, uint64_t c, uint64_t d) {
    uint64_t h[4] = {
        c ^ 0xc548cebf3714dbb9,
        d ^ 0xd23a7edd44383f8d,
        a ^ 0x7356f92e4b154df7,
        b ^ 0x55ce09295766838d,
    };
    static const int kRotations[11] = {15, 52, 26, 51, 28, 9, 47, 54, 32, 25, 63};
    // Step i folds word (i+2)%4 into word (i+3)%4, so the pairs cycle
    // (h2->h3), (h3->h0), (h0->h1), (h1->h2), ... and finish on (h0->h1).
    for (int step = 0; step < 11; ++step) {
        uint64_t &src = h[(step + 2) & 3];
        uint64_t &dst = h[(step + 3) & 3];
        dst ^= src;
        src = Rot64(src, kRotations[step]);
        dst += src;
    }
    return h[1];
}
2021-08-09 12:54:32 -04:00
2022-04-06 15:09:28 -04:00
// Maps a 64-bit hash to a double in [0, 1) by taking its top 53 bits
// and scaling them by 2^-53.
double hash_to_double(uint64_t hash) {
    const int discarded_bits = 64-53;
    const uint64_t top_bits = hash >> discarded_bits;
    return top_bits * 0x1p-53;
}
// Splits `s` at every occurrence of `sep`. Empty fields at the start or in
// the middle are kept; an empty field after a trailing separator is not
// emitted, and an empty input yields an empty vector.
StringVec split(const eng::string &s, char sep) {
    StringVec fields;
    const int total = int(s.size());
    int field_start = 0;
    int pos = 0;
    while (pos < total) {
        if (s[pos] == sep) {
            fields.push_back(s.substr(field_start, pos - field_start));
            field_start = pos + 1;
        }
        ++pos;
    }
    // Whatever follows the last separator is the final field, if non-empty.
    if (field_start < total) {
        fields.push_back(s.substr(field_start));
    }
    return fields;
}
// Returns s.substr(start, len), shortened by one character when that range
// is non-empty and ends in '\r'.
static eng::string substr_nocr(const eng::string &s, int start, int len) {
    const bool ends_with_cr = (len > 0) && (s[start + len - 1] == '\r');
    const int kept = ends_with_cr ? (len - 1) : len;
    return s.substr(start, kept);
}
// Splits `s` into lines at each '\n', dropping a '\r' that immediately
// precedes the break. Text after the last '\n' forms a final line only when
// it is non-empty.
StringVec split_lines(const eng::string &s) {
    StringVec lines;
    const int total = int(s.size());
    int line_start = 0;
    int pos = 0;
    while (pos < total) {
        if (s[pos] == '\n') {
            lines.push_back(substr_nocr(s, line_start, pos - line_start));
            line_start = pos + 1;
        }
        ++pos;
    }
    if (line_start < total) {
        lines.push_back(substr_nocr(s, line_start, s.size() - line_start));
    }
    return lines;
}
// Splits `s` at every '|'. An empty piece produced by a '|' at the very
// start of the string is skipped; other empty pieces are kept. Text after
// the last '|' is added only when it is non-empty.
StringVec split_docstring(const eng::string &s) {
    StringVec pieces;
    const int total = int(s.size());
    int piece_start = 0;
    for (int pos = 0; pos < total; ++pos) {
        if (s[pos] != '|') continue;
        // Only a bar at index 0 gives an empty piece with piece_start == 0.
        const bool leading_empty = (pos == piece_start) && (piece_start == 0);
        if (!leading_empty) {
            pieces.push_back(s.substr(piece_start, pos - piece_start));
        }
        piece_start = pos + 1;
    }
    if (piece_start < total) {
        pieces.push_back(s.substr(piece_start, s.size() - piece_start));
    }
    return pieces;
}
// Concatenates the strings in `strs`, placing `sep` between consecutive
// elements. An empty vector yields an empty string.
eng::string join(const StringVec &strs, const eng::string &sep) {
    eng::ostringstream oss;
    bool need_sep = false;
    for (const auto &piece : strs) {
        if (need_sep) {
            oss << sep;
        }
        oss << piece;
        need_sep = true;
    }
    return oss.str();
}
// Returns `a` concatenated with itself `n` times.
// A count of zero or less, or an empty `a`, yields an empty string. The
// early return stops a negative count from being converted into an enormous
// unsigned length, and the size is computed in size_t so the multiplication
// cannot overflow int.
eng::string repeat_string(const eng::string &a, int n) {
    if ((n <= 0) || (a.size() == 0)) {
        return eng::string();
    }
    const size_t len = a.size();
    const size_t count = static_cast<size_t>(n);
    eng::string result(len * count, ' ');
    for (size_t i = 0; i < count; i++) {
        for (size_t j = 0; j < len; j++) {
            result[i*len + j] = a[j];
        }
    }
    return result;
}
// Returns `input` with every character passed through std::tolower, which
// follows the current C locale.
// Each character is converted to unsigned char first: passing a negative
// char value (any byte >= 0x80 where char is signed) to std::tolower is
// undefined behaviour.
eng::string tolower(eng::string input) {
    for (int i = 0; i < int(input.size()); i++) {
        const unsigned char byte = static_cast<unsigned char>(input[i]);
        input[i] = static_cast<char>(std::tolower(byte));
    }
    return input;
}
// Returns `input` with every character passed through std::toupper, which
// follows the current C locale.
// Each character is converted to unsigned char first: passing a negative
// char value (any byte >= 0x80 where char is signed) to std::toupper is
// undefined behaviour.
eng::string toupper(eng::string input) {
    for (int i = 0; i < int(input.size()); i++) {
        const unsigned char byte = static_cast<unsigned char>(input[i]);
        input[i] = static_cast<char>(std::toupper(byte));
    }
    return input;
}
2021-07-30 13:22:23 -04:00
// Returns the squared Euclidean distance between (x1, y1) and (x2, y2).
double distance_squared(double x1, double y1, double x2, double y2) {
    const double delta_x = x1 - x2;
    const double delta_y = y1 - y2;
    const double x_term = delta_x * delta_x;
    const double y_term = delta_y * delta_y;
    return x_term + y_term;
}
// Wraps `code` in a newly allocated LuaSourceVec containing one
// (filename, code) pair. The filename is always "file.lua".
LuaSourcePtr make_lua_source(const eng::string &code) {
    const eng::string filename = "file.lua";
    LuaSourcePtr sources(new LuaSourceVec);
    sources->push_back(std::make_pair(filename, code));
    return sources;
}
// Formats this value as "(x,y,z)" using the stream's default formatting
// for each component.
eng::string XYZ::debug_string() const {
    eng::ostringstream oss;
    oss << "(" << x;
    oss << "," << y;
    oss << "," << z;
    oss << ")";
    return oss.str();
}
2022-04-25 13:43:11 -04:00
2020-11-13 15:18:09 -05:00
} // namespace util
2021-07-21 16:10:29 -04:00
// Writes "0x" and then leaves the stream set to hexadecimal, zero fill and
// a field width of 16 for whatever is inserted next.
// NOTE(review): `v` itself is never written here. Either hex64 is meant as
// a manipulator-style tag, or the value output is missing; confirm against
// the declaration of util::hex64.
std::ostream &operator<<(std::ostream &oss, const util::hex64 &v) {
    oss << "0x";
    oss.width(16);
    oss.fill('0');
    oss.setf(std::ios_base::hex, std::ios_base::basefield);
    return oss;
}
// Writes "0x" and then leaves the stream set to hexadecimal, zero fill and
// a field width of 8 for whatever is inserted next.
// NOTE(review): `v` itself is never written here. Either hex32 is meant as
// a manipulator-style tag, or the value output is missing; confirm against
// the declaration of util::hex32.
std::ostream &operator<<(std::ostream &oss, const util::hex32 &v) {
    oss << "0x";
    oss.width(8);
    oss.fill('0');
    oss.setf(std::ios_base::hex, std::ios_base::basefield);
    return oss;
}
// Writes "0x" and then leaves the stream set to hexadecimal, zero fill and
// a field width of 4 for whatever is inserted next.
// NOTE(review): `v` itself is never written here. Either hex16 is meant as
// a manipulator-style tag, or the value output is missing; confirm against
// the declaration of util::hex16.
std::ostream &operator<<(std::ostream &oss, const util::hex16 &v) {
    oss << "0x";
    oss.width(4);
    oss.fill('0');
    oss.setf(std::ios_base::hex, std::ios_base::basefield);
    return oss;
}
// Writes "0x" and then leaves the stream set to hexadecimal, zero fill and
// a field width of 2 for whatever is inserted next.
// NOTE(review): `v` itself is never written here. Either hex8 is meant as
// a manipulator-style tag, or the value output is missing; confirm against
// the declaration of util::hex8.
std::ostream &operator<<(std::ostream &oss, const util::hex8 &v) {
    oss << "0x";
    oss.width(2);
    oss.fill('0');
    oss.setf(std::ios_base::hex, std::ios_base::basefield);
    return oss;
}
2021-07-30 13:22:23 -04:00
2021-12-15 23:03:43 -05:00
// Unit tests for the helpers in the sv:: and util:: namespaces.
// Each LuaAssert / LuaAssertStrEq checks one expected result; the function
// returns 0 after the last check.
LuaDefine(unittests_util, "", "some unit tests") {
    // Test sv::to_int64 and sv::to_double, including their error values.
    LuaAssert(L, sv::to_int64("123") == 123);
    LuaAssert(L, sv::to_int64("123.4") == INT64_MIN);
    LuaAssert(L, sv::to_int64("12ab") == INT64_MIN);
    LuaAssert(L, sv::to_int64("") == INT64_MIN);
    LuaAssert(L, sv::to_double("123.5") == 123.5);
    LuaAssert(L, std::isnan(sv::to_double("12ab")));
    LuaAssert(L, std::isnan(sv::to_double("")));

    // Test trim, ltrim, rtrim (whitespace and single-character forms).
    LuaAssert(L, sv::ltrim(" foo ") == "foo ");
    LuaAssert(L, sv::rtrim(" foo ") == " foo");
    LuaAssert(L, sv::trim(" foo ") == "foo");
    LuaAssert(L, sv::trim("foo") == "foo");
    LuaAssert(L, sv::trim("") == "");
    LuaAssert(L, sv::ltrim("**foo**", '*') == "foo**");
    LuaAssert(L, sv::rtrim("**foo**", '*') == "**foo");
    LuaAssert(L, sv::trim("**foo**", '*') == "foo");
    LuaAssert(L, sv::trim("foo", '*') == "foo");
    LuaAssert(L, sv::trim("", '*') == "");

    // Test read_to_line: "\n" and "\r\n" endings, then the unterminated tail.
    std::string_view v = "foo\nbar\r\nbaz";
    std::string_view v1 = sv::read_to_line(v);
    LuaAssertStrEq(L, v1, "foo");
    LuaAssertStrEq(L, v, "bar\r\nbaz");
    std::string_view v2 = sv::read_to_line(v);
    LuaAssertStrEq(L, v2, "bar");
    LuaAssertStrEq(L, v, "baz");
    std::string_view v3 = sv::read_to_line(v);
    LuaAssertStrEq(L, v3, "baz");
    LuaAssert(L, sv::isnull(v));

    // Test read_to_space.
    v = "foo bar baz";
    std::string_view s1 = sv::read_to_space(v);
    LuaAssertStrEq(L, s1, "foo");
    LuaAssertStrEq(L, v, "bar baz");
    std::string_view s2 = sv::read_to_space(v);
    LuaAssertStrEq(L, s2, "bar");
    LuaAssertStrEq(L, v, "baz");
    std::string_view s3 = sv::read_to_space(v);
    LuaAssertStrEq(L, s3, "baz");
    LuaAssert(L, sv::isnull(v));

    // Test the unioning of ID vectors.
    util::IdVector idv1,idv2;
    idv1.push_back(1);
    idv1.push_back(6);
    idv1.push_back(4);
    idv2.push_back(5);
    idv2.push_back(1);
    idv2.push_back(6);
    util::IdVector joined = util::sort_union_id_vectors(idv1, idv2);
    LuaAssert(L, joined.size() == 4);
    LuaAssert(L, joined[0] == 1);
    LuaAssert(L, joined[1] == 4);
    LuaAssert(L, joined[2] == 5);
    LuaAssert(L, joined[3] == 6);

    // Test the string split routine.
    util::StringVec sv1 = util::split("foo,bar,baz", ',');
    LuaAssert(L, sv1.size() == 3);
    LuaAssert(L, sv1[0] == "foo");
    LuaAssert(L, sv1[1] == "bar");
    LuaAssert(L, sv1[2] == "baz");
    util::StringVec sv2 = util::split(",foo,,bar", ',');
    LuaAssert(L, sv2.size() == 4);
    LuaAssert(L, sv2[0]=="");
    LuaAssert(L, sv2[1]=="foo");
    LuaAssert(L, sv2[2]=="");
    LuaAssert(L, sv2[3]=="bar");

    // Test the split_lines routine.
    util::StringVec sv3 = util::split_lines("foo\n\nbar\r\nbaz\r\n\r\n");
    LuaAssert(L, sv3.size() == 5);
    LuaAssert(L, sv3[0] == "foo");
    LuaAssert(L, sv3[1] == "");
    LuaAssert(L, sv3[2] == "bar");
    LuaAssert(L, sv3[3] == "baz");
    LuaAssert(L, sv3[4] == "");

    // Test the repeat string routine.
    LuaAssertStrEq(L, util::repeat_string("abc", 3), "abcabcabc");

    // Test toupper and tolower.
    LuaAssert(L, util::toupper("fooBar") == "FOOBAR");
    LuaAssert(L, util::tolower("fooBar") == "foobar");

    // Test distance_squared.
    LuaAssert(L, util::distance_squared(1, 1, 5, 4) == 25.0);
    LuaAssert(L, util::distance_squared(5, 4, 1, 1) == 25.0);

    // Test XYZ.
    util::XYZ xyza(3,4,5), xyzb(3,4,5), xyzc(3,4,6);
    LuaAssert(L, xyza.x == 3);
    LuaAssert(L, xyza.y == 4);
    LuaAssert(L, xyza.z == 5);
    LuaAssert(L, xyza == xyzb);
    LuaAssert(L, xyza != xyzc);
    LuaAssert(L, xyza.debug_string() == "(3,4,5)");

    // Test hash_to_hex.
    LuaAssertStrEq(L, util::hash_to_hex(util::HashValue(0x1234,0x789a)),
                   "0000000000001234000000000000789a");

    // Test hash_to_double.
    LuaAssert(L, util::hash_to_double(0x1000000000000000) == 1.0/16.0);
    LuaAssert(L, util::hash_to_double(0x7000000000000000) == 7.0/16.0);
    LuaAssert(L, util::hash_to_double(0xF000000000000000) == 15.0/16.0);

    return 0;
}