From ff81d79b4ae589ceb0e7c288b55b85ba7d29a508 Mon Sep 17 00:00:00 2001 From: jyelon Date: Tue, 17 Feb 2026 19:57:58 -0500 Subject: [PATCH] Implement printf, dprintf, string.format --- Docs/Lua-String-Formatting.md | 12 ++ luprex/cpp/core/luastack.hpp | 3 +- luprex/cpp/core/pprint.cpp | 298 ++++++++++++++++++++++------- luprex/cpp/core/pprint.hpp | 33 ++++ luprex/cpp/core/source.cpp | 1 - luprex/cpp/core/world-accessor.cpp | 133 ++++++++----- luprex/cpp/core/world-core.cpp | 4 +- 7 files changed, 363 insertions(+), 121 deletions(-) create mode 100644 Docs/Lua-String-Formatting.md diff --git a/Docs/Lua-String-Formatting.md b/Docs/Lua-String-Formatting.md new file mode 100644 index 00000000..1259f116 --- /dev/null +++ b/Docs/Lua-String-Formatting.md @@ -0,0 +1,12 @@ +# Lua String Formatting + +Lua contains a builtin routine, string.format, which is similar to C's "printf". +We have reimplemented this from scratch, adding a bunch of new formatting +directives. + +We have also added formatting routines that output to the +console and the debug logs. The new routines are: + +printf(format_string, arg1, arg2, arg3...) +dprintf(format_string, arg1, arg2, arg3...) + diff --git a/luprex/cpp/core/luastack.hpp b/luprex/cpp/core/luastack.hpp index e37b5eee..f9db65c0 100644 --- a/luprex/cpp/core/luastack.hpp +++ b/luprex/cpp/core/luastack.hpp @@ -700,7 +700,8 @@ public: // LUA_TTHREAD, LUA_TLIGHTUSERDATA, LUA_TUSERDATA. // int type(LuaSlot s) const { return lua_type(L_, s); } - + const char *typestr(LuaSlot s) const { return lua_typename(L_, lua_type(L_, s)); } + // Get the extended type of a LuaSlot.
// // If the variable contains a table, returns one of the LuaTableType diff --git a/luprex/cpp/core/pprint.cpp b/luprex/cpp/core/pprint.cpp index e5a07f08..b31926b4 100644 --- a/luprex/cpp/core/pprint.cpp +++ b/luprex/cpp/core/pprint.cpp @@ -5,6 +5,7 @@ #include "table.hpp" #include +#include class PrintMachine { public: @@ -185,24 +186,21 @@ public: } } - // If it's an array of atomic values, without a visible metatable, - // we're going to print it without newlines. Scan the table to see if - // it's possible to print it array-style. - bool array_style = false; - if (!print_meta) { - array_style = true; - for (int i = 1; i <= nkeys; i++) { - LS.rawget(val, value, i); - if (LS.isnil(val) || (LS.type(val) == LUA_TTABLE)) { - array_style = false; - break; - } - } + // Count the number of array-style keys. + // Also, check if there are any tables in the array keys. + int narray_keys = 0; + bool array_contains_table = false; + for (int i = 1; i <= nkeys; i++) { + LS.rawget(val, value, i); + if (LS.isnil(val)) break; + narray_keys = i; + if (LS.type(val) == LUA_TTABLE) array_contains_table = true; } - // Print it array-style. This code is simple because + // Maybe print it array-style. This code is simple because // we don't do indentation, and we don't handle any values // that aren't atomic. + bool array_style = (narray_keys == nkeys) && (!array_contains_table); if (array_style) { (*output_) << "{"; for (int i = 1; i <= nkeys; i++) { @@ -310,61 +308,231 @@ void pprint(LuaCoreStack &LS, LuaSlot val, const PrettyPrintOptions &opts, std:: PrintMachine pm(LS, val, opts.indent, opts.level, opts.expand, os); } -LuaDefine(string_pprint, "obj1, obj2, ...", - "|Pretty-print the specified objects into a string." - "|" - "|See also: string.pprintx, which has a lot more options." - "|This function uses the default options: pretty print indented," - "|start at indentation level zero, and always expand the" - "|top-level table." 
- "|") { - LuaRet result; - LuaExtraArgs extra; - LuaDefStack LS(L, result, extra); - util::ostringstream oss; - for (int i = 0; i < extra.size(); i++) { - pprint(LS, extra[i], PrettyPrintOptions(), &oss); - oss << "\n"; +////////////////////////////////////////////////////////////////////////////////// +// +// Format +// +// Printf-style formatting that consumes arguments from LuaExtraArgs. +// +////////////////////////////////////////////////////////////////////////////////// + +class FormatDirective +{ + // Given a string_view that starts with '%', count the number of characters + // in the format parameters: the '%', flags, width, and precision. Does NOT + // include the conversion character. + // + // For example, given "%8.2d %2.7d", returns 4 (the length of "%8.2"). + // + static int format_parameters_length(std::string_view fmt) { + assert ((fmt.size() >= 1) && (fmt[0] == '%')); + size_t i = 1; + + // Flags + while (i < fmt.size() && (fmt[i] == '-' || fmt[i] == '+' || fmt[i] == ' ' || fmt[i] == '#' || fmt[i] == '0')) + i++; + + // Width + while (i < fmt.size() && fmt[i] >= '0' && fmt[i] <= '9') + i++; + + // Precision + if (i < fmt.size() && fmt[i] == '.') { + i++; + while (i < fmt.size() && fmt[i] >= '0' && fmt[i] <= '9') + i++; + } + return (int)i; + } + +public: + std::string_view precedingliteral; + std::string_view parameters; + std::string_view modifiers; + char directive; + char rebuilt[100]; + const int PARAMETERS_TOO_LONG = 50; + + // Return an error message declaring this format specifier to be invalid. + // + eng::string invalid() + { + return util::ss("Invalid format specifier: '", parameters, modifiers, directive, "'"); + } + + // Rebuild the format directive, using the specified suffix + // instead of the stored modifiers and directive. 
+ // + const char *rebuild(std::string_view suffix) + { + std::string_view params = parameters; + if (int(params.size()) > PARAMETERS_TOO_LONG) params = "%"; + memcpy(rebuilt, params.data(), params.size()); + memcpy(rebuilt + params.size(), suffix.data(), suffix.size()); + rebuilt[params.size() + suffix.size()] = 0; + return rebuilt; + } + + // Read one directive from fmt, advancing fmt past everything consumed. + // + // On return: + // directive != 0, parameters non-empty — normal format directive + // directive != 0, parameters empty — not possible + // directive == 0, parameters empty — end of string (may have precedingliteral) + // directive == 0, parameters non-empty — truncated format (missing conversion char) + // + void read(std::string_view &fmt) + { + // Find the preceding literal (everything before the first '%'). + size_t pct = fmt.find('%'); + if (pct == std::string_view::npos) { + precedingliteral = fmt; + parameters = {}; + modifiers = {}; + directive = 0; + fmt = {}; + return; + } + + precedingliteral = fmt.substr(0, pct); + fmt.remove_prefix(pct); + + // Measure format parameters (%, flags, width, precision). + int plen = format_parameters_length(fmt); + parameters = fmt.substr(0, plen); + fmt.remove_prefix(plen); + + // Read 'l' modifiers. + int modcount = 0; + while ((modcount < int(fmt.size())) && (fmt[modcount] == 'l')) + modcount++; + modifiers = fmt.substr(0, modcount); + fmt.remove_prefix(modcount); + + // Read conversion character. 
+ if (fmt.empty()) { directive = 0; return; } + directive = fmt[0]; + fmt.remove_prefix(1); + } +}; + + +static void format_signed(LuaCoreStack &LS, LuaSlot arg, const char *format, std::ostream *os) { + auto ni = LS.tryinteger(arg); + if (ni) { + char buf[64]; + snprintf(buf, sizeof(buf), format, *ni); + (*os) << buf; } - LS.set(result, oss.view()); - return LS.result(); } -LuaDefine(string_pprintx, "options", - "|Pretty-print the specified object into a string, with options" - "|" - "|Options is a table with these fields:" - "|" - "| value - the object to pretty-print" - "| indent - if false, suppress newlines and indentation (default: true)" - "| level - base level of indentation (default: zero)" - "| expand - if true, force expansion of top-level table (default: false)" - "|" - "|About the expand flag: normally, when you print a class, it just " - "|prints '', and when you print a tangible, it just" - "|prints ''. But sometimes, you want to see the details." - "|The expand flag forces it to expand the top-level table, even if the" - "|top-level table is a tangible or class." 
- "|") { - LuaArg loptions; - LuaRet result; - LuaVar value; - LuaDefStack LS(L, loptions, result, value); - PrettyPrintOptions options; - LuaKeywordParser kp(LS, loptions); - options.parse(kp); - if (!kp.optional(value, "value")) { - LS.set(value, LuaNil); +static void format_unsigned(LuaCoreStack &LS, LuaSlot arg, const char *format, std::ostream *os) { + auto ni = LS.tryinteger(arg); + if (ni) { + char buf[64]; + snprintf(buf, sizeof(buf), format, (uint64_t)(*ni)); + (*os) << buf; } - kp.final_check_throw(); - util::ostringstream oss; - pprint(LS, value, options, &oss); - LS.set(result, oss.view()); - return LS.result(); } -LuaDefine(string_print, "obj", +static void format_double(LuaCoreStack &LS, LuaSlot arg, const char *format, std::ostream *os) { + auto ni = LS.trynumber(arg); + if (ni) { + char buf[64]; + snprintf(buf, sizeof(buf), format, *ni); + (*os) << buf; + } +} + + +eng::string format(LuaCoreStack &LS, std::string_view fmt, LuaExtraArgs args, std::ostream *os) { + FormatDirective fd; + + // First pass: validate the format string and the argument types. 
+ std::string_view fmtcopy = fmt; + int nargs = 0; + while (!fmtcopy.empty()) { + fd.read(fmtcopy); + + if (int(fd.parameters.size()) > fd.PARAMETERS_TOO_LONG) return fd.invalid(); + + if (fd.directive == 0 && !fd.parameters.empty()) return fd.invalid(); + + if (fd.directive == '%') { + if ((fd.parameters.size() != 1)||(fd.modifiers.size() != 0)) + return fd.invalid(); + } else if (fd.directive != 0) { + if (nargs >= args.size()) + return util::ss("expected more than ", args.size(), " arguments"); + LuaSpecial arg = args[nargs++]; + switch (fd.directive) { + case 'd': case 'i': case 'o': case 'u': case 'x': case 'X': + case 'c': + if (!LS.isinteger(arg)) + return util::ss("bad argument #", nargs, " (integer expected, got ", LS.typestr(arg), ")"); + break; + case 'e': case 'E': case 'f': case 'g': case 'G': + if (!LS.isnumber(arg)) + return util::ss("bad argument #", nargs, " (number expected, got ", LS.typestr(arg), ")"); + break; + case 's': case 'q': case 'p': case 'P': + break; + default: + return fd.invalid(); + } + } + } + + if (nargs != args.size()) + return util::ss("expected ", nargs, " arguments, got ", args.size()); + + // Second pass: produce output. 
+ int argidx = 0; + while (!fmt.empty()) { + fd.read(fmt); + + if (!fd.precedingliteral.empty()) + os->write(fd.precedingliteral.data(), fd.precedingliteral.size()); + + if (fd.directive == 0) break; + if (fd.directive == '%') { os->put('%'); continue; } + + LuaSpecial arg = args[argidx++]; + + switch (fd.directive) { + case 'd': format_signed(LS, arg, fd.rebuild(PRId64), os); break; + case 'i': format_signed(LS, arg, fd.rebuild(PRId64), os); break; + case 'o': format_unsigned(LS, arg, fd.rebuild(PRIo64), os); break; + case 'u': format_unsigned(LS, arg, fd.rebuild(PRIu64), os); break; + case 'x': format_unsigned(LS, arg, fd.rebuild(PRIx64), os); break; + case 'X': format_unsigned(LS, arg, fd.rebuild(PRIX64), os); break; + case 'e': format_double(LS, arg, fd.rebuild("e"), os); break; + case 'E': format_double(LS, arg, fd.rebuild("E"), os); break; + case 'f': format_double(LS, arg, fd.rebuild("f"), os); break; + case 'g': format_double(LS, arg, fd.rebuild("g"), os); break; + case 'G': format_double(LS, arg, fd.rebuild("G"), os); break; + case 'c': format_signed(LS, arg, fd.rebuild("c"), os); break; + case 's': atomic_print(LS, arg, false, os); break; + case 'q': atomic_print(LS, arg, true, os); break; + case 'p': pprint(LS, arg, PrettyPrintOptions(fd.modifiers.empty(), false), os); break; + case 'P': pprint(LS, arg, PrettyPrintOptions(fd.modifiers.empty(), true), os); break; + default: break; + } + } + + return {}; +} + +////////////////////////////////////////////////////////////////////////////////// +// +// Lua Interfaces to the Various Printing Routines +// +// Note: there are more functions like this in world-accessor. +// +////////////////////////////////////////////////////////////////////////////////// + +LuaDefine(tostring, "obj", "|Concise print the specified object into a string" "|" "|This prints a concise representation of obj into a string. 
Tables" @@ -381,9 +549,9 @@ LuaDefine(string_print, "obj", return LS.result(); } -LuaDefineAlias(tostring, string_print); -LuaDefine(string_isidentifier, "str", "return true if the string is a valid lua identifier") { +LuaDefine(string_isidentifier, "str", + "return true if the string is a valid lua identifier") { LuaArg str; LuaRet result; LuaDefStack LS(L, str, result); @@ -396,5 +564,3 @@ LuaDefine(string_isidentifier, "str", "return true if the string is a valid lua return LS.result(); } - - diff --git a/luprex/cpp/core/pprint.hpp b/luprex/cpp/core/pprint.hpp index 7f4a7729..e57f0d06 100644 --- a/luprex/cpp/core/pprint.hpp +++ b/luprex/cpp/core/pprint.hpp @@ -27,6 +27,7 @@ struct PrettyPrintOptions { int level; bool expand; PrettyPrintOptions() : indent(true), level(0), expand(true) {} + PrettyPrintOptions(bool indent, bool expand) : indent(indent), level(0), expand(expand) {} void parse(LuaKeywordParser &kp); }; @@ -42,4 +43,36 @@ void atomic_print(LuaCoreStack &LS, LuaSlot val, bool quote, std::ostream *os); // void pprint(LuaCoreStack &LS, LuaSlot val, const PrettyPrintOptions &opts, std::ostream *os); +// Format a string using printf-style format specifiers, consuming +// arguments from a LuaExtraArgs. Returns an empty string on success, +// or an error message on failure. +// +// Numeric types (argument must be a number). All of the following +// use the same formatting parameters as 'printf'. 
+// +// %d, %i — signed decimal integer +// %o — unsigned octal +// %u — unsigned decimal +// %x, %X — unsigned hexadecimal (lower/upper) +// %e, %E — scientific notation (lower/upper) +// %f — decimal floating point +// %g, %G — shortest of %e/%f (lower/upper) +// %c — character (integer converted to character) +// +// Non-numeric types: +// %s — prints any lua value using atomic_print (unquoted) +// %q — prints any lua value using atomic_print (quoted) +// +// Pretty-printing: +// +// %p - pretty print +// %lp - pretty print, but all on one line +// %P - pretty print, force table expansion +// %lP - pretty print, force table expansion, all on one line +// +// Special: +// %% — literal percent sign (consumes no argument) +// +eng::string format(LuaCoreStack &LS, std::string_view fmt, LuaExtraArgs args, std::ostream *os); + #endif // PPRINT_HPP \ No newline at end of file diff --git a/luprex/cpp/core/source.cpp b/luprex/cpp/core/source.cpp index 0c662a0b..d46389fc 100644 --- a/luprex/cpp/core/source.cpp +++ b/luprex/cpp/core/source.cpp @@ -800,7 +800,6 @@ LuaDefineBuiltin(string_rep, "str, count", "repeat the string some number of tim LuaDefineBuiltin(string_reverse, "str", "reverse the bytes of the string"); LuaDefineBuiltin(string_lower, "str", "convert string to lowercase"); LuaDefineBuiltin(string_upper, "str", "convert string to uppercase"); -LuaDefineBuiltin(string_format, "formatstr, v1,v2,v3...", "generate formatted output string"); LuaDefineBuiltin(string_gmatch, "str, pattern", "iterate over pattern-matched substrings"); LuaDefineBuiltin(string_gsub, "str, pattern, replace", "global replace pattern in string"); LuaDefineBuiltin(string_match, "str, pattern", "return start and end of pattern in string"); diff --git a/luprex/cpp/core/world-accessor.cpp b/luprex/cpp/core/world-accessor.cpp index 2432a26d..d0f62a9d 100644 --- a/luprex/cpp/core/world-accessor.cpp +++ b/luprex/cpp/core/world-accessor.cpp @@ -857,57 +857,6 @@ LuaDefine(math_randomstate, "seed", 
LuaSandboxBuiltin(math_randomseed, "", ""); -LuaDefine(pprint, "obj1, obj2, ...", - "|Pretty-print the specified objects." - "|" - "|See also: pprintx, which has a lot more options." - "|This function uses the default options: pretty print indented," - "|start at indentation level zero, and always expand the" - "|top-level table." - "|") { - World *w = World::fetch_global_pointer(L); - std::ostream *ostream = w->lthread_print_stream(); - LuaExtraArgs extra; - LuaDefStack LS(L, extra); - for (int i = 0; i < extra.size(); i++) { - pprint(LS, extra[i], PrettyPrintOptions(), ostream); - (*ostream) << std::endl; - } - return LS.result(); -} - -LuaDefine(pprintx, "options", - "|Pretty-print the specified object, with options" - "|" - "|Options is a table with these fields:" - "|" - "| value - the object to pretty-print" - "| indent - if false, suppress newlines and indentation (default: true)" - "| level - base level of indentation (default: zero)" - "| expand - if true, force expansion of top-level table (default: true)" - "|" - "|About the expand flag: normally, when you print a class, it just " - "|prints '', and when you print a tangible, it just" - "|prints ''. But sometimes, you want to see the details." - "|The expand flag forces it to expand the top-level table, even if the" - "|top-level table is a tangible or class." 
- "|") { - World *w = World::fetch_global_pointer(L); - std::ostream *ostream = w->lthread_print_stream(); - LuaArg loptions; - LuaVar value; - LuaDefStack LS(L, loptions, value); - PrettyPrintOptions options; - LuaKeywordParser kp(LS, loptions); - options.parse(kp); - if (!kp.optional(value, "value")) { - LS.set(value, LuaNil); - } - kp.final_check_throw(); - pprint(LS, value, options, ostream); - return LS.result(); -} - LuaDefine(print, "obj1, obj2, ...", "|Print object or objects.") { World *w = World::fetch_global_pointer(L); @@ -938,6 +887,88 @@ LuaDefine(dprint, "obj1, obj2, ...", return 0; } +#define PRINTF_DOCS \ + "|" \ + "|Arguments: format, arg1, arg2, arg3..." \ + "|" \ + "|Numeric types (argument must be a number):" \ + "| %d, %i -- signed decimal integer" \ + "| %o -- unsigned octal" \ + "| %u -- unsigned decimal" \ + "| %x, %X -- unsigned hexadecimal (lower/upper)" \ + "| %e, %E -- scientific notation (lower/upper)" \ + "| %f -- decimal floating point" \ + "| %g, %G -- shortest of %e/%f (lower/upper)" \ + "| %c -- character (integer converted to character)" \ + "|" \ + "|Print any lua value:" \ + "| %s -- Print unquoted" \ + "| %q -- Print quoted" \ + "|" \ + "|Pretty-print any lua value, showing contents of tables:" \ + "| %p -- pretty print" \ + "| %lp -- pretty print, all on one line" \ + "| %P -- pretty print, force table expansion" \ + "| %lP -- pretty print, force table expansion, all on one line" \ + "|" \ + "|Other:" \ + "| %% -- literal percent sign" \ + "|" + +LuaDefine(printf, "fmt, ...", + "|Print a formatted string to the console." 
+ "|" + PRINTF_DOCS) { + World *w = World::fetch_global_pointer(L); + std::ostream *ostream = w->lthread_print_stream(); + LuaArg lfmt; + LuaExtraArgs extra; + LuaDefStack LS(L, lfmt, extra); + eng::string fmtstr = LS.ckstring(lfmt, "format string"); + eng::string err = format(LS, fmtstr, extra, ostream); + if (!err.empty()) { + luaL_error(L, "%s", err.c_str()); + } + (*ostream) << std::endl; + return 0; +} + +LuaDefine(dprintf, "fmt, ...", + "|Print a formatted string to the debug log." + "|" + PRINTF_DOCS) { + LuaArg lfmt; + LuaExtraArgs extra; + LuaDefStack LS(L, lfmt, extra); + eng::string fmtstr = LS.ckstring(lfmt, "format string"); + std::ostringstream oss; + eng::string err = format(LS, fmtstr, extra, &oss); + if (!err.empty()) { + luaL_error(L, "%s", err.c_str()); + } + oss << std::endl; + util::dprintview(oss.str()); + return 0; +} + +LuaDefine(string_format, "fmt, ...", + "|Format a string using printf-style format specifiers." + "|" + PRINTF_DOCS) { + LuaArg lfmt; + LuaRet result; + LuaExtraArgs extra; + LuaDefStack LS(L, lfmt, result, extra); + eng::string fmtstr = LS.ckstring(lfmt, "format string"); + util::ostringstream oss; + eng::string err = format(LS, fmtstr, extra, &oss); + if (!err.empty()) { + luaL_error(L, "%s", err.c_str()); + } + LS.set(result, oss.view()); + return LS.result(); +} + LuaDefine(doc, "function", "|Print documentation for specified function.") { World *w = World::fetch_global_pointer(L); diff --git a/luprex/cpp/core/world-core.cpp b/luprex/cpp/core/world-core.cpp index 0bd52ec5..8b02d272 100644 --- a/luprex/cpp/core/world-core.cpp +++ b/luprex/cpp/core/world-core.cpp @@ -1135,7 +1135,7 @@ void World::clear_lthread_state() { lthread_place_id_ = 0; lthread_thread_id_ = 0; lthread_use_ppool_ = false; - lthread_prints_.str(); + lthread_prints_.str(""); lthread_prints_.clear(); } @@ -1162,7 +1162,7 @@ void World::open_lthread_state(int64_t actor, int64_t place, int64_t thread, boo lthread_place_id_ = place; lthread_thread_id_ = 
thread; lthread_use_ppool_ = ppool; - lthread_prints_.str(); + lthread_prints_.str(""); lthread_prints_.clear(); }