diff --git a/include/fennec/format/detail/_format.h b/include/fennec/format/detail/_format.h index 36ab3fb..d5ae122 100644 --- a/include/fennec/format/detail/_format.h +++ b/include/fennec/format/detail/_format.h @@ -43,6 +43,8 @@ struct _format_argimpl { _format_argimpl() {}; virtual ~_format_argimpl() {}; virtual string format(const format_arg& fmt) = 0; + virtual bool is_integer() = 0; + virtual int64_t int_value() = 0; }; // Polymorphic template specialization @@ -53,21 +55,28 @@ struct _format_arg : _format_argimpl { _format_arg(const T& arg) : val(arg) { } virtual ~_format_arg() = default; - virtual string format(const format_arg& fmt) { + string format(const format_arg& fmt) override { return fennec::forward(fmtr(fmt, val)); } + + bool is_integer() override { + return is_integral_v or is_convertible_v; + } + virtual int64_t int_value() override { + if constexpr(is_integral_v) { + return val; + } else if constexpr(is_convertible_v) { + return val; + } else { + return -1; + } + } }; -// Polymorphic template specialization for rvalue_references +// Polymorphic template specialization for x/r/xr value references template -struct _format_arg : _format_argimpl { - formatter fmtr; - const T& val; - _format_arg(const T& arg) : val(arg) { - } - virtual ~_format_arg() = default; - virtual string format(const format_arg& fmt) { - return fennec::forward(fmtr(fmt, val)); +struct _format_arg : _format_arg { + _format_arg(const T& arg) : _format_arg(arg) { } }; @@ -83,8 +92,59 @@ struct _format_argarray { string format(size_t i, const format_arg& fmt) { return args[i]->format(fmt); } + + bool is_integer(size_t i) { + return args[i]->is_integer(); + } + + int64_t int_value(size_t i) { + return args[i]->int_value(); + } }; +// checks if character is a valid format type +constexpr bool _isfmt_t(char c) { + switch (c) { + default: return false; + case 's': case '?': // strings + case 'c': // char + case 'd': // decimal + case 'b': case 'B': // binary + case 'o': // octal + case 'x': case 'X': // hex + case 'a': case 'A': // float hex + case 'e': case 'E': // scientific notation + case 'f': case 'F': // fixed precision + case 'g': case 'G': // general precision + return true; + } +} + +// checks if character is a valid format int type +constexpr bool _isfmt_i(char c) { + switch (c) { + default: return false; + case 'd': // decimal + case 'b': case 'B': // binary + case 'o': // octal + case 'x': case 'X': // hex + return true; + } +} + +// checks if character is a valid format float type +constexpr bool _isfmt_f(char c) { + switch (c) { + default: return false; + case 'a': case 'A': // float hex + case 'e': case 'E': // scientific notation + case 'f': case 'F': // fixed precision + case 'g': case 'G': // general precision + return true; + } + +} + } #endif // FENNEC_LANG_STRINGS_DETAIL_FORMAT_H diff --git a/include/fennec/format/format.h b/include/fennec/format/format.h index dc49a20..f4221d8 100644 --- a/include/fennec/format/format.h +++ b/include/fennec/format/format.h @@ -43,15 +43,12 @@ template string format(const cstring& str, ArgsT&&...args) { static constexpr size_t argc = sizeof...(ArgsT); static constexpr format_arg default_fmt = { - .fill { // default no fill - .c = '\0', - .n = 0, - }, + .fill = ' ', .align = '\0', // default to locale - .sign = '-', // default to sign only for negative numbers + .sign = '\0', // default to sign only for negative numbers .alt = false, // default no prefix .width = 0, - .precision = 0, + .precision = 6, // default to 6 sigfigs .type = '\0', }; @@ -63,74 +60,210 @@ string format(const cstring& str, ArgsT&&...args) { detail::_format_argarray argarray = { fennec::forward(args)... }; string res; size_t i = 0; - size_t a = -1; + size_t arg_c = -1; while (i <= str.length()) { - size_t b = str.find('{', i); - size_t e = str.find('}', i); + size_t brace = str.find('{', i); + size_t end = str.find('}', i); format_arg fmt = default_fmt; // check for '}}' - if (e < b) { - if (str[e + 1] == '}') { - res += string(str.data() + i, e - i); - i = e + 2; + if (end < brace) { + if (str[end + 1] == '}') { + res += string(str.data() + i, end - i); + i = end + 2; continue; } assertf(false, "fennec::format syntax error, encountered unexpected '{'") } // append string - if (b >= str.length()) { // handle end case + if (brace >= str.length()) { // handle end case res += string(str.data() + i, str.length() - i); break; } - res += string(str.data() + i, b - i); + res += string(str.data() + i, brace - i); // next brace, validate escape - size_t nb = str.find('{', b + 1); - if (b + 1 == nb) { + size_t next_brace = str.find('{', brace + 1); + if (brace + 1 == next_brace) { res += '{'; - i = nb + 1; + i = next_brace + 1; continue; } // find contained colon - size_t c = str.find(':', b); + size_t colon = str.find(':', brace); // validate colon and brace location - assertf(c < nb or e < nb, "fennec::format syntax error, mismatched '{}'"); + assertf(colon < next_brace or end < next_brace, "fennec::format syntax error, mismatched '{}'"); // parse index if present - size_t id = min(c, e) - 1; - if (id > b) { - a = 0; + size_t id = min(colon, end) - 1; + if (id > brace) { + arg_c = 0; } else { - ++a; + ++arg_c; } - for (size_t j = id, k = 1; j > b; --j, k *= 10) { + for (size_t j = id, k = 1; j > brace; --j, k *= 10) { size_t u = (str[j] - '0'); assertf(u < 10, "fennec::format syntax error, invalid argument index"); - a += k * u; + arg_c += k * u; } + // store argument to allow nested replacement fields + size_t arg = arg_c; + // validate index - assertf(a < argc, "fennec::format syntax error, invalid argument index"); + assertf(arg < argc, "fennec::format syntax error, invalid argument index"); // early return case for no colon - if (c > e) { - res += argarray.format(a, fmt); - i = e + 1; + if (colon > end) { + res += argarray.format(arg, fmt); + i = end + 1; continue; } // parse format specifiers + // we're going to parse right-to-left since the valid combinations + // of specifiers change based on the type of the argument + + // first find the matching '}' brace, e is not necessarily the matching brace + // since some specifiers allow nested replacement fields + size_t spec = colon; + while (str[spec + 1] != '}') { + if (next_brace < end) { // if the next brace is before the next closing brace + spec = end + 1; + end = str.find('}', spec); + next_brace = str.find('{', spec); + } else { + spec = end - 1; + break; + } + } + + assert(spec < str.length() - 1 and str[spec+1] == '}', "fennec::format syntax error, mismatched '{}'"); + + // check type + if (detail::_isfmt_t(str[spec])) { + fmt.type = str[spec--]; + } + + // early return + if (spec == colon) { + res += argarray.format(arg, fmt); + i = end + 1; + continue; + } + + // search for width and precision + size_t x = 0, j = 1; + bool found_decimal = false; + size_t num_decimals = 0; + bool is_float_t = detail::_isfmt_f(fmt.type); + bool is_str_t = fmt.type == 's'; + bool is_integer_t = detail::_isfmt_i(fmt.type); + + // default "precision" for strings should be 0 for no limit + if (is_str_t) { + fmt.precision = 0; + } + + // parse width and precision + while (isdigit(str[spec]) or (found_decimal = (str[spec] == '.')) or str[spec] == '{' or str[spec] == '}') { + // handle decimal point for precision + if (found_decimal) { + assertf(is_float_t or is_str_t, "fennec::format syntax error, encountered precision argument on non-floating point format"); + assertf(num_decimals == 0, "fennec::format syntax error, multiple decimals detected in floating point format"); + ++num_decimals; + found_decimal = false; + + fmt.precision = x; + x = 0, j = 1; + --spec; + continue; + } + + // check for nested replacement field + if (str[spec] == '{') { + assertf(str[spec - 1] == '0' or str[spec - 1] == '.' or not isdigit(str[spec - 1]), + "fennec::format syntax error, unexpected digit preceding nested replacement field"); + + bool prec = str[spec - 1] == '.'; + size_t sub = str[spec + 1] == '}' ? ++arg_c : x; + + assertf(sub < argc, "fennec::format syntax error, argument index out of range in nested replacement field"); + assertf(argarray.is_integer(sub), "fennec::format argument error, nested replacement field argument is not convertible to integral type"); + + (prec ? fmt.precision : fmt.width) = argarray.int_value(sub); + } + + // ignore closing brace for nested replacement fields + if (str[spec] == '}') { + continue; + } + + // crude way to only handle 0 case if 0 is the last digit + fmt.fill = str[spec] == '0' ? '0' : ' '; + + // parse the number + x += j * (str[spec] - '0'); + j *= 10; + --spec; + } + if (x != 0) { + fmt.width = x; + } + + // early return + if (spec == colon) { + res += argarray.format(arg, fmt); + i = end + 1; + continue; + } + + // check for alt form + if (str[spec] == '#') { + assertf(is_float_t or is_integer_t, "fennec::format syntax error, encountered alt spec ('#') with non-decimal type"); + fmt.alt = true; + --spec; + } + + // check for sign + if (str[spec] == '-' or str[spec] == '+' or str[spec] == ' ') { + fmt.sign = str[spec]; + if (str[spec] == ' ') { // handle fill if only space, gets overwritten if encounters fill character + fmt.fill = ' '; + } + --spec; + } + + // check for alignment + if (str[spec] == '<' or str[spec] == '>' or str[spec] == '^') { + fmt.align = str[spec]; + --spec; + } + + // fill character + if (str[spec] != ':') { + fmt.fill = str[spec]; + if (str[spec] == ' ') { + fmt.sign = fmt.sign == '\0' ? ' ' : fmt.sign; + } + --spec; + } + + // default sign + fmt.sign = fmt.sign == '\0' ? '-' : fmt.sign; + + // validate that we handled the entire format arg + assertf(spec == colon, "fennec::format syntax error, malformed format string detected, possible double colon"); // add formatted argument - res += argarray.format(a, fmt); - i = e + 1; + res += argarray.format(arg, fmt); + i = end + 1; } return res; diff --git a/include/fennec/format/format_arg.h b/include/fennec/format/format_arg.h index 88e07d1..c16e4c9 100644 --- a/include/fennec/format/format_arg.h +++ b/include/fennec/format/format_arg.h @@ -37,10 +37,7 @@ namespace fennec { struct format_arg { - struct { - char c; - size_t n; - } fill; + char fill; char align, sign; bool alt; size_t width, precision; diff --git a/include/fennec/format/formatter.h b/include/fennec/format/formatter.h index 218b205..ebcf47b 100644 --- a/include/fennec/format/formatter.h +++ b/include/fennec/format/formatter.h @@ -37,6 +37,11 @@ namespace fennec { +// base template ======================================================================================================= + +/// +/// \brief Formatter struct, used to turn values into formatted strings +/// \tparam T The type to format template struct formatter { string operator()(const format_arg&, const T&) { @@ -45,6 +50,9 @@ struct formatter { } }; + +// strings ============================================================================================================= + template struct formatter { string operator()(const format_arg&, const char (&str)[N]) { @@ -66,6 +74,138 @@ struct formatter { } }; + +// decimal types ======================================================================================================= + +template requires(is_integral_v and not is_bool_v) +struct formatter { + string operator()(const format_arg& fmt, IntT x) { + static constexpr char lowdigits[] = "0123456789abcdef"; + static constexpr char highdigits[] = "0123456789ABCDEF"; + bool neg = x < 0; x = fennec::abs(x); + const char* digits = lowdigits; + string res; + string pre; + + size_t base; + switch (fmt.type) { + // decimal + default: + assertf(false, "invalid format type for integral value"); + [[fallthrough]]; + case '\0': case 'd': + base = 10; + break; + + // binary + case 'B': + pre = "0B"; + base = 2; + break; + case 'b': + pre = "0b"; + base = 2; + break; + + // octal + case 'o': + pre = "0"; + base = 8; + break; + + // hex + case 'X': + digits = highdigits; + pre = "0X"; + base = 16; + break; + + case 'x': + pre = "0x"; + base = 16; + break; + } + + // parse int + while (x != 0) { + res = digits[x % base] + res; + x /= base; + } + + // handle 0 + if (res.empty()) { + res = '0' + res; + } + + // add prefix unless fill is '0' + if (fmt.alt and fmt.fill != '0' and (base != 8 or res[0] != '0')) { + res = pre + res; + } + + // fill + size_t fill = fmt.alt ? fmt.width - pre.length() : fmt.width; + switch (fmt.align) { + // align left + case '<': + while (res.size() < fill) { + res += fmt.fill == '0' ? ' ' : fmt.fill; + } + break; + + // align right + case '>': default: + while (res.size() < fill) { + res = fmt.fill + res; + } + break; + + // align + case '^': + bool rr = true; + while (res.size() < fill) { + if (rr) { + res += fmt.fill == '0' ? ' ' : fmt.fill; + } else { + res = fmt.fill + res; + } + rr = !rr; + } + break; + } + + // add prefix after filled 0s + if (fmt.alt and fmt.fill == '0') { + res = pre + res; + } + + // add sign + switch (fmt.sign) { + case '+': + res = (neg ? '-' : '+') + res; + break; + case ' ': + res = (neg ? '-' : ' ') + res; + break; + case '-': default: + if (neg) res = '-' + res; + break; + } + + return res; + } +}; + +template requires(is_bool_v) +struct formatter { + string operator()(const format_arg& fmt, BoolT x) { + if (fmt.type == 's' or fmt.type == '\0') { + return x ? string("true") : string("false"); + } + + return formatter{}(fmt, static_cast(x)); + } +}; + } #endif // FENNEC_LANG_FORMAT_FORMATTER_H \ No newline at end of file diff --git a/include/fennec/string/string.h b/include/fennec/string/string.h index 88b2fda..54a79e0 100644 --- a/include/fennec/string/string.h +++ b/include/fennec/string/string.h @@ -19,11 +19,8 @@ #ifndef FENNEC_LANGPROC_STRINGS_STRING_H #define FENNEC_LANGPROC_STRINGS_STRING_H -#include #include -#include - #include #include diff --git a/test/tests/test_format.h b/test/tests/test_format.h index bd787d8..09a9023 100644 --- a/test/tests/test_format.h +++ b/test/tests/test_format.h @@ -47,6 +47,33 @@ inline void fennec_test_format() { fennec_test_run(fennec::format("{0} {0}", "Hello"), string("Hello Hello")); fennec_test_run(fennec::format("{0} {1}", "Hello", "World!"), string("Hello World!")); fennec_test_run(fennec::format("{1} {0}", "Hello", "World!"), string("World! Hello")); + + fennec_test_spacer(1); + + fennec_test_run(fennec::format("{:6}", 342), string(" 342")); + fennec_test_run(fennec::format("{:06}", 342), string("000342")); + fennec_test_run(fennec::format("{:*<6}", 342), string("342***")); + fennec_test_run(fennec::format("{:*>6}", 342), string("***342")); + fennec_test_run(fennec::format("{:*^6}", 342), string("*342**")); + + fennec_test_spacer(1); + + fennec_test_run(fennec::format("{0:},{0:+},{0:-},{0: }", 1), string("1,+1,1, 1")); + fennec_test_run(fennec::format("{0:},{0:+},{0:-},{0: }", -1), string("-1,-1,-1,-1")); + + fennec_test_spacer(1); + + fennec_test_run(fennec::format("{:#06d}", 15), string("000015")); + fennec_test_run(fennec::format("{:#06x}", 15), string("0x000f")); + fennec_test_run(fennec::format("{:#06X}", 15), string("0X000F")); + fennec_test_run(fennec::format("{:#06o}", 15), string("000017")); + fennec_test_run(fennec::format("{:#06b}", 15), string("0b1111")); + fennec_test_run(fennec::format("{:#06B}", 15), string("0B1111")); + + fennec_test_spacer(1); + + fennec_test_run(fennec::format("{},{}", true, false), string("true,false")); + fennec_test_run(fennec::format("{:#06b},{:#06b}", true, false), string("0b0001,0b0000")); } }