rexylib/include/rexy/detail/format/formatter.tpp

666 lines
21 KiB
C++

/**
This file is a part of rexy's general purpose library
Copyright (C) 2022 rexy712
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef REXY_DETAIL_FORMAT_FORMATTER_TPP
#define REXY_DETAIL_FORMAT_FORMATTER_TPP
#include "formatter.hpp"
#include "basic_types.hpp"
#include "internal_types.hpp"
#include "standard_types.hpp"
#include "parse_context.hpp"
#include "format_context.hpp"
#include "format_args.hpp"
#include "parse.hpp"
#include "utf_iterator.hpp"
#include "../../utility.hpp" //abs, memcpy
#include <type_traits> //remove_cvref
#include <utility> //forward, move
#include <cstddef> //size_t
#include <algorithm> //max
#include <cmath> //signbit
#include <charconv> //to_chars
#include <cctype> //toupper
#include <cstdint> //uintptr_t
#include <locale> //locale
#include <variant> //monostate
#include <limits>
namespace rexy::fmt::detail{
//Parse a standard format specification out of the parse context.
//The parsed fields are written into this formatter's `specs` member through a
//dynamic_format_specs_handler, wrapped in format_spec_handler_t together with the
//storage enum that corresponds to T's stored type.
//Returns whatever detail::parse::perform_standard_parse returns for [ctx.begin(), ctx.end()).
template<class T, class Char>
constexpr auto formatter_base<T,Char>::parse(basic_format_parse_context<Char>& ctx) -> decltype(ctx.begin()){
	using value_type = std::remove_cvref_t<T>;
	using stored_type = detail::stored_type_t<value_type,fmt_ctx_t>;
	using specs_handler = detail::dynamic_format_specs_handler<parse_ctx_t>;

	return detail::parse::perform_standard_parse(
		ctx.begin(),
		ctx.end(),
		format_spec_handler_t{
			specs_handler{ctx, specs},
			detail::map_to_storage_enum_v<stored_type,char_type>
		}
	);
}
//Format `t` into the format context's output using the specs gathered by parse().
//The specs are first normalized against the context (normalize_dynamic_format_specs),
//then the value is written by the matching perform_standard_format overload.
//The context's locale is forwarded so that locale-aware specs use the same locale as
//the arg_formatter path; every spec-taking overload in this file expects a locale.
//Returns the iterator produced by perform_standard_format.
template<class T, class Char>
template<class U, class FormatContext>
auto formatter_base<T,Char>::format(U&& t, FormatContext& ctx) -> decltype(ctx.out()){
	normalize_dynamic_format_specs(ctx, specs);
	return detail::format::perform_standard_format<Char>(std::forward<U>(t), ctx.out(), specs, ctx.locale());
}
namespace format{
//Copy exactly `length` characters, starting at `c`, into the output iterator.
//No terminator is looked for; the caller supplies the count.
//Returns the output iterator positioned after the last character written.
template<class Char, class OutIt>
constexpr OutIt perform_format_write(const Char* c, OutIt out, std::size_t length){
	const Char* const stop = c + length;
	while(c != stop){
		*out++ = *c++;
	}
	return out;
}
template<class OutIt, class Outputer>
constexpr OutIt perform_format_write_aligned(OutIt out, int width, const format_specs& specs, alignment default_align, Outputer&& outputer){
const int fill_amount = std::max(specs.width - width, 0);
int left_fill_amount = fill_amount;
int right_fill_amount = fill_amount;
const alignment align = specs.align == alignment::none ? default_align : specs.align;
switch(align){
case alignment::center:
left_fill_amount /= 2;
[[fallthrough]];
case alignment::right:
for(std::size_t i = 0;i < left_fill_amount;++i){
*out++ = specs.align_char;
}
right_fill_amount -= left_fill_amount;
[[fallthrough]];
case alignment::left:
out = outputer(std::move(out));
for(std::size_t i = 0;i < right_fill_amount;++i){
*out++ = specs.align_char;
}
break;
default:
REXY_THROW_FORMAT_ERROR("Invalid alignment state");
};
return std::move(out);
}
//Compile-time count of how many times `t` can be divided by 10 before dropping
//below 10, plus one. For non-negative integers this is the number of decimal digits
//(so 0..9 give 1, 10..99 give 2, and so on), not a true logarithm.
template<class T>
consteval T cxlog10(T t){
	T digits = 1;
	while(!(t < 10)){
		t = t / 10;
		digits = 1 + digits;
	}
	return digits;
}
//Write the sign character, if any, for a value.
//A negative value always produces '-'. Otherwise a `sign` of '+' or ' ' is echoed
//to the output and any other `sign` value produces nothing.
//Returns the output iterator positioned after whatever was written.
template<class OutIt, class T>
constexpr OutIt perform_format_write_sign(OutIt out, int sign, T val){
	char mark = '\0';
	if(val < 0){
		mark = '-';
	}else if(sign == '+' || sign == ' '){
		mark = static_cast<char>(sign);
	}
	if(mark != '\0'){
		*out++ = mark;
	}
	return out;
}
//Floating point flavour of the sign writer.
//Negativity is decided with std::signbit rather than a comparison against zero, so any
//value whose sign bit is set produces '-'. Otherwise a `sign` of '+' or ' ' is echoed
//and any other `sign` value produces nothing.
template<class OutIt, Floating T>
constexpr OutIt perform_format_write_sign(OutIt out, int sign, T val){
	char mark = '\0';
	if(std::signbit(val)){
		mark = '-';
	}else if(sign == '+' || sign == ' '){
		mark = static_cast<char>(sign);
	}
	if(mark != '\0'){
		*out++ = mark;
	}
	return out;
}
//Format a pointer with format specs.
//The address is rendered as "0x" followed by its base 16 digits into a local buffer,
//which is then forwarded to another perform_standard_format overload together with
//`specs` and `loc`. A failed conversion raises a format error.
template<class Char, class OutIt>
constexpr OutIt perform_standard_format(const void* t, OutIt out, const format_specs& specs, const std::locale& loc){
	//2 hexadecimal digits per octet
	//+2 for '0x', +1 for float rounding truncation
	constexpr std::size_t maxbufflen = sizeof(std::uintptr_t) * (CHAR_BIT / 4.0) + 3;
	//every element after the prefix starts out zeroed
	char buff[maxbufflen] = {'0', 'x'};
	const auto address = reinterpret_cast<std::uintptr_t>(t);
	const auto result = std::to_chars(buff + 2, buff + maxbufflen, address, 16);
	if(result.ec != std::errc{}){
		REXY_THROW_FORMAT_ERROR("Unable to convert pointer type");
	}
	return perform_standard_format<Char>(buff, out, specs, loc);
}
//Format a pointer without format specs.
//Writes "0x" followed by the address in base 16 straight to the output.
//A failed conversion raises a format error.
template<class Char, class OutIt>
constexpr OutIt perform_standard_format(const void* t, OutIt out){
	//2 hexadecimal digits per octet
	//+2 for '0x', +1 for float rounding truncation
	constexpr std::size_t maxbufflen = sizeof(std::uintptr_t) * (CHAR_BIT / 4.0) + 4;
	char buff[maxbufflen] = {'0', 'x'};
	const auto address = reinterpret_cast<std::uintptr_t>(t);
	const auto result = std::to_chars(buff + 2, buff + maxbufflen, address, 16);
	if(result.ec != std::errc{}){
		REXY_THROW_FORMAT_ERROR("Unable to convert pointer type");
	}
	//only the characters actually produced are written, prefix included
	return perform_format_write(buff, out, result.ptr - buff);
}
//Code point ranges that are estimated to occupy two columns.
//Stored as consecutive {first, last} pairs with BOTH ends inclusive, sorted ascending.
//'inline' so that every translation unit including this header shares one definition.
inline constexpr unsigned int codepoint_fields[] = {
	0x1100u, 0x115Fu,
	0x2329u, 0x232Au,
	0x2E80u, 0x303Eu,
	0x3040u, 0xA4CFu,
	0xAC00u, 0xD7A3u,
	0xF900u, 0xFAFFu,
	0xFE10u, 0xFE19u,
	0xFE30u, 0xFE6Fu,
	0xFF00u, 0xFF60u,
	0xFFE0u, 0xFFE6u,
	0x1F300u, 0x1F64Fu,
	0x1F900u, 0x1F9FFu,
	0x20000u, 0x2FFFDu,
	0x30000u, 0x3FFFDu
};
//Estimate how many columns a single code point occupies.
//Returns 2 when `c` lies inside one of the ranges of codepoint_fields and 1 otherwise.
//The last code point of every range is part of that range; it used to be reported
//as width 1 because the upper bound was compared with '<' instead of '<='.
constexpr unsigned int estimate_unicode_width(const char32_t c){
	constexpr std::size_t field_count = sizeof(codepoint_fields) / sizeof(codepoint_fields[0]);
	for(std::size_t i = 0;i + 1 < field_count;i += 2){
		//the table is sorted, so anything below this range's start is in no later range
		if(c < codepoint_fields[i]){
			return 1;
		}
		if(c <= codepoint_fields[i + 1]){
			return 2;
		}
	}
	return 1;
}
constexpr unsigned int estimate_unicode_string_width(const char* first, const char* last){
unsigned int width = 0;
for(utf8_iterator<char> it{first, last};it.valid();++it){
const auto codepoint = *it;
width += estimate_unicode_width(codepoint);
}
return width;
}
//Format a null terminated string of a non UTF-8 character type with format specs.
//Every character counts as one column. A precision greater than zero caps the number
//of characters printed; the result is padded to specs.width, left aligned by default.
//`loc` is not used by this overload.
template<class Char, class OutIt>
requires(!UTF8_String<Char>)
constexpr OutIt perform_standard_format(const Char* c, OutIt out, const format_specs& specs, const std::locale& loc){
	const basic_string_view<Char> text{c};
	const std::size_t full_length = text.length();
	//one column per character, so the printed count doubles as the width
	const std::size_t print_cnt = (specs.precision > 0)
		? std::min<std::size_t>(full_length, specs.precision)
		: full_length;

	const auto emit = [&](OutIt o){
		std::size_t idx = 0;
		while(idx < print_cnt){
			*o++ = text[idx];
			++idx;
		}
		return o;
	};
	return perform_format_write_aligned(std::move(out), print_cnt, specs, alignment::left, emit);
}
//Format a null terminated UTF-8 string with format specs.
//Width is estimated per code point. With a precision greater than zero, whole code
//points are taken for as long as the accumulated estimated width stays within the
//precision; otherwise the entire string is printed. The result is padded to
//specs.width, left aligned by default. `loc` is not used by this overload.
template<UTF8_String Char, class OutIt>
requires(UTF8_String<Char>)
constexpr OutIt perform_standard_format(const Char* c, OutIt out, const format_specs& specs, const std::locale& loc){
	const basic_string_view<Char> text{c};
	std::size_t columns = 0;
	std::size_t byte_total = 0;
	if(specs.precision > 0){
		utf8_iterator<Char> cursor{text.begin(), text.end()};
		while(cursor.valid()){
			const auto glyph_width = estimate_unicode_width(*cursor);
			//never split a code point: stop before the one that would exceed the precision
			if(glyph_width + columns > specs.precision){
				break;
			}
			byte_total += cursor.byte_count();
			columns += glyph_width;
			++cursor;
		}
	}else{
		columns = estimate_unicode_string_width(text.cbegin(), text.cend());
		byte_total = text.length();
	}

	const auto emit = [&](OutIt o){
		std::size_t idx = 0;
		while(idx < byte_total){
			*o++ = text[idx];
			++idx;
		}
		return o;
	};
	return perform_format_write_aligned(std::move(out), columns, specs, alignment::left, emit);
}
//Format a null terminated string without format specs.
//Characters are copied to the output up to, but not including, the terminator.
template<class Char, class OutIt>
constexpr OutIt perform_standard_format(const Char* c, OutIt out){
	while(*c){
		*out++ = *c;
		++c;
	}
	return out;
}
//Format a string view with format specs.
//The view's own length bounds everything that is measured and written. Previously the
//view was forwarded as a bare pointer, which dropped its length: the text was then read
//up to the first null character, cutting off views with embedded nulls and reading past
//the end of views that are not null terminated.
//Width and precision follow the same rules as the null terminated string overloads:
//  UTF-8 character types  width is estimated per code point and a precision greater
//                         than zero limits the estimated width, keeping whole code points
//  other character types  one column per character and a precision greater than zero
//                         limits the number of characters
//The result is padded to specs.width, left aligned by default. `loc` is not used.
template<class Char, class OutIt>
constexpr OutIt perform_standard_format(basic_string_view<std::type_identity_t<Char>> str, OutIt out, const format_specs& specs, const std::locale& loc){
	(void)loc;
	std::size_t est_width = 0;
	std::size_t print_cnt = 0;
	if constexpr(UTF8_String<Char>){
		if(specs.precision > 0){
			for(utf8_iterator<Char> it{str.begin(), str.end()};it.valid();++it){
				const auto ch_width = estimate_unicode_width(*it);
				//never split a code point: stop before the one that would exceed the precision
				if(ch_width + est_width > specs.precision){
					break;
				}
				print_cnt += it.byte_count();
				est_width += ch_width;
			}
		}else{
			est_width = estimate_unicode_string_width(str.cbegin(), str.cend());
			print_cnt = str.length();
		}
	}else{
		print_cnt = (specs.precision > 0)
			? std::min<std::size_t>(str.length(), specs.precision)
			: str.length();
		est_width = print_cnt;
	}
	const auto outputter = [&](OutIt o){
		return perform_format_write(str.c_str(), std::move(o), print_cnt);
	};
	return perform_format_write_aligned(std::move(out), est_width, specs, alignment::left, outputter);
}
//Format a string view without format specs.
//Exactly str.length() characters are written. Previously the view was forwarded as a
//bare pointer and copied up to the first null character, which cut off views with
//embedded nulls and read past the end of views that are not null terminated.
template<class Char, class OutIt>
constexpr OutIt perform_standard_format(basic_string_view<std::type_identity_t<Char>> str, OutIt out){
	return perform_format_write(str.c_str(), std::move(out), str.length());
}
//Format a single character with format specs.
//When the specs ask for integer presentation the character is converted to int and
//handed to the integer formatter. Otherwise it is written as one column, padded to
//specs.width and left aligned by default.
template<class Char, class OutIt>
constexpr OutIt perform_standard_format(Char b, OutIt out, const format_specs& specs, const std::locale& loc){
	if(specs.present != presentation::int_t){
		const auto emit = [b](OutIt o){
			*o++ = b;
			return o;
		};
		return perform_format_write_aligned(std::move(out), 1, specs, alignment::left, emit);
	}
	return perform_standard_format<Char>(static_cast<int>(b), std::move(out), specs, loc);
}
//Format a single character without format specs: the character is written as is.
template<class Char, class OutIt>
constexpr OutIt perform_standard_format(Char b, OutIt out){
	*out = b;
	++out;
	return out;
}
template<class Char, class OutIt>
constexpr OutIt perform_standard_format(bool b, OutIt out, const format_specs& specs, const std::locale& loc){
switch(specs.present){
case presentation::default_t:
case presentation::string_t:
break;
case presentation::int_t:
case presentation::char_t:
return perform_standard_format<Char>(static_cast<unsigned char>(b), std::move(out), specs, loc);
default:
REXY_THROW_FORMAT_ERROR("Invalid type argument for bool");
};
if(specs.locale){
const auto& facet = std::use_facet<std::numpunct<Char>>(loc);
const auto word = b ? facet.truename() : facet.falsename();
format_specs copy_specs = specs;
copy_specs.locale = false;
return perform_standard_format<Char>(
word.c_str(),
std::move(out),
copy_specs,
loc
);
}
return perform_standard_format<Char>(b ? "true" : "false", std::move(out), specs, loc);
}
//Format a bool without format specs: prints "true" or "false".
template<class Char, class OutIt>
constexpr OutIt perform_standard_format(bool b, OutIt out){
	const char* const word = b ? "true" : "false";
	return perform_standard_format<Char>(word, std::move(out));
}
//Write the digits in [start, last) with the locale's thousands separator inserted.
//  out    output iterator to write to
//  start  first digit; the range must hold digits only (no sign, no prefix)
//  last   one past the final digit
//  loc    locale whose numpunct<Char> facet supplies the grouping and the separator
//Only the first element of the facet's grouping() is honoured and it is applied to
//every group. Digits are grouped from the right, so the leading group may be shorter.
//The digits are written without separators when the grouping is empty, or when its
//first element is not positive or equals the maximum value of char.
//An empty range writes nothing. Previously an empty range dereferenced `start`, and
//a negative grouping value never left the loop.
template<class Char, class OutIt>
constexpr OutIt perform_localized_integer_write(OutIt out, const char* start, const char* last, const std::locale& loc){
	const auto digit_count = last - start;
	if(digit_count <= 0){
		return out;
	}
	const auto& facet = std::use_facet<std::numpunct<Char>>(loc);
	const auto grouping = facet.grouping();
	const int group_size = grouping.empty() ? 0 : static_cast<int>(grouping[0]);
	if(group_size <= 0 || group_size == std::numeric_limits<char>::max()){
		return std::copy(start, last, std::move(out));
	}
	const Char sep = facet.thousands_sep();
	//size of the leading group; everything after it is a whole number of full groups
	auto chunk = (digit_count - 1) % group_size + 1;
	while(true){
		for(;chunk > 0;--chunk){
			*out++ = *start++;
		}
		if(start == last){
			break;
		}
		*out++ = sep;
		chunk = group_size;
	}
	return out;
}
//Format an integral value with format specs.
//  b      value to format
//  out    output iterator to write to
//  specs  parsed format specification (type, sign, alternate form, zero fill, width, ...)
//  loc    locale used for digit grouping when the locale flag is set
//Character presentation is delegated to the character overload. Otherwise the output is
//laid out as [sign][prefix][zero fill][digits]:
//  - the base prefix ("0b", "0B", "0", "0x", "0X") is written only in alternate form and
//    always after the sign. Previously it was written regardless of alternate form and,
//    without alternate form, before the sign and through the enclosing function's iterator
//    instead of the one handed to the writer, so it could land ahead of the padding.
//  - zero fill applies only when no explicit alignment was requested; in that case the
//    fill character of the specs is not used.
//  - the width reserved for the sign matches exactly what perform_format_write_sign emits.
//A failed conversion raises a format error.
//NOTE: separators inserted for the locale flag are not counted in the computed width.
template<class Char, class OutIt, Integral T>
constexpr OutIt perform_standard_format(T b, OutIt out, const format_specs& specs, const std::locale& loc){
	constexpr std::size_t maxbufflen = rexy::max(
		std::numeric_limits<long long>::digits + 3, //add 1 for sign bit, 2 for prefix
		std::numeric_limits<unsigned long long>::digits + 3
	);
	if(specs.present == presentation::char_t){
		return perform_standard_format<Char>(static_cast<Char>(b), std::move(out), specs, loc);
	}

	int base = 10;
	bool to_upper = false;
	string_view prefix = "";
	switch(specs.type){
	case 'b':
		prefix = "0b";
		base = 2;
		break;
	case 'B':
		prefix = "0B";
		base = 2;
		break;
	case 'o':
		//zero already starts with the octal marker
		prefix = b == 0 ? "" : "0";
		base = 8;
		break;
	case 'x':
		prefix = "0x";
		base = 16;
		break;
	case 'X':
		prefix = "0X";
		to_upper = true;
		base = 16;
		break;
	default:
		break;
	}

	char buff[maxbufflen] = {};
	char* buffstart = buff;
	const auto result = std::to_chars(buff, buff + maxbufflen, b, base);
	if(result.ec != std::errc{}){
		REXY_THROW_FORMAT_ERROR("Unable to convert integral type");
	}
	char* const buffend = result.ptr;

	//the '-' produced by to_chars is skipped; the sign is written separately
	const bool is_negative = b < 0;
	if(is_negative){
		++buffstart;
	}
	if(to_upper){
		for(auto it = buffstart;it != buffend;++it){
			*it = static_cast<char>(std::toupper(static_cast<unsigned char>(*it)));
		}
	}

	const bool should_zero_fill = specs.zero_fill && specs.align == alignment::none;
	const bool writes_sign = is_negative || specs.sign == '+' || specs.sign == ' ';
	const auto bufflen = buffend - buffstart;
	const int prefix_width = specs.alt_form ? static_cast<int>(prefix.length()) : 0;
	const int total_width = prefix_width + (writes_sign ? 1 : 0) + static_cast<int>(bufflen);

	const auto outputter = [&](OutIt o) -> OutIt{
		o = perform_format_write_sign(std::move(o), specs.sign, b);
		if(specs.alt_form){
			o = perform_format_write(prefix.data(), std::move(o), prefix.length());
		}
		if(should_zero_fill){
			for(int i = total_width;i < specs.width;++i){
				*o++ = '0';
			}
		}
		if(specs.locale){
			return perform_localized_integer_write<Char>(std::move(o), buffstart, buffend, loc);
		}
		return perform_format_write(buffstart, std::move(o), bufflen);
	};
	if(should_zero_fill){
		//the zeros written by the outputter already bring the output up to the width
		return outputter(std::move(out));
	}
	return perform_format_write_aligned(std::move(out), total_width, specs, alignment::right, outputter);
}
//////////////////////////////////////////////DONE///////////////////////////////////////////////
//Format an arithmetic value without format specs.
//The value is converted with the two iterator + value form of std::to_chars and the
//produced characters are copied to the output. A failed conversion raises a format error.
template<class Char, class OutIt, Arithmetic T>
constexpr OutIt perform_standard_format(T b, OutIt out){
	//long double will be the longest type possible, so size the buffer for it
	using widest = std::numeric_limits<long double>;
	//this is a constant expression but using widest::max_exponent10 directly isn't?
	constexpr auto largest_exponent = widest::max_exponent10;
	//+4 for ones' place digit, decimal point, and 'e+' in scientific mode
	//maximum buffer length is the maximum significant digits plus maximum length of exponent
	constexpr std::size_t capacity = 4 + widest::max_digits10 + cxlog10(largest_exponent);

	char text[capacity] = {};
	const auto converted = std::to_chars(text, text + capacity, b);
	if(converted.ec != std::errc{}){
		REXY_THROW_FORMAT_ERROR("Unable to convert arithmetic type");
	}
	return perform_format_write(text, std::move(out), converted.ptr - text);
}
//TODO
//Format a floating point value with format specs.
//  f      value to format
//  out    output iterator to write to
//  specs  parsed format specification (type, precision, sign, alternate form, zero fill, ...)
//  loc    locale used for the radix character and digit grouping when the locale flag is set
//The output is laid out as [sign][zero fill][digits][trailing radix]. Infinity and NaN are
//produced by hand as "inf"/"nan" (upper cased for the upper case types), are never zero
//filled and are padded through the normal alignment path instead.
//A failed conversion raises a format error.
//Corrections compared to the previous implementation:
//  - the value is only converted to long long after checking that it is finite and in
//    range; converting an out of range value is undefined behaviour
//  - with the locale flag, the radix is located relative to the first digit. It used to be
//    patched at the same offset from the start of the buffer, which is one character off
//    for negative values, and the fractional part was then written with the length of the
//    whole number, reading past the converted text
//  - the column for the sign is counted exactly when perform_format_write_sign emits one,
//    which includes negative infinity and NaN with the sign bit set
template<class Char, class OutIt, Floating T>
constexpr OutIt perform_standard_format(T f, OutIt out, const format_specs& specs, const std::locale& loc){
	using limits = std::numeric_limits<T>;
	//max number of post-decimal digits is same as the inverted smallest radix exponent
	constexpr int max_precision = rexy::abs(limits::min_exponent);
	//max number of leading digits is same as biggest decimal exponent
	constexpr int max_significants = limits::max_exponent10;
	//+4 for ones' place digit, decimal point, and 'e+' in scientific mode
	//maximum buffer length is the maximum significant digits plus maximum precision because the
	//user can request any precision. So you can take the longest number with no decimal and add on
	//the longest decimal trail allowed.
	constexpr int maxbufflen = max_precision + max_significants + 4;
	char buff[maxbufflen] = {};
	char* buffstart = buff;
	char* buffend = buff + maxbufflen;

	const bool supplied_precision = specs.precision > 0;
	const bool is_infinity = f == limits::infinity() || f == -limits::infinity();
	const bool is_nan = (f != f);
	//both comparisons are false for NaN and the upper one is false for infinity, so the
	//cast below is only evaluated for values long long can hold
	const bool fits_long_long =
		f >= static_cast<T>(std::numeric_limits<long long>::min()) &&
		f < static_cast<T>(std::numeric_limits<long long>::max());
	const bool is_integer_representable = fits_long_long && static_cast<T>(static_cast<long long>(f)) == f;
	const bool should_zero_fill = specs.zero_fill && specs.align == alignment::none && !is_infinity && !is_nan;

	std::chars_format fmt = std::chars_format::general;
	bool to_upper = false;
	bool manual_precision = supplied_precision;
	bool trailing_dot = false;
	//TODO handle any other modes and formatting options
	switch(specs.type){
	case 'A':
		to_upper = true;
		[[fallthrough]];
	case 'a':
		fmt = std::chars_format::hex;
		break;
	case 'E':
		to_upper = true;
		[[fallthrough]];
	case 'e':
		fmt = std::chars_format::scientific;
		manual_precision = true;
		break;
	case 'F':
		to_upper = true;
		[[fallthrough]];
	case 'f':
		fmt = std::chars_format::fixed;
		manual_precision = true;
		break;
	case 'G':
		to_upper = true;
		[[fallthrough]];
	case 'g':
		manual_precision = true;
		if(specs.alt_form){
			//keep trailing zeros
			fmt = std::chars_format::fixed;
		}
		break;
	default:
		trailing_dot = is_integer_representable && !supplied_precision && specs.alt_form;
		break;
	}

	std::to_chars_result result{};
	//manually handle nan and inf since some compilers (msvc) output something other than the desired values in 'to_chars'
	if(is_nan){
		result.ptr = buffstart + 3;
		rexy::memcpy(buffstart, "nan", 3);
	}else if(is_infinity){
		result.ptr = buffstart + 3;
		rexy::memcpy(buffstart, "inf", 3);
	}else if(manual_precision){
		const int precision = supplied_precision ? specs.precision : 6;
		result = std::to_chars(buffstart, buffend, f, fmt, precision);
	}else{
		result = std::to_chars(buffstart, buffend, f, fmt);
	}
	if(result.ec != std::errc{}){
		REXY_THROW_FORMAT_ERROR("Unable to convert floating type");
	}
	buffend = result.ptr;

	//exclude negative sign automatically put there; the sign is written separately
	if(buffstart[0] == '-'){
		++buffstart;
	}
	if(to_upper){
		for(auto it = buffstart;it != buffend;++it){
			*it = static_cast<char>(std::toupper(static_cast<unsigned char>(*it)));
		}
	}

	const bool writes_sign = std::signbit(f) || specs.sign == '+' || specs.sign == ' ';
	const int total_width = static_cast<int>(buffend - buffstart) + (writes_sign ? 1 : 0) + (trailing_dot ? 1 : 0);

	const auto outputter = [&](OutIt o){
		Char radix_char = '.';
		o = perform_format_write_sign(std::move(o), specs.sign, f);
		if(should_zero_fill){
			for(int i = total_width;i < specs.width;++i){
				*o++ = '0';
			}
		}
		//first character that still has to be written verbatim
		const char* rest = buffstart;
		if(specs.locale){
			const auto& facet = std::use_facet<std::numpunct<Char>>(loc);
			radix_char = facet.decimal_point();
			string_view buff_view{buffstart, buffend};
			const auto radix_pos = buff_view.find_first_of('.');
			if(radix_pos != string_view::npos){
				//digits before the radix get grouped, then the localized radix replaces the '.'
				o = perform_localized_integer_write<Char>(std::move(o), buffstart, buffstart + radix_pos, loc);
				*o++ = radix_char;
				rest = buffstart + radix_pos + 1;
			}
		}
		o = perform_format_write(rest, std::move(o), buffend - rest);
		if(trailing_dot){
			*o++ = radix_char;
		}
		return o;
	};
	if(should_zero_fill){
		//the zeros written by the outputter already bring the output up to the width
		return outputter(std::move(out));
	}
	return perform_format_write_aligned(std::move(out), total_width, specs, alignment::right, outputter);
}
//Format an empty argument (std::monostate) with format specs: nothing is written
//and the output iterator is handed back untouched.
template<class Char, class OutIt>
constexpr OutIt perform_standard_format(std::monostate, OutIt out, const format_specs&, const std::locale&){
	return out;
}
//Format an empty argument (std::monostate) without format specs: nothing is written
//and the output iterator is handed back untouched.
template<class Char, class OutIt>
constexpr OutIt perform_standard_format(std::monostate, OutIt out){
	return out;
}
//Visitor overload for arguments that satisfy the Handle<FmtCtx> concept.
//The argument is asked to format itself and is given both the parse context and the
//format context held by this arg_formatter.
//NOTE(review): presumably these are handles to user defined types - confirm against the Handle concept.
template<class FmtCtx, class ParseCtx, class Specs>
template<Handle<FmtCtx> T>
constexpr void arg_formatter<FmtCtx,ParseCtx,Specs>::operator()(T&& t){
t.format(parse_ctx, fmt_ctx);
}
//Visitor overload for every argument that is not a handle.
//Steps, in order:
//  1. parse the format spec at the parse context's current position into a fresh
//     dynamic_format_specs, checked against the storage enum mapped from T
//  2. advance the parse context past what was parsed
//  3. normalize the specs against the format context
//  4. write the value with perform_standard_format using the context's locale and
//     advance the format context to the returned iterator
template<class FmtCtx, class ParseCtx, class Specs>
template<class T>
constexpr void arg_formatter<FmtCtx,ParseCtx,Specs>::operator()(T&& t){
	using char_type = typename FmtCtx::char_type;
	using specs_handler_t = dynamic_format_specs_handler<ParseCtx>;
	using checker_t = format_specs_checker<specs_handler_t>;

	dynamic_format_specs<char_type> specs;
	auto spec_end = parse::perform_standard_parse(
		parse_ctx.begin(),
		parse_ctx.end(),
		checker_t{
			specs_handler_t{parse_ctx, specs},
			map_to_storage_enum_v<T,char_type>
		}
	);
	parse_ctx.advance_to(std::move(spec_end));

	normalize_dynamic_format_specs(fmt_ctx, specs);

	auto out_end = perform_standard_format<char_type>(t, fmt_ctx.out(), specs, fmt_ctx.locale());
	fmt_ctx.advance_to(std::move(out_end));
}
//Visitor overload for arguments that satisfy the Handle<FmtCtx> concept.
//The argument is asked to format itself and is given both the parse context and the
//format context held by this empty_formatter.
//NOTE(review): presumably these are handles to user defined types - confirm against the Handle concept.
template<class FmtCtx, class ParseCtx>
template<Handle<FmtCtx> T>
constexpr void empty_formatter<FmtCtx,ParseCtx>::operator()(T&& t){
t.format(parse_ctx, fmt_ctx);
}
//Visitor overload for every argument that is not a handle.
//The value is written with the spec-less perform_standard_format overload and the format
//context is advanced to the iterator it returns. The returned iterator used to be
//discarded, so for output iterators that carry their own position the context kept
//pointing at the old location and later output would overwrite this value.
template<class FmtCtx, class ParseCtx>
template<class T>
constexpr void empty_formatter<FmtCtx,ParseCtx>::operator()(T&& t){
	fmt_ctx.advance_to(perform_standard_format<typename FmtCtx::char_type>(t, fmt_ctx.out()));
}
} //namespace format
}
#endif