mirror of git://gcc.gnu.org/git/gcc.git
libstdc++: Implement _Escaping_sink to avoid construction of string
This patch implements _Escaping_sink, which stores characters in a local (stack) buffer. When the buffer is full, the range of characters is escaped and written to the underlying sink. To support this, the __write_escaped_unicode_part function is defined. It takes __str and __prev_esc by reference. The __prev_esc value is updated based on the last character written. If the buffer ends with an incomplete code point sequence, __str is left non-empty and the trailing code units of that sequence are not written. _Escaping_sink then copies these characters to the front of the buffer to reconstruct the full code point. __formatter_str::_M_format_range now uses _Escaping_sink to escape any non-contiguous character sequences. libstdc++-v3/ChangeLog: * include/std/format (__format::__write_escape_seqs) (__format::_Escaping_sink): Define. (__format::__write_escaped_unicode_part): Extract from __format::__write_escaped_unicode. (__format::__write_escaped_unicode): Forward to __write_escaped_unicode_part. (__formatter_str::_M_format_range): Use _Escaping_sink. * testsuite/std/format/ranges/string.cc: New tests for escaping and for characters whose code points are split across the buffer boundary. Invoke test_padding. Reviewed-by: Patrick Palka <ppalka@redhat.com> Reviewed-by: Jonathan Wakely <jwakely@redhat.com> Signed-off-by: Tomasz Kamiński <tkaminsk@redhat.com>
This commit is contained in:
parent
9f13fd1b07
commit
59cabe08b5
|
@ -105,6 +105,7 @@ namespace __format
|
||||||
template<typename _CharT> class _Sink;
|
template<typename _CharT> class _Sink;
|
||||||
template<typename _CharT> class _Fixedbuf_sink;
|
template<typename _CharT> class _Fixedbuf_sink;
|
||||||
template<typename _Out, typename _CharT> class _Padding_sink;
|
template<typename _Out, typename _CharT> class _Padding_sink;
|
||||||
|
template<typename _Out, typename _CharT> class _Escaping_sink;
|
||||||
|
|
||||||
// Output iterator that writes to a type-erased character sink.
|
// Output iterator that writes to a type-erased character sink.
|
||||||
template<typename _CharT>
|
template<typename _CharT>
|
||||||
|
@ -1066,6 +1067,17 @@ namespace __format
|
||||||
return ++__out;
|
return ++__out;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template<typename _Out, typename _CharT>
|
||||||
|
_Out
|
||||||
|
__write_escape_seqs(_Out __out, basic_string_view<_CharT> __units)
|
||||||
|
{
|
||||||
|
using _UChar = make_unsigned_t<_CharT>;
|
||||||
|
for (_CharT __c : __units)
|
||||||
|
__out = __format::__write_escape_seq(
|
||||||
|
__out, static_cast<_UChar>(__c), _Escapes<_CharT>::_S_x());
|
||||||
|
return __out;
|
||||||
|
}
|
||||||
|
|
||||||
template<typename _Out, typename _CharT>
|
template<typename _Out, typename _CharT>
|
||||||
_Out
|
_Out
|
||||||
__write_escaped_char(_Out __out, _CharT __c)
|
__write_escaped_char(_Out __out, _CharT __c)
|
||||||
|
@ -1124,12 +1136,10 @@ namespace __format
|
||||||
|
|
||||||
template<typename _CharT, typename _Out>
|
template<typename _CharT, typename _Out>
|
||||||
_Out
|
_Out
|
||||||
__write_escaped_unicode(_Out __out,
|
__write_escaped_unicode_part(_Out __out, basic_string_view<_CharT>& __str,
|
||||||
basic_string_view<_CharT> __str,
|
bool& __prev_esc, _Term_char __term)
|
||||||
_Term_char __term)
|
|
||||||
{
|
{
|
||||||
using _Str_view = basic_string_view<_CharT>;
|
using _Str_view = basic_string_view<_CharT>;
|
||||||
using _UChar = make_unsigned_t<_CharT>;
|
|
||||||
using _Esc = _Escapes<_CharT>;
|
using _Esc = _Escapes<_CharT>;
|
||||||
|
|
||||||
static constexpr char32_t __replace = U'\uFFFD';
|
static constexpr char32_t __replace = U'\uFFFD';
|
||||||
|
@ -1143,10 +1153,10 @@ namespace __format
|
||||||
}();
|
}();
|
||||||
|
|
||||||
__unicode::_Utf_view<char32_t, _Str_view> __v(std::move(__str));
|
__unicode::_Utf_view<char32_t, _Str_view> __v(std::move(__str));
|
||||||
|
__str = {};
|
||||||
|
|
||||||
auto __first = __v.begin();
|
auto __first = __v.begin();
|
||||||
auto const __last = __v.end();
|
auto const __last = __v.end();
|
||||||
|
|
||||||
bool __prev_esc = true;
|
|
||||||
while (__first != __last)
|
while (__first != __last)
|
||||||
{
|
{
|
||||||
bool __esc_ascii = false;
|
bool __esc_ascii = false;
|
||||||
|
@ -1185,15 +1195,32 @@ namespace __format
|
||||||
__out = __format::__write_escaped_char(__out, *__first.base());
|
__out = __format::__write_escaped_char(__out, *__first.base());
|
||||||
else if (__esc_unicode)
|
else if (__esc_unicode)
|
||||||
__out = __format::__write_escape_seq(__out, *__first, _Esc::_S_u());
|
__out = __format::__write_escape_seq(__out, *__first, _Esc::_S_u());
|
||||||
else // __esc_replace
|
// __esc_replace
|
||||||
for (_CharT __c : _Str_view(__first.base(), __first._M_units()))
|
else if (_Str_view __units(__first.base(), __first._M_units());
|
||||||
__out = __format::__write_escape_seq(__out,
|
__units.end() != __last.base())
|
||||||
static_cast<_UChar>(__c),
|
__out = __format::__write_escape_seqs(__out, __units);
|
||||||
_Esc::_S_x());
|
else
|
||||||
|
{
|
||||||
|
__str = __units;
|
||||||
|
return __out;
|
||||||
|
}
|
||||||
|
|
||||||
__prev_esc = true;
|
__prev_esc = true;
|
||||||
++__first;
|
++__first;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return __out;
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename _CharT, typename _Out>
|
||||||
|
_Out
|
||||||
|
__write_escaped_unicode(_Out __out, basic_string_view<_CharT> __str,
|
||||||
|
_Term_char __term)
|
||||||
|
{
|
||||||
|
bool __prev_escape = true;
|
||||||
|
__out = __format::__write_escaped_unicode_part(__out, __str,
|
||||||
|
__prev_escape, __term);
|
||||||
|
__out = __format::__write_escape_seqs(__out, __str);
|
||||||
return __out;
|
return __out;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1412,55 +1439,28 @@ namespace __format
|
||||||
size_t(ranges::distance(__rg)));
|
size_t(ranges::distance(__rg)));
|
||||||
return format(__str, __fc);
|
return format(__str, __fc);
|
||||||
}
|
}
|
||||||
else if (!_M_spec._M_debug)
|
else
|
||||||
{
|
{
|
||||||
|
auto __handle_debug = [this, &__rg]<typename _NOut>(_NOut __nout)
|
||||||
|
{
|
||||||
|
if (!_M_spec._M_debug)
|
||||||
|
return ranges::copy(__rg, std::move(__nout)).out;
|
||||||
|
|
||||||
|
_Escaping_sink<_NOut, _CharT>
|
||||||
|
__sink(std::move(__nout), _Term_quote);
|
||||||
|
ranges::copy(__rg, __sink.out());
|
||||||
|
return __sink._M_finish();
|
||||||
|
};
|
||||||
|
|
||||||
const size_t __padwidth = _M_spec._M_get_width(__fc);
|
const size_t __padwidth = _M_spec._M_get_width(__fc);
|
||||||
if (__padwidth == 0 && _M_spec._M_prec_kind == _WP_none)
|
if (__padwidth == 0 && _M_spec._M_prec_kind == _WP_none)
|
||||||
return ranges::copy(__rg, __fc.out()).out;
|
return __handle_debug(__fc.out());
|
||||||
|
|
||||||
_Padding_sink<_Out, _CharT> __sink(__fc.out(), __padwidth,
|
_Padding_sink<_Out, _CharT>
|
||||||
_M_spec._M_get_precision(__fc));
|
__sink(__fc.out(), __padwidth, _M_spec._M_get_precision(__fc));
|
||||||
ranges::copy(__rg, __sink.out());
|
__handle_debug(__sink.out());
|
||||||
return __sink._M_finish(_M_spec._M_align, _M_spec._M_fill);
|
return __sink._M_finish(_M_spec._M_align, _M_spec._M_fill);
|
||||||
}
|
}
|
||||||
else if constexpr (ranges::forward_range<_Rg> || ranges::sized_range<_Rg>)
|
|
||||||
{
|
|
||||||
const size_t __n(ranges::distance(__rg));
|
|
||||||
size_t __w = __n;
|
|
||||||
if constexpr (!__unicode::__literal_encoding_is_unicode<_CharT>())
|
|
||||||
if (size_t __max = _M_spec._M_get_precision(__fc); __n > __max)
|
|
||||||
__w == __max;
|
|
||||||
|
|
||||||
if (__w <= __format::__stackbuf_size<_CharT>)
|
|
||||||
{
|
|
||||||
_CharT __buf[__format::__stackbuf_size<_CharT>];
|
|
||||||
ranges::copy_n(ranges::begin(__rg), __w, __buf);
|
|
||||||
return _M_format_escaped(_String_view(__buf, __n), __fc);
|
|
||||||
}
|
|
||||||
else if constexpr (ranges::random_access_range<_Rg>)
|
|
||||||
{
|
|
||||||
ranges::iterator_t<_Rg> __first = ranges::begin(__rg);
|
|
||||||
ranges::subrange __sub(__first, ranges::next(__first, __w));
|
|
||||||
return _M_format_escaped(_String(from_range, __sub), __fc);
|
|
||||||
}
|
|
||||||
else if (__w <= __n)
|
|
||||||
{
|
|
||||||
ranges::subrange __sub(
|
|
||||||
counted_iterator(ranges::begin(__rg), __w),
|
|
||||||
default_sentinel);
|
|
||||||
return _M_format_escaped(_String(from_range, __sub), __fc);
|
|
||||||
}
|
|
||||||
else if constexpr (ranges::sized_range<_Rg>)
|
|
||||||
return _M_format_escaped(_String(from_range, __rg), __fc);
|
|
||||||
else
|
|
||||||
{
|
|
||||||
// N.B. preserve the computed size
|
|
||||||
ranges::subrange __sub(__rg, __n);
|
|
||||||
return _M_format_escaped(_String(from_range, __sub), __fc);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
|
||||||
return _M_format_escaped(_String(from_range, __rg), __fc);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
constexpr void
|
constexpr void
|
||||||
|
@ -3997,6 +3997,93 @@ namespace __format
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
template<typename _Out, typename _CharT>
|
||||||
|
class _Escaping_sink : public _Buf_sink<_CharT>
|
||||||
|
{
|
||||||
|
using _Esc = _Escapes<_CharT>;
|
||||||
|
|
||||||
|
_Out _M_out;
|
||||||
|
_Term_char _M_term : 2;
|
||||||
|
unsigned _M_prev_escape : 1;
|
||||||
|
unsigned _M_out_discards : 1;
|
||||||
|
|
||||||
|
void
|
||||||
|
_M_sync_discarding()
|
||||||
|
{
|
||||||
|
if constexpr (is_same_v<_Out, _Sink_iter<_CharT>>)
|
||||||
|
_M_out_discards = _M_out._M_discarding();
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
_M_write()
|
||||||
|
{
|
||||||
|
span<_CharT> __bytes = this->_M_used();
|
||||||
|
basic_string_view<_CharT> __str(__bytes.data(), __bytes.size());
|
||||||
|
|
||||||
|
size_t __rem = 0;
|
||||||
|
if constexpr (__unicode::__literal_encoding_is_unicode<_CharT>())
|
||||||
|
{
|
||||||
|
bool __prev_escape = _M_prev_escape;
|
||||||
|
_M_out = __format::__write_escaped_unicode_part(
|
||||||
|
std::move(_M_out), __str, __prev_escape, _M_term);
|
||||||
|
_M_prev_escape = __prev_escape;
|
||||||
|
|
||||||
|
__rem = __str.size();
|
||||||
|
if (__rem > 0 && __str.data() != this->_M_buf) [[unlikely]]
|
||||||
|
ranges::move(__str, this->_M_buf);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
_M_out = __format::__write_escaped_ascii(
|
||||||
|
std::move(_M_out), __str, _M_term);
|
||||||
|
|
||||||
|
this->_M_reset(this->_M_buf, __rem);
|
||||||
|
_M_sync_discarding();
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
_M_overflow() override
|
||||||
|
{
|
||||||
|
if (_M_out_discards)
|
||||||
|
this->_M_rewind();
|
||||||
|
else
|
||||||
|
_M_write();
|
||||||
|
}
|
||||||
|
|
||||||
|
bool
|
||||||
|
_M_discarding() const override
|
||||||
|
{ return _M_out_discards; }
|
||||||
|
|
||||||
|
public:
|
||||||
|
[[__gnu__::__always_inline__]]
|
||||||
|
explicit
|
||||||
|
_Escaping_sink(_Out __out, _Term_char __term)
|
||||||
|
: _M_out(std::move(__out)), _M_term(__term),
|
||||||
|
_M_prev_escape(true), _M_out_discards(false)
|
||||||
|
{
|
||||||
|
_M_out = __format::__write(std::move(_M_out), _Esc::_S_term(_M_term));
|
||||||
|
_M_sync_discarding();
|
||||||
|
}
|
||||||
|
|
||||||
|
_Out
|
||||||
|
_M_finish()
|
||||||
|
{
|
||||||
|
if (_M_out_discards)
|
||||||
|
return std::move(_M_out);
|
||||||
|
|
||||||
|
if (!this->_M_used().empty())
|
||||||
|
{
|
||||||
|
_M_write();
|
||||||
|
if constexpr (__unicode::__literal_encoding_is_unicode<_CharT>())
|
||||||
|
if (auto __rem = this->_M_used(); !__rem.empty())
|
||||||
|
{
|
||||||
|
basic_string_view<_CharT> __str(__rem.data(), __rem.size());
|
||||||
|
_M_out = __format::__write_escape_seqs(std::move(_M_out), __str);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return __format::__write(std::move(_M_out), _Esc::_S_term(_M_term));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
enum class _Arg_t : unsigned char {
|
enum class _Arg_t : unsigned char {
|
||||||
_Arg_none, _Arg_bool, _Arg_c, _Arg_i, _Arg_u, _Arg_ll, _Arg_ull,
|
_Arg_none, _Arg_bool, _Arg_c, _Arg_i, _Arg_u, _Arg_ll, _Arg_ull,
|
||||||
_Arg_flt, _Arg_dbl, _Arg_ldbl, _Arg_str, _Arg_sv, _Arg_ptr, _Arg_handle,
|
_Arg_flt, _Arg_dbl, _Arg_ldbl, _Arg_str, _Arg_sv, _Arg_ptr, _Arg_handle,
|
||||||
|
|
|
@ -279,6 +279,93 @@ void test_padding()
|
||||||
VERIFY( strip_prefix(resv, 46, '*') );
|
VERIFY( strip_prefix(resv, 46, '*') );
|
||||||
VERIFY( strip_quotes(resv) );
|
VERIFY( strip_quotes(resv) );
|
||||||
VERIFY( resv == in );
|
VERIFY( resv == in );
|
||||||
|
|
||||||
|
// width is 5, size is 15
|
||||||
|
in = "\u2160\u2161\u2162\u2163\u2164";
|
||||||
|
in += in; // width is 10, size is 30
|
||||||
|
in += in; // width is 20, size is 60
|
||||||
|
in += in; // width is 40, size is 120
|
||||||
|
in += in; // width is 80, size is 240
|
||||||
|
in += in; // width is 160, size is 480
|
||||||
|
|
||||||
|
lc.assign_range(in);
|
||||||
|
|
||||||
|
resv = res = std::format("{:s}", lc);
|
||||||
|
VERIFY( resv == in );
|
||||||
|
|
||||||
|
resv = res = std::format("{:*>10s}", lc);
|
||||||
|
VERIFY( resv == in );
|
||||||
|
|
||||||
|
resv = res = std::format("{:*>200s}", lc);
|
||||||
|
VERIFY( strip_prefix(resv, 40, '*') );
|
||||||
|
VERIFY( resv == in );
|
||||||
|
|
||||||
|
resv = res = std::format("{:?s}", lc);
|
||||||
|
VERIFY( strip_quotes(resv) );
|
||||||
|
VERIFY( resv == in );
|
||||||
|
|
||||||
|
resv = res = std::format("{:*>10?s}", lc);
|
||||||
|
VERIFY( strip_quotes(resv) );
|
||||||
|
VERIFY( resv == in );
|
||||||
|
|
||||||
|
resv = res = std::format("{:*>200?s}", lc);
|
||||||
|
VERIFY( strip_prefix(resv, 38, '*') );
|
||||||
|
VERIFY( strip_quotes(resv) );
|
||||||
|
VERIFY( resv == in );
|
||||||
|
}
|
||||||
|
|
||||||
|
void test_escaping()
|
||||||
|
{
|
||||||
|
std::string res;
|
||||||
|
std::string_view resv;
|
||||||
|
|
||||||
|
const std::string_view input =
|
||||||
|
"\t\n\r\\\""
|
||||||
|
"\u008a" // Cc, Control, Line Tabulation Set,
|
||||||
|
"\u00ad" // Cf, Format, Soft Hyphen
|
||||||
|
"\u1d3d" // Lm, Modifier letter, Modifier Letter Capital Ou
|
||||||
|
"\u00a0" // Zs, Space Separator, No-Break Space (NBSP)
|
||||||
|
"\u2029" // Zp, Paragraph Separator, Paragraph Separator
|
||||||
|
"\U0001f984" // So, Other Symbol, Unicorn Face
|
||||||
|
;
|
||||||
|
const std::string_view output =
|
||||||
|
R"(\t\n\r\\\")"
|
||||||
|
R"(\u{8a})"
|
||||||
|
R"(\u{ad})"
|
||||||
|
"\u1d3d"
|
||||||
|
R"(\u{a0})"
|
||||||
|
R"(\u{2029})"
|
||||||
|
"\U0001f984";
|
||||||
|
|
||||||
|
std::forward_list<char> lc(std::from_range, input);
|
||||||
|
resv = res = std::format("{:s}", lc);
|
||||||
|
VERIFY( resv == input );
|
||||||
|
resv = res = std::format("{:?s}", lc);
|
||||||
|
VERIFY( strip_quotes(resv) );
|
||||||
|
VERIFY( resv == output );
|
||||||
|
|
||||||
|
// width is 5, size is 15
|
||||||
|
std::string in = "\u2160\u2161\u2162\u2163\u2164";
|
||||||
|
in += in; // width is 10, size is 30
|
||||||
|
in += in; // width is 20, size is 60
|
||||||
|
in += in; // width is 40, size is 120
|
||||||
|
in += in; // width is 80, size is 240
|
||||||
|
in += in; // width is 160, size is 480
|
||||||
|
std::string_view inv = in;
|
||||||
|
|
||||||
|
// last character is incomplete
|
||||||
|
lc.assign_range(inv.substr(0, 479));
|
||||||
|
|
||||||
|
// non-debug format, chars copied as is
|
||||||
|
resv = res = std::format("{:s}", lc);
|
||||||
|
VERIFY( resv == inv.substr(0, 479) );
|
||||||
|
|
||||||
|
// debug-format, incomplete code-point sequence is escaped
|
||||||
|
resv = res = std::format("{:?s}", lc);
|
||||||
|
VERIFY( strip_quotes(resv) );
|
||||||
|
VERIFY( resv.substr(0, 477) == inv.substr(0, 477) );
|
||||||
|
resv.remove_prefix(477);
|
||||||
|
VERIFY( resv == R"(\x{e2}\x{85})" );
|
||||||
}
|
}
|
||||||
|
|
||||||
int main()
|
int main()
|
||||||
|
@ -287,4 +374,6 @@ int main()
|
||||||
test_outputs<char>();
|
test_outputs<char>();
|
||||||
test_outputs<wchar_t>();
|
test_outputs<wchar_t>();
|
||||||
test_nested();
|
test_nested();
|
||||||
|
test_padding();
|
||||||
|
test_escaping();
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue