/**
	This file is a part of rexy's general purpose library
	Copyright (C) 2022 rexy712

	This program is free software: you can redistribute it and/or modify
	it under the terms of the GNU General Public License as published by
	the Free Software Foundation, either version 3 of the License, or
	(at your option) any later version.

	This program is distributed in the hope that it will be useful,
	but WITHOUT ANY WARRANTY; without even the implied warranty of
	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
	GNU General Public License for more details.

	You should have received a copy of the GNU General Public License
	along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/

#ifndef REXY_DETAIL_FORMAT_UTF_ITERATOR_TPP
#define REXY_DETAIL_FORMAT_UTF_ITERATOR_TPP

#include <cstddef> //size_t

namespace rexy::fmt::detail{

	/**
	 * Forward-only decoder that walks a range of UTF-8 code units and yields
	 * one code point per step.
	 *
	 * The iterator does not own the range [first, last); the caller must keep
	 * it alive for as long as the iterator is used.
	 *
	 * Decoding is structural only: the lead byte selects the sequence length
	 * and the payload bits are extracted. Continuation bytes are not checked
	 * for the 10xxxxxx pattern, and overlong forms and surrogate values are
	 * not rejected.
	 *
	 * @tparam Char code unit type of the underlying range.
	 */
	template<class Char>
	class utf8_iterator
	{
	private:
		const Char* m_start = nullptr; //first code unit of the current code point
		const Char* m_last = nullptr;  //one past the end of the whole range
		const Char* m_next = nullptr;  //first code unit after the current code point
		char32_t m_value = 0;          //decoded value of the current code point

	public:
		/**
		 * Start iterating over [first, last) and decode the first code point.
		 * An empty range produces an iterator for which valid() is false.
		 */
		constexpr utf8_iterator(const Char* first, const Char* last):
			m_start(first),
			m_last(last)
		{
			m_next = convert_codepoint(m_start, m_last, m_value);
		}

		/**
		 * Advance to the next code point. Once the end of the range has been
		 * reached, further increments leave the iterator unchanged.
		 */
		constexpr utf8_iterator& operator++(void){
			m_start = m_next;
			if(m_start != m_last){
				m_next = convert_codepoint(m_start, m_last, m_value);
			}
			return *this;
		}
		/** Advance to the next code point and return the state from before the advance. */
		constexpr utf8_iterator operator++(int){
			auto tmp = *this;
			++(*this);
			return tmp;
		}

		/** @return the most recently decoded code point; 0 if that sequence could not be decoded. */
		constexpr char32_t operator*(void)const{
			return m_value;
		}
		/** @return true while the iterator has not reached the end of the range. */
		constexpr bool valid(void)const{
			return m_start != m_last;
		}
		/** @return number of code units occupied by the current code point; 0 once the end is reached. */
		constexpr std::size_t byte_count(void)const{
			return static_cast<std::size_t>(m_next - m_start);
		}

	private:
		/** Extract the six payload bits carried by a continuation byte. */
		static constexpr char32_t continuation_bits(const Char unit){
			return static_cast<char32_t>(unit) & 0x3F;
		}

		/**
		 * Decode a single code point starting at first.
		 *
		 * @param first     start of the sequence to decode.
		 * @param last      one past the end of the readable range.
		 * @param codepoint receives the decoded value, or 0 when nothing could be decoded.
		 * @return pointer to the code unit following the consumed sequence.
		 *         For an empty range this is first. For an unrecognised lead byte
		 *         or a sequence cut short by last, exactly one code unit is
		 *         consumed so that callers looping on valid() always make progress.
		 */
		static constexpr const Char* convert_codepoint(const Char* first, const Char* last, char32_t& codepoint){
			codepoint = 0;
			if(first == last){
				//nothing to read; dereferencing first here would be out of bounds
				return first;
			}

			const std::size_t maxlen = static_cast<std::size_t>(last - first);
			//only the low 8 bits are inspected by the masks below, so a negative
			//(signed) code unit classifies the same as its unsigned counterpart
			const char32_t lead = static_cast<char32_t>(first[0]);

			if((lead & 0x80) == 0){
				codepoint = lead;
				return first + 1;
			}
			if((lead & 0xE0) == 0xC0 && maxlen > 1){
				codepoint = ((lead & 0x1F) << 6)
				          | continuation_bits(first[1]);
				return first + 2;
			}
			if((lead & 0xF0) == 0xE0 && maxlen > 2){
				codepoint = ((lead & 0x0F) << 12)
				          | (continuation_bits(first[1]) << 6)
				          | continuation_bits(first[2]);
				return first + 3;
			}
			if((lead & 0xF8) == 0xF0 && maxlen > 3){
				codepoint = ((lead & 0x07) << 18)
				          | (continuation_bits(first[1]) << 12)
				          | (continuation_bits(first[2]) << 6)
				          | continuation_bits(first[3]);
				return first + 4;
			}

			//stray continuation byte, invalid lead byte, or truncated sequence:
			//report 0 and skip a single code unit instead of stalling in place
			return first + 1;
		}
	};

}

#endif