92 lines
2.5 KiB
C++
92 lines
2.5 KiB
C++
/**
	This file is a part of rexy's general purpose library
	Copyright (C) 2022 rexy712

	This program is free software: you can redistribute it and/or modify
	it under the terms of the GNU General Public License as published by
	the Free Software Foundation, either version 3 of the License, or
	(at your option) any later version.

	This program is distributed in the hope that it will be useful,
	but WITHOUT ANY WARRANTY; without even the implied warranty of
	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	GNU General Public License for more details.

	You should have received a copy of the GNU General Public License
	along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
|
|
|
|
#ifndef REXY_DETAIL_FORMAT_UTF_ITERATOR_TPP
|
|
#define REXY_DETAIL_FORMAT_UTF_ITERATOR_TPP
|
|
|
|
namespace rexy::fmt::detail{
|
|
|
|
// Forward, read-only decoder over a [first, last) range of UTF-8 code units.
// Each step yields one char32_t code point together with the number of code
// units that encoded it.
//
// Malformed input (a stray continuation byte, an invalid lead byte, a
// sequence cut short by the end of the range, or a lead byte that is not
// followed by 10xxxxxx continuation bytes) is reported as code point 0 and
// consumes exactly one code unit, so that iteration always makes progress.
// Overlong encodings and surrogate code points are not rejected.
template<class Char>
class utf8_iterator
{
private:
	const Char* m_start = nullptr; // first code unit of the current sequence
	const Char* m_last = nullptr;  // one past the final code unit of the input
	const Char* m_next = nullptr;  // first code unit after the current sequence
	char32_t m_value = 0;          // code point decoded from [m_start, m_next)

public:
	// Positions the iterator on the first code point of [first, last).
	// An empty range yields an iterator for which valid() is false; nothing
	// is read from `first` in that case.
	constexpr utf8_iterator(const Char* first, const Char* last):
		m_start(first), m_last(last), m_next(first)
	{
		decode_current();
	}

	// Advances to the next code point. Incrementing an iterator that is
	// already at the end leaves it at the end.
	constexpr utf8_iterator& operator++(void){
		m_start = m_next;
		decode_current();
		return *this;
	}
	// Post-increment: advances and returns a copy of the previous state.
	constexpr utf8_iterator operator++(int){
		utf8_iterator previous = *this;
		++(*this);
		return previous;
	}
	// The current code point; 0 when the current sequence is malformed or
	// the iterator is at the end.
	constexpr char32_t operator*(void)const{
		return m_value;
	}
	// True while there is input left to read.
	constexpr bool valid(void)const{
		return m_start != m_last;
	}
	// Number of code units occupied by the current code point (0 at the end).
	constexpr std::size_t byte_count(void)const{
		return static_cast<std::size_t>(m_next - m_start);
	}

private:
	// Decodes the sequence starting at m_start, or resets the cached state
	// when the end of the input has been reached.
	constexpr void decode_current(void){
		if(m_start != m_last){
			m_next = convert_codepoint(m_start, m_last, m_value);
		}else{
			m_next = m_start;
			m_value = 0;
		}
	}

	// True when `unit` has the 10xxxxxx bit pattern of a continuation byte.
	static constexpr bool is_continuation(Char unit){
		return (unit & 0xC0) == 0x80;
	}

	// Extracts the payload bits selected by `mask` and moves them into place.
	// Masking first keeps the result non-negative even when Char is signed.
	static constexpr char32_t bits(Char unit, int mask, int shift){
		return static_cast<char32_t>(unit & mask) << shift;
	}

	// Decodes one code point starting at `first` and stores it in `codepoint`.
	// Returns a pointer to the code unit following the decoded sequence.
	// For an empty range nothing is read and `first` is returned; for a
	// malformed sequence `codepoint` is 0 and `first + 1` is returned.
	static constexpr const Char* convert_codepoint(const Char* first, const Char* last, char32_t& codepoint){
		const std::size_t maxlen = static_cast<std::size_t>(last - first);
		if(maxlen == 0){
			codepoint = 0;
			return first;
		}

		const Char lead = first[0];
		if((lead & 0x80) == 0){
			// 0xxxxxxx: single unit
			codepoint = static_cast<char32_t>(lead);
			return first + 1;
		}
		if((lead & 0xE0) == 0xC0 && maxlen > 1 && is_continuation(first[1])){
			// 110xxxxx 10xxxxxx
			codepoint = bits(lead, 0x1F, 6) | bits(first[1], 0x3F, 0);
			return first + 2;
		}
		if((lead & 0xF0) == 0xE0 && maxlen > 2 &&
		   is_continuation(first[1]) && is_continuation(first[2]))
		{
			// 1110xxxx 10xxxxxx 10xxxxxx
			codepoint = bits(lead, 0x0F, 12) | bits(first[1], 0x3F, 6) | bits(first[2], 0x3F, 0);
			return first + 3;
		}
		if((lead & 0xF8) == 0xF0 && maxlen > 3 &&
		   is_continuation(first[1]) && is_continuation(first[2]) && is_continuation(first[3]))
		{
			// 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
			codepoint = bits(lead, 0x07, 18) | bits(first[1], 0x3F, 12) |
			            bits(first[2], 0x3F, 6) | bits(first[3], 0x3F, 0);
			return first + 4;
		}

		// Malformed or truncated: skip a single unit so callers looping on
		// valid() cannot get stuck on the same position forever.
		codepoint = 0;
		return first + 1;
	}
};
|
|
|
|
}
|
|
|
|
#endif
|