/**
This file is a part of rexy's general purpose library
Copyright (C) 2022 rexy712
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
#ifndef REXY_DETAIL_FORMAT_UTF_ITERATOR_TPP
#define REXY_DETAIL_FORMAT_UTF_ITERATOR_TPP
namespace rexy::fmt::detail{
/**
 * Decodes a UTF-8 encoded range [first, last) one codepoint at a time.
 *
 * Typical use:
 *   for(utf8_iterator<char> it(first, last); it.valid(); ++it){ use(*it); }
 *
 * Malformed input (stray continuation byte, invalid lead byte, truncated
 * sequence, or a lead byte not followed by enough continuation bytes)
 * is reported as codepoint 0 and consumes exactly one code unit, so that
 * iteration always makes progress and resynchronizes on the next unit.
 * Overlong encodings and surrogate values are NOT rejected.
 *
 * Char is the code unit type of the underlying buffer (e.g. char).
 */
template<class Char>
class utf8_iterator
{
private:
	const Char* m_start = nullptr; //first code unit of the current codepoint
	const Char* m_last = nullptr;  //one past the end of the input range
	const Char* m_next = nullptr;  //first code unit after the current codepoint
	char32_t m_value = 0;          //decoded value of the current codepoint

public:
	//Begin decoding at 'first'. An empty range is never dereferenced; the
	//resulting iterator is immediately !valid() with byte_count() == 0.
	constexpr utf8_iterator(const Char* first, const Char* last):
		m_start(first), m_last(last), m_next(first)
	{
		if(m_start != m_last){
			m_next = convert_codepoint(m_start, m_last, m_value);
		}
	}

	//Advance to the next codepoint. Once the end is reached the iterator
	//stays there and the last decoded value is left untouched.
	constexpr utf8_iterator& operator++(void){
		m_start = m_next;
		if(m_start != m_last){
			m_next = convert_codepoint(m_start, m_last, m_value);
		}
		return *this;
	}
	//Post-increment: advance and return a copy of the previous state.
	constexpr utf8_iterator operator++(int){
		auto tmp = *this;
		++(*this);
		return tmp;
	}

	//Value of the current codepoint (0 for a malformed code unit).
	constexpr char32_t operator*(void)const{
		return m_value;
	}
	//True while the iterator refers to a codepoint inside the range.
	constexpr bool valid(void)const{
		return m_start != m_last;
	}
	//Number of code units occupied by the current codepoint
	//(0 only when the iterator is at the end of the range).
	constexpr std::size_t byte_count(void)const{
		return static_cast<std::size_t>(m_next - m_start);
	}

private:
	//True if 'c' has the 10xxxxxx bit pattern of a UTF-8 continuation byte.
	//The mask is applied after integral promotion, so this also works when
	//Char is a signed type holding a negative value.
	static constexpr bool is_continuation(Char c){
		return (c & 0xC0) == 0x80;
	}

	//Decode the codepoint starting at 'first' into 'codepoint' and return a
	//pointer to the code unit following it. For a non-empty range the
	//returned pointer is always greater than 'first'. For an empty range
	//'first' is returned and 'codepoint' is set to 0.
	static constexpr const Char* convert_codepoint(const Char* first, const Char* last, char32_t& codepoint){
		const std::size_t maxlen = static_cast<std::size_t>(last - first);
		if(maxlen == 0){
			codepoint = 0;
			return first;
		}

		const auto lead = *first;
		if((lead & 0x80) == 0){
			//0xxxxxxx: single code unit
			codepoint = static_cast<char32_t>(lead);
			return first + 1;
		}
		if((lead & 0xE0) == 0xC0){
			//110xxxxx 10xxxxxx
			if(maxlen > 1 && is_continuation(first[1])){
				codepoint = (static_cast<char32_t>(lead & 0x1F) << 6)
				          | static_cast<char32_t>(first[1] & 0x3F);
				return first + 2;
			}
		}else if((lead & 0xF0) == 0xE0){
			//1110xxxx 10xxxxxx 10xxxxxx
			if(maxlen > 2 && is_continuation(first[1]) && is_continuation(first[2])){
				codepoint = (static_cast<char32_t>(lead & 0x0F) << 12)
				          | (static_cast<char32_t>(first[1] & 0x3F) << 6)
				          | static_cast<char32_t>(first[2] & 0x3F);
				return first + 3;
			}
		}else if((lead & 0xF8) == 0xF0){
			//11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
			if(maxlen > 3 && is_continuation(first[1]) && is_continuation(first[2]) && is_continuation(first[3])){
				codepoint = (static_cast<char32_t>(lead & 0x07) << 18)
				          | (static_cast<char32_t>(first[1] & 0x3F) << 12)
				          | (static_cast<char32_t>(first[2] & 0x3F) << 6)
				          | static_cast<char32_t>(first[3] & 0x3F);
				return first + 4;
			}
		}

		//Malformed or truncated sequence: report 0 and skip a single code
		//unit. Returning 'first' here would stall operator++ forever.
		codepoint = 0;
		return first + 1;
	}
};
}
#endif