Imperfect, but still functional and fully c++17 standard compliant Small String Optimization

This commit is contained in:
rexy712 2020-08-03 17:38:52 -07:00
parent 8cf91a7505
commit ab52132fe1
2 changed files with 290 additions and 152 deletions

View File

@ -23,6 +23,7 @@
#include <utility> //forward
#include <cstddef> //size_t,ptrdiff
#include <cstring> //strlen
#include <climits> //CHAR_BIT
#include "steal.hpp"
#include "cx/utility.hpp"
@ -49,42 +50,170 @@ namespace rexy{
using const_iterator = const_pointer;
protected:
size_type m_length = 0; //length of string not including null terminator
size_type m_cap = 0; //size of current buffer not including null terminator
pointer m_data = nullptr;
static constexpr size_type EXTRA_SDATA_LEN = 0;
//Long representation: a 1-bit islong tag, the capacity stored in a bit-field
//one bit narrower than size_type, and the string length.
struct ldata{
//tag bit; islong() reads it through this member
unsigned char islong:1;
//capacity, CHAR_BIT*sizeof(size_type)-1 bits wide
size_type capacity:(CHAR_BIT*sizeof(size_type)-1);
size_type length;
//zero-initialize every field
constexpr ldata(void)noexcept:
islong(0),
capacity(0),
length(0){}
};
static constexpr size_type MAX_SHORT_LEN = EXTRA_SDATA_LEN+sizeof(ldata)-2;
//Short representation: a 1-bit islong tag, a (CHAR_BIT-1)-bit length and an
//inline buffer holding MAX_SHORT_LEN characters plus one extra slot.
struct sdata{
//tag bit, same position and width as ldata::islong
unsigned char islong:1;
//length of the inline string, CHAR_BIT-1 bits wide
unsigned char length:(CHAR_BIT-1);
//inline character storage
value_type data[MAX_SHORT_LEN+1];
//zero-initialize every field, including the whole inline buffer
constexpr sdata(void)noexcept:
islong(0),
length(0),
data{}{}
};
//Storage shared by the long and short representations. Both members start
//with the 1-bit islong tag.
union combine_data{
ldata l;
sdata s;
//default construction initializes the ldata member
constexpr combine_data(void)noexcept:
l(){}
}m__data;
pointer m_raw = m__data.s.data;
//Record which representation is in use. The tag is written through the
//union member that matches the requested representation: sdata when b is
//false, ldata when b is true.
constexpr void set_islong_flag(bool b){
	if(!b){
		m__data.s.islong = b;
		return;
	}
	m__data.l.islong = b;
}
//Report whether the long representation is in use.
constexpr bool islong(void)const{
//The tag is always read through the ldata member. Original rationale: islong
//belongs to the common initial sequence of both standard layout union
//members, so the read is well defined whichever member is active.
return m__data.l.islong;
}
//Switch to the short representation and aim m_raw at the inline buffer.
//Returns the inline buffer.
constexpr pointer set_short_ptr(void){
	set_islong_flag(false);
	m_raw = m__data.s.data;
	return m_raw;
}
//Switch to the long representation and aim m_raw at ptr. Returns ptr.
constexpr pointer set_long_ptr(pointer ptr){
	set_islong_flag(true);
	m_raw = ptr;
	return m_raw;
}
//Every pointer accessor yields m_raw. The long/short variants exist so call
//sites can state which representation they expect; none of them checks the
//islong tag.
constexpr pointer get_pointer(void){
	return m_raw;
}
constexpr const_pointer get_pointer(void)const{
	return m_raw;
}
constexpr pointer get_long_ptr(void){
	return get_pointer();
}
constexpr pointer get_short_ptr(void){
	return get_pointer();
}
constexpr const_pointer get_long_ptr(void)const{
	return get_pointer();
}
constexpr const_pointer get_short_ptr(void)const{
	return get_pointer();
}
//Store the length in the long representation.
constexpr void set_long_length(size_type len){
m__data.l.length = len;
}
//Length stored in the long representation.
constexpr size_type get_long_length(void)const{
return m__data.l.length;
}
//Store the length in the short representation. The value is narrowed to
//unsigned char and then to the (CHAR_BIT-1)-bit length field; no range check
//is performed here.
constexpr void set_short_length(size_type len){
m__data.s.length = static_cast<unsigned char>(len);
}
//Length stored in the short representation.
constexpr size_type get_short_length(void)const{
return m__data.s.length;
}
//Store the capacity in the long representation (a bit-field one bit narrower
//than size_type).
constexpr void set_long_capacity(size_type cap){
m__data.l.capacity = cap;
}
//No-op: get_short_capacity always returns MAX_SHORT_LEN.
constexpr void set_short_capacity(size_type){}
//Capacity stored in the long representation.
constexpr size_type get_long_capacity(void)const{
return m__data.l.capacity;
}
//Capacity of the inline buffer, always MAX_SHORT_LEN.
constexpr size_type get_short_capacity(void)const{
return MAX_SHORT_LEN;
}
//Store s as the length of whichever representation is currently active.
constexpr void set_length(size_type s){
	if(!islong()){
		set_short_length(s);
		return;
	}
	set_long_length(s);
}
protected:
constexpr string_base(void)noexcept = default;
constexpr string_base(size_type len)noexcept:
m_cap(len){}
//Initialize without copying
//Build from an existing buffer.
//  cap >  MAX_SHORT_LEN: data is stored as the long pointer (no copy) together
//                        with len and cap.
//  cap <= MAX_SHORT_LEN: if len is nonzero, len characters are copied from
//                        data into the inline buffer and null terminated;
//                        data is not released by this constructor. If len is
//                        zero the members keep their default (empty) state.
constexpr string_base(pointer data, size_type len, size_type cap)noexcept{
	if(cap > MAX_SHORT_LEN){
		//set_long_ptr also sets the islong tag
		set_long_ptr(data);
		set_long_length(len);
		set_long_capacity(cap);
		return;
	}
	if(!len)
		return;
	//set_short_ptr also clears the islong tag
	pointer dest = set_short_ptr();
	memcpy(dest, data, sizeof(value_type)*len);
	dest[len] = 0;
	set_short_length(len);
	set_short_capacity(cap);
}
constexpr string_base(pointer data, size_type len)noexcept:
m_cap(len), m_data(data){}
constexpr string_base(pointer data, size_type len, size_type cap)noexcept:
m_length(len), m_cap(cap), m_data(data){}
//Copy ctor (do nothing)
constexpr string_base(const string_base&)noexcept{}
string_base(data, len, len){}
//Copy ctor: copies only the union storage (tag, length, capacity and any
//inline characters). m_raw is not taken from s; it keeps its default member
//initializer, which points at this object's own inline buffer, so the
//characters of a long source are neither shared nor duplicated here.
constexpr string_base(const string_base& s)noexcept:
m__data(s.m__data){}
//Move ctor: take over the contents of s.
//A long source hands over its buffer pointer. A short source had its inline
//characters copied along with the union, so m_raw is aimed at this object's
//own inline buffer instead.
constexpr string_base(string_base&& s)noexcept:
	m__data(std::move(s.m__data)),
	m_raw(s.islong() ? s.m_raw : m__data.s.data)
{
	if(s.islong()){
		//Reset the source to an empty short string. Merely clearing its tag
		//would leave s.m_raw aimed at the buffer this object now uses and
		//leave a meaningless short length behind.
		s.set_short_ptr();
		s.set_short_length(0);
		s.m__data.s.data[0] = 0;
	}
}
~string_base(void)noexcept = default;
//Move assignment: exchange contents with s and return *this.
//Both the union storage and the raw pointers are swapped. Afterwards any side
//that holds a short string is re-aimed at its OWN inline buffer, because a
//swapped-in pointer to the other object's inline buffer would alias (and
//later dangle into) that other object.
constexpr string_base& operator=(string_base&& s)noexcept{
	std::swap(m__data, s.m__data);
	std::swap(m_raw, s.m_raw);
	if(!this->islong())
		m_raw = m__data.s.data;
	if(!s.islong())
		s.m_raw = s.m__data.s.data;
	return *this;
}
public:
//Stop managing stored pointer. Does not free.
constexpr pointer release(void)noexcept{return cx::exchange(m_data, nullptr);}
//Length of string not including null terminator
constexpr size_type length(void)const noexcept{return m_length;}
constexpr size_type capacity(void)const noexcept{return m_cap;}
//Number of characters stored, read from the active representation.
constexpr size_type length(void)const noexcept{
	return islong() ? get_long_length() : get_short_length();
}
//Capacity of the active representation: the stored value for a long string,
//MAX_SHORT_LEN for a short one.
constexpr size_type capacity(void)const noexcept{
	return islong() ? get_long_capacity() : get_short_capacity();
}
//direct access to managed pointer
constexpr pointer c_str(void)noexcept{return m_data;}
constexpr const_pointer c_str(void)const noexcept{return m_data;}
constexpr pointer get(void)noexcept{return m_data;}
constexpr const_pointer get(void)const noexcept{return m_data;}
constexpr operator pointer(void)noexcept{return m_data;}
constexpr operator const_pointer(void)const noexcept{return m_data;}
//true if m_data is not null
constexpr bool valid(void)const noexcept{return m_data;}
constexpr pointer c_str(void)noexcept{return get_pointer();}
constexpr const_pointer c_str(void)const noexcept{return get_pointer();}
constexpr pointer get(void)noexcept{return get_pointer();}
constexpr const_pointer get(void)const noexcept{return get_pointer();}
constexpr operator pointer(void)noexcept{return get_pointer();}
constexpr operator const_pointer(void)const noexcept{return get_pointer();}
//true if the string is not empty, i.e. length() > 0
constexpr bool valid(void)const noexcept{return length() > 0;}
constexpr reference operator[](size_type i)noexcept{return m_data[i];}
constexpr const_reference operator[](size_type i)const noexcept{return m_data[i];}
constexpr reference operator[](size_type i)noexcept{return get_pointer()[i];}
constexpr const_reference operator[](size_type i)const noexcept{return get_pointer()[i];}
};
@ -104,12 +233,9 @@ namespace rexy{
using const_iterator = typename string_base<Char>::const_iterator;
using allocator_type = Allocator;
protected:
using string_base<Char>::m_data;
using string_base<Char>::m_length;
using string_base<Char>::m_cap;
private:
void _copy_construct_string(const_pointer data, size_type len, size_type cap)
noexcept(noexcept(this->allocate(0)));
basic_string& _copy_string(const_pointer s, size_type len)
noexcept(noexcept(this->allocate(0)) &&
noexcept(this->deallocate(nullptr,0)));
@ -120,16 +246,15 @@ namespace rexy{
constexpr basic_string(rexy::steal<pointer> data, size_type len, size_type cap)noexcept;
constexpr basic_string(rexy::steal<pointer> data)noexcept;
basic_string(const_pointer data, size_type len)noexcept(noexcept(this->allocate(0)));
basic_string(const_pointer data, size_type len, size_type cap)noexcept(noexcept(this->allocate(0)));
basic_string(const_pointer data)noexcept(noexcept(this->allocate(0)));
explicit basic_string(size_type len)noexcept(noexcept(this->allocate(0)));
basic_string(size_type len, size_type cap)noexcept(noexcept(this->allocate(0)));
//normal copy and move ctors
basic_string(const basic_string& b)noexcept(noexcept(this->allocate(0)));
constexpr basic_string(basic_string&& s)noexcept(noexcept(cx::exchange(s.m_data, nullptr)));
template<class C>
basic_string(const string_base<C>& b)noexcept(noexcept(this->allocate(0)));
constexpr basic_string(basic_string&& s)noexcept;
basic_string(const string_base<Char>&)noexcept(noexcept(this->allocate(0)));
//dtor
~basic_string(void)noexcept(noexcept(this->deallocate(nullptr, 0)));
@ -139,13 +264,12 @@ namespace rexy{
noexcept(this->deallocate(nullptr,0)));
constexpr basic_string& operator=(basic_string&& s)noexcept;
//Copy from c string
basic_string& operator=(const_pointer c)
basic_string& operator=(const string_base<Char>& s)
noexcept(noexcept(this->allocate(0)) &&
noexcept(this->deallocate(nullptr,0)));
//Copy from other string_base
template<class C>
basic_string& operator=(const string_base<C>& s)
//Copy from c string
basic_string& operator=(const_pointer c)
noexcept(noexcept(this->allocate(0)) &&
noexcept(this->deallocate(nullptr,0)));
@ -162,11 +286,7 @@ namespace rexy{
void append(const_pointer data)
noexcept(noexcept(this->allocate(0)) &&
noexcept(this->deallocate(nullptr,0)));
template<class C>
void append(const string_base<C>& s)
noexcept(noexcept(this->allocate(0)) &&
noexcept(this->deallocate(nullptr,0)));
pointer release(void)noexcept(noexcept(this->allocate(0)));
using detail::hasallocator<Allocator>::allocator;
};
@ -218,13 +338,8 @@ namespace rexy{
using iterator = typename string_base<Char>::iterator;
using const_iterator = typename string_base<Char>::const_iterator;
protected:
using string_base<Char>::m_data;
using string_base<Char>::m_length;
using string_base<Char>::m_cap;
public:
constexpr static_string(void)noexcept = default;
constexpr static_string(void)noexcept;
constexpr static_string(const_pointer str, size_type len)noexcept;
constexpr static_string(const_pointer c)noexcept;
constexpr static_string(const static_string& s)noexcept;

View File

@ -30,15 +30,35 @@
namespace rexy{
//Initialize this string with room for cap characters and a length of len.
//  data: source characters, or null to leave the first len characters
//        uncopied (only the terminator at index len is written).
//  len:  number of characters to copy and the resulting length.
//  cap:  requested capacity; raised to len when smaller so the copy and the
//        terminator can never land outside the buffer.
//A capacity above the short capacity allocates (cap+1)*sizeof(value_type)
//bytes through allocate(); otherwise the inline buffer is used.
template<class Char, class Allocator>
void basic_string<Char,Allocator>::_copy_construct_string(const_pointer data, size_type len, size_type cap)
	noexcept(noexcept(this->allocate(0)))
{
	if(cap < len)
		cap = len;
	pointer raw = nullptr;
	if(cap > this->get_short_capacity()){
		//set_long_ptr also sets the islong tag
		raw = this->set_long_ptr(this->allocate(sizeof(value_type)*(cap+1)));
		this->set_long_length(len);
		this->set_long_capacity(cap);
	}else{
		//set_short_ptr also clears the islong tag
		raw = this->set_short_ptr();
		this->set_short_length(len);
		this->set_short_capacity(cap);
	}
	if(data)
		memcpy(raw, data, sizeof(value_type)*len);
	raw[len] = 0;
}
template<class Char, class Allocator>
constexpr basic_string<Char,Allocator>::basic_string(void)noexcept{}
template<class Char, class Allocator>
constexpr basic_string<Char,Allocator>::basic_string(rexy::steal<pointer> data)noexcept:
string_base<Char>(data.value() ? cx::strlen(data.value()) : 0)
{
m_data = data.value();
m_length = m_cap;
}
basic_string(data.value(), data.value() ? cx::strlen(data.value()) : 0){}
template<class Char, class Allocator>
constexpr basic_string<Char,Allocator>::basic_string(rexy::steal<pointer> data, size_type len)noexcept:
string_base<Char>(data.value(), len, len){}
@ -46,65 +66,47 @@ namespace rexy{
constexpr basic_string<Char,Allocator>::basic_string(rexy::steal<pointer> data, size_type len, size_type cap)noexcept:
string_base<Char>(data.value(), len, cap){}
//Construct from a character buffer with an explicit capacity.
//Forwards data, len and cap unchanged to _copy_construct_string.
template<class Char, class Allocator>
basic_string<Char,Allocator>::basic_string(const_pointer data, size_type len, size_type cap)
noexcept(noexcept(this->allocate(0)))
{
_copy_construct_string(data, len, cap);
}
template<class Char, class Allocator>
basic_string<Char,Allocator>::basic_string(const_pointer data, size_type len)
noexcept(noexcept(this->allocate(0))):
string_base<Char>(len ? this->allocate(sizeof(Char)*(len+1)) : nullptr, len, len)
{
if(len){
memcpy(m_data, data, len*sizeof(Char));
m_data[len] = 0;
}
}
basic_string(data, len, len){}
template<class Char, class Allocator>
basic_string<Char,Allocator>::basic_string(const_pointer data)
noexcept(noexcept(this->allocate(0))):
string_base<Char>(data ? cx::strlen(data) : 0)
{
if(m_cap){
m_data = this->allocate(sizeof(Char)*(m_cap+1));
memcpy(m_data, data, sizeof(Char)*m_cap);
m_length = m_cap;
}
}
basic_string(data, data ? cx::strlen(data) : 0){}
template<class Char, class Allocator>
basic_string<Char,Allocator>::basic_string(size_type len)
basic_string<Char,Allocator>::basic_string(size_type cap)
noexcept(noexcept(this->allocate(0))):
string_base<Char>(len ? this->allocate(sizeof(Char)*(len+1)) : nullptr, len)
{
if(len)
m_data[len] = 0;
}
basic_string(size_type{0}, cap){}
template<class Char, class Allocator>
basic_string<Char,Allocator>::basic_string(size_type len, size_type cap)
noexcept(noexcept(this->allocate(0))):
string_base<Char>(len ? this->allocate(sizeof(Char)*(len+1)) : nullptr, len, cap)
noexcept(noexcept(this->allocate(0)))
{
if(len)
m_data[len] = 0;
_copy_construct_string(nullptr, len, cap);
}
//normal copy and move ctors
template<class Char, class Allocator>
basic_string<Char,Allocator>::basic_string(const basic_string& b)
noexcept(noexcept(this->allocate(0))):
string_base<Char>(b.m_length ? this->allocate(sizeof(Char)*(b.m_length+1)) : nullptr, b.m_length, b.m_length)
detail::hasallocator<Allocator>(b)
{
if(b.m_length)
memcpy(m_data, b.m_data, sizeof(Char)*(b.m_length+1));
_copy_construct_string(b.get(), b.length(), b.capacity());
}
template<class Char, class Allocator>
constexpr basic_string<Char,Allocator>::basic_string(basic_string&& s)
noexcept(noexcept(cx::exchange(s.m_data, nullptr))):
string_base<Char>(cx::exchange(s.m_data, nullptr), s.m_length, s.m_cap){}
constexpr basic_string<Char,Allocator>::basic_string(basic_string&& s)noexcept:
detail::hasallocator<Allocator>(std::move(s)),
string_base<Char>(std::move(s)){}
template<class Char, class Allocator>
template<class C>
basic_string<Char,Allocator>::basic_string(const string_base<C>& b)
noexcept(noexcept(this->allocate(0))):
string_base<Char>(b.length() ? this->allocate(sizeof(Char)*(b.length()+1)) : nullptr, b.length(), b.length())
basic_string<Char,Allocator>::basic_string(const string_base<Char>& b)
noexcept(noexcept(this->allocate(0)))
{
if(b.length())
memcpy(m_data, b.get(), sizeof(Char)*(b.length()+1));
_copy_construct_string(b.get(), b.length(), b.capacity());
}
//dtor
@ -112,7 +114,8 @@ namespace rexy{
basic_string<Char,Allocator>::~basic_string(void)
noexcept(noexcept(this->deallocate(nullptr, 0)))
{
this->deallocate(m_data, sizeof(Char)*(m_cap+1));
if(this->islong())
this->deallocate(this->get_pointer(), sizeof(value_type)*(this->get_long_capacity()+1));
}
template<class Char, class Allocator>
@ -120,9 +123,9 @@ namespace rexy{
noexcept(noexcept(this->allocate(0)) &&
noexcept(this->deallocate(nullptr, 0)))
{
if(s.m_length < m_cap){
memcpy(m_data, s.m_data, sizeof(Char)*(s.m_length+1));
m_length = s.m_length;
if(s.length() < this->capacity()){
memcpy(this->get_pointer(), s.get_pointer(), sizeof(value_type)*(s.length()+1));
this->set_length(s.length());
return *this;
}
basic_string tmp(s);
@ -130,11 +133,16 @@ namespace rexy{
}
template<class Char, class Allocator>
constexpr basic_string<Char,Allocator>& basic_string<Char,Allocator>::operator=(basic_string&& s)noexcept{
cx::swap(m_data, s.m_data);
m_length = s.m_length;
m_cap = s.m_cap;
string_base<Char>::operator=(std::move(s));
return *this;
}
//Assign from any string_base<Char>: builds a temporary basic_string from s
//and assigns that temporary to *this.
template<class Char, class Allocator>
basic_string<Char,Allocator>& basic_string<Char,Allocator>::operator=(const string_base<Char>& s)
noexcept(noexcept(this->allocate(0)) &&
noexcept(this->deallocate(nullptr,0)))
{
return (*this = basic_string(s));
}
//Copy from c string
template<class Char, class Allocator>
basic_string<Char,Allocator>& basic_string<Char,Allocator>::operator=(const_pointer c)
@ -143,61 +151,53 @@ namespace rexy{
{
return _copy_string(c, cx::strlen(c));
}
//Copy from other string_base
template<class Char, class Allocator>
template<class C>
basic_string<Char,Allocator>& basic_string<Char,Allocator>::operator=(const string_base<C>& s)
noexcept(noexcept(this->allocate(0)) &&
noexcept(this->deallocate(nullptr,0)))
{
return _copy_string(s.get(), s.length());
}
//Replace managed pointer. Frees existing value
template<class Char, class Allocator>
void basic_string<Char,Allocator>::reset(pointer val)
noexcept(noexcept(this->deallocate(nullptr,0)))
{
this->deallocate(m_data,sizeof(Char)*(m_cap+1));
m_data = val;
m_length = val ? cx::strlen(val) : 0;
m_cap = m_length;
reset(val, val ? cx::strlen(val) : 0);
}
template<class Char, class Allocator>
void basic_string<Char,Allocator>::reset(pointer val, size_type len)
noexcept(noexcept(this->deallocate(nullptr,0)))
{
this->deallocate(m_data,sizeof(Char)*(m_cap+1));
m_data = val;
m_length = len;
m_cap = len;
if(this->islong())
this->deallocate(this->get_long_ptr(),sizeof(value_type)*(this->get_long_capacity()+1));
this->set_islong_flag(true);
this->set_long_ptr(val);
this->set_long_length(len);
this->set_long_capacity(len);
}
template<class Char, class Allocator>
bool basic_string<Char,Allocator>::resize(size_type newsize)
noexcept(noexcept(this->allocate(0)) &&
noexcept(this->deallocate(nullptr,0)))
{
if(newsize < m_cap)
if(newsize < this->capacity())
return false;
return (*this = basic_string(m_data, newsize));
if(!this->islong() && newsize < this->get_short_capacity())
return false;
return (*this = basic_string(this->get_pointer(), newsize));
}
template<class Char, class Allocator>
void basic_string<Char,Allocator>::append(const_pointer data, size_type len)
noexcept(noexcept(this->allocate(0)) &&
noexcept(this->deallocate(nullptr,0)))
{
if(len+m_length <= m_cap){
memcpy(m_data+m_length, data, sizeof(Char)*len);
m_length += len;
m_data[m_length] = 0;
}else if(!m_data){
*this = basic_string(len, len);
memcpy(m_data, data, sizeof(Char)*len);
m_data[len] = 0;
size_type mylen = this->length();
size_type mycap = this->capacity();
pointer raw = this->get_pointer();
if(mylen+len <= mycap){
memcpy(raw+mylen, data, sizeof(value_type)*len);
this->set_length(mylen+len);
raw[mylen+len] = 0;
}else{
auto newsize = cx::max(m_length+len, m_cap*2);
auto newsize = cx::max(mylen+len, mycap*2);
basic_string tmp(newsize);
tmp.append(m_data, m_length);
tmp.append(raw, mylen);
tmp.append(data, len);
*this = std::move(tmp);
}
@ -210,13 +210,24 @@ namespace rexy{
if(data)
append(data, cx::strlen(data));
}
template<class Char, class Allocator>
template<class C>
void basic_string<Char,Allocator>::append(const string_base<C>& s)
noexcept(noexcept(this->allocate(0)) &&
noexcept(this->deallocate(nullptr,0)))
{
append(s.get(), s.length());
//Hand the string's characters to the caller and leave this object empty.
//  Long string:  returns the stored buffer pointer without copying, then
//                switches back to an empty, null terminated short string.
//  Short string: obtains (len+1)*sizeof(value_type) bytes from allocate(),
//                copies the characters plus a terminator into it and empties
//                the inline buffer.
auto basic_string<Char,Allocator>::release(void)noexcept(noexcept(this->allocate(0))) -> pointer{
	if(this->islong()){
		pointer raw = this->get_long_ptr();
		//set_short_ptr clears the islong tag and returns the inline buffer
		pointer inline_buf = this->set_short_ptr();
		this->set_short_length(0);
		//the inline bytes overlap the former long fields, so terminate explicitly
		inline_buf[0] = 0;
		return raw;
	}
	const size_type len = this->get_short_length();
	pointer raw = this->get_short_ptr();
	//size in bytes: every one of the len+1 slots is sizeof(value_type) wide
	pointer retval = this->allocate(sizeof(value_type)*(len+1));
	memcpy(retval, raw, sizeof(value_type)*len);
	retval[len] = 0;
	raw[0] = 0;
	this->set_short_length(0);
	return retval;
}
template<class Char, class Allocator>
@ -226,10 +237,11 @@ namespace rexy{
{
if(!len)
return (*this = basic_string(rexy::steal<pointer>(nullptr), 0, 0));
if(len <= m_length){
m_length = len;
memcpy(m_data, s, sizeof(Char)*len);
m_data[len] = 0;
if(len <= this->length()){
this->set_length(len);
pointer raw = this->get_pointer();
memcpy(raw, s, sizeof(value_type)*len);
raw[len] = 0;
return *this;
}
return (*this = basic_string(s, len));
@ -255,19 +267,29 @@ namespace rexy{
template<class Char>
constexpr static_string<Char>::static_string(const_pointer str, size_type len)noexcept:
string_base<Char>(const_cast<pointer>(str), len, len){}
constexpr static_string<Char>::static_string(void)noexcept:
static_string(nullptr, 0){}
//View an existing character sequence without copying it.
//str is stored as the long pointer (constness cast away, null stays null);
//len is recorded as both the length and the capacity.
template<class Char>
constexpr static_string<Char>::static_string(const_pointer str, size_type len)noexcept{
	this->set_long_ptr(str ? const_cast<pointer>(str) : nullptr);
	this->set_long_length(len);
	this->set_long_capacity(len);
}
template<class Char>
constexpr static_string<Char>::static_string(const static_string& s)noexcept:
string_base<Char>(s.m_data, s.m_length, s.m_length){}
static_string(s.get_long_ptr(), s.get_long_length()){}
template<class Char>
constexpr static_string<Char>::static_string(static_string&& s)noexcept:
string_base<Char>(s.m_data, s.m_length, s.m_length){}
static_string(s.get_long_ptr(), s.get_long_length()){}
template<class Char>
constexpr static_string<Char>& static_string<Char>::operator=(const static_string& s)noexcept{
m_data = s.m_data;
m_length = s.m_length;
m_cap = s.m_cap;
this->set_long_ptr(const_cast<pointer>(s.get_long_ptr()));
this->set_long_length(s.get_long_length());
this->set_long_capacity(s.get_long_capacity());
return *this;
}
template<class Char>
@ -275,16 +297,17 @@ namespace rexy{
static_string(c, cx::strlen(c)){}
template<class Char>
constexpr static_string<Char>& static_string<Char>::operator=(const_pointer c)noexcept{
m_data = const_cast<pointer>(c);
m_length = cx::strlen(c);
m_cap = m_length;
size_type len = cx::strlen(c);
this->set_long_ptr(const_cast<pointer>(c));
this->set_long_length(len);
this->set_long_capacity(len);
return *this;
}
template<class Char>
constexpr static_string<Char>& static_string<Char>::operator=(static_string&& s)noexcept{
m_data = s.m_data;
m_length = s.m_length;
m_cap = s.m_cap;
this->set_long_ptr(s.get_long_ptr());
this->set_long_length(s.get_long_length());
this->set_long_capacity(s.get_long_capacity());
return *this;
}