From ab52132fe1e211c8512edee59e8e89742aae65b5 Mon Sep 17 00:00:00 2001 From: rexy712 Date: Mon, 3 Aug 2020 17:38:52 -0700 Subject: [PATCH] Imperfect, but still functional and fully c++17 standard compliant Small String Optimization --- include/rexy/string_base.hpp | 215 +++++++++++++++++++++++++-------- include/rexy/string_base.tpp | 227 +++++++++++++++++++---------------- 2 files changed, 290 insertions(+), 152 deletions(-) diff --git a/include/rexy/string_base.hpp b/include/rexy/string_base.hpp index cfddc28..f7cd07c 100644 --- a/include/rexy/string_base.hpp +++ b/include/rexy/string_base.hpp @@ -23,6 +23,7 @@ #include //forward #include //size_t,ptrdiff #include //strlen +#include //CHAR_BIT #include "steal.hpp" #include "cx/utility.hpp" @@ -49,42 +50,170 @@ namespace rexy{ using const_iterator = const_pointer; protected: - size_type m_length = 0; //length of string not including null terminator - size_type m_cap = 0; //size of current buffer not including null terminator - pointer m_data = nullptr; + static constexpr size_type EXTRA_SDATA_LEN = 0; + + struct ldata{ + unsigned char islong:1; + size_type capacity:(CHAR_BIT*sizeof(size_type)-1); + size_type length; + constexpr ldata(void)noexcept: + islong(0), + capacity(0), + length(0){} + }; + + static constexpr size_type MAX_SHORT_LEN = EXTRA_SDATA_LEN+sizeof(ldata)-2; + struct sdata{ + unsigned char islong:1; + unsigned char length:(CHAR_BIT-1); + value_type data[MAX_SHORT_LEN+1]; + constexpr sdata(void)noexcept: + islong(0), + length(0), + data{}{} + }; + union combine_data{ + ldata l; + sdata s; + constexpr combine_data(void)noexcept: + l(){} + }m__data; + pointer m_raw = m__data.s.data; + + constexpr void set_islong_flag(bool b){ + if(b) + m__data.l.islong = b; + else + m__data.s.islong = b; + } + constexpr bool islong(void)const{ + //common standard layout union member subsequence, never undefined behavior + return m__data.l.islong; + } + constexpr pointer set_short_ptr(void){ + set_islong_flag(false); + return m_raw = m__data.s.data; + } + constexpr pointer set_long_ptr(pointer ptr){ + set_islong_flag(true); + return m_raw = ptr; + } + constexpr pointer get_long_ptr(void){ + return m_raw; + } + constexpr pointer get_short_ptr(void){ + return m_raw; + } + constexpr const_pointer get_long_ptr(void)const{ + return m_raw; + } + constexpr const_pointer get_short_ptr(void)const{ + return m_raw; + } + constexpr pointer get_pointer(void){ + return m_raw; + } + constexpr const_pointer get_pointer(void)const{ + return m_raw; + } + constexpr void set_long_length(size_type len){ + m__data.l.length = len; + } + constexpr size_type get_long_length(void)const{ + return m__data.l.length; + } + constexpr void set_short_length(size_type len){ + m__data.s.length = static_cast(len); + } + constexpr size_type get_short_length(void)const{ + return m__data.s.length; + } + constexpr void set_long_capacity(size_type cap){ + m__data.l.capacity = cap; + } + constexpr void set_short_capacity(size_type){} + constexpr size_type get_long_capacity(void)const{ + return m__data.l.capacity; + } + constexpr size_type get_short_capacity(void)const{ + return MAX_SHORT_LEN; + } + constexpr void set_length(size_type s){ + if(islong()) + set_long_length(s); + else + set_short_length(s); + } protected: constexpr string_base(void)noexcept = default; - constexpr string_base(size_type len)noexcept: - m_cap(len){} //Initialize without copying + constexpr string_base(pointer data, size_type len, size_type cap)noexcept{ + if(cap > MAX_SHORT_LEN){ + set_islong_flag(true); + set_long_ptr(data); + set_long_length(len); + set_long_capacity(cap); + }else if(len){ + set_islong_flag(false); + pointer raw = set_short_ptr(); + if(len) + memcpy(raw, data, sizeof(value_type)*len); + raw[len] = 0; + set_short_length(len); + set_short_capacity(cap); + } + } constexpr string_base(pointer data, size_type len)noexcept: - m_cap(len), m_data(data){} - constexpr string_base(pointer data, size_type len, size_type cap)noexcept: - m_length(len), m_cap(cap), m_data(data){} - //Copy ctor (do nothing) - constexpr string_base(const string_base&)noexcept{} + string_base(data, len, len){} + //Copy ctor, copy length+capacity+short string, not long string value + constexpr string_base(const string_base& s)noexcept: + m__data(s.m__data){} + constexpr string_base(string_base&& s)noexcept: + m__data(std::move(s.m__data)), + m_raw(s.islong() ? s.m_raw : m__data.s.data) + { + s.set_islong_flag(false); + } ~string_base(void)noexcept = default; + constexpr string_base& operator=(string_base&& s)noexcept{ + std::swap(m__data, s.m__data); + if(this->islong()) + std::swap(m_raw, s.m_raw); + else{ + s.m_raw = m_raw; + m_raw = m__data.s.data; + } + return *this; + } + public: - //Stop managing stored pointer. Does not free. - constexpr pointer release(void)noexcept{return cx::exchange(m_data, nullptr);} - //Length of string not including null terminator - constexpr size_type length(void)const noexcept{return m_length;} - constexpr size_type capacity(void)const noexcept{return m_cap;} + constexpr size_type length(void)const noexcept{ + if(islong()) + return get_long_length(); + else + return get_short_length(); + } + constexpr size_type capacity(void)const noexcept{ + if(islong()) + return get_long_capacity(); + else + return get_short_capacity(); + } //direct access to managed pointer - constexpr pointer c_str(void)noexcept{return m_data;} - constexpr const_pointer c_str(void)const noexcept{return m_data;} - constexpr pointer get(void)noexcept{return m_data;} - constexpr const_pointer get(void)const noexcept{return m_data;} - constexpr operator pointer(void)noexcept{return m_data;} - constexpr operator const_pointer(void)const noexcept{return m_data;} - //true if m_data is not null - constexpr bool valid(void)const noexcept{return m_data;} + constexpr pointer c_str(void)noexcept{return get_pointer();} + constexpr const_pointer c_str(void)const noexcept{return get_pointer();} + constexpr pointer get(void)noexcept{return get_pointer();} + constexpr const_pointer get(void)const noexcept{return get_pointer();} + constexpr operator pointer(void)noexcept{return get_pointer();} + constexpr operator const_pointer(void)const noexcept{return get_pointer();} + //true if m_data is not empty + constexpr bool valid(void)const noexcept{return length() > 0;} - constexpr reference operator[](size_type i)noexcept{return m_data[i];} - constexpr const_reference operator[](size_type i)const noexcept{return m_data[i];} + constexpr reference operator[](size_type i)noexcept{return get_pointer()[i];} + constexpr const_reference operator[](size_type i)const noexcept{return get_pointer()[i];} }; @@ -104,12 +233,9 @@ namespace rexy{ using const_iterator = typename string_base::const_iterator; using allocator_type = Allocator; - protected: - using string_base::m_data; - using string_base::m_length; - using string_base::m_cap; - private: + void _copy_construct_string(const_pointer data, size_type len, size_type cap) + noexcept(noexcept(this->allocate(0))); basic_string& _copy_string(const_pointer s, size_type len) noexcept(noexcept(this->allocate(0)) && noexcept(this->deallocate(nullptr,0))); @@ -120,16 +246,15 @@ namespace rexy{ constexpr basic_string(rexy::steal data, size_type len, size_type cap)noexcept; constexpr basic_string(rexy::steal data)noexcept; basic_string(const_pointer data, size_type len)noexcept(noexcept(this->allocate(0))); + basic_string(const_pointer data, size_type len, size_type cap)noexcept(noexcept(this->allocate(0))); basic_string(const_pointer data)noexcept(noexcept(this->allocate(0))); explicit basic_string(size_type len)noexcept(noexcept(this->allocate(0))); basic_string(size_type len, size_type cap)noexcept(noexcept(this->allocate(0))); //normal copy and move ctors basic_string(const basic_string& b)noexcept(noexcept(this->allocate(0))); - constexpr basic_string(basic_string&& s)noexcept(noexcept(cx::exchange(s.m_data, nullptr))); - - template - basic_string(const string_base& b)noexcept(noexcept(this->allocate(0))); + constexpr basic_string(basic_string&& s)noexcept; + basic_string(const string_base&)noexcept(noexcept(this->allocate(0))); //dtor ~basic_string(void)noexcept(noexcept(this->deallocate(nullptr, 0))); @@ -139,13 +264,12 @@ namespace rexy{ noexcept(this->deallocate(nullptr,0))); constexpr basic_string& operator=(basic_string&& s)noexcept; - //Copy from c string - basic_string& operator=(const_pointer c) + + basic_string& operator=(const string_base& s) noexcept(noexcept(this->allocate(0)) && noexcept(this->deallocate(nullptr,0))); - //Copy from other string_base - template - basic_string& operator=(const string_base& s) + //Copy from c string + basic_string& operator=(const_pointer c) noexcept(noexcept(this->allocate(0)) && noexcept(this->deallocate(nullptr,0))); @@ -162,11 +286,7 @@ namespace rexy{ void append(const_pointer data) noexcept(noexcept(this->allocate(0)) && noexcept(this->deallocate(nullptr,0))); - template - void append(const string_base& s) - noexcept(noexcept(this->allocate(0)) && - noexcept(this->deallocate(nullptr,0))); - + pointer release(void)noexcept(noexcept(this->allocate(0))); using detail::hasallocator::allocator; }; @@ -218,13 +338,8 @@ namespace rexy{ using iterator = typename string_base::iterator; using const_iterator = typename string_base::const_iterator; - protected: - using string_base::m_data; - using string_base::m_length; - using string_base::m_cap; - public: - constexpr static_string(void)noexcept = default; + constexpr static_string(void)noexcept; constexpr static_string(const_pointer str, size_type len)noexcept; constexpr static_string(const_pointer c)noexcept; constexpr static_string(const static_string& s)noexcept; diff --git a/include/rexy/string_base.tpp b/include/rexy/string_base.tpp index d0b20bd..49970e2 100644 --- a/include/rexy/string_base.tpp +++ b/include/rexy/string_base.tpp @@ -30,15 +30,35 @@ namespace rexy{ + //allocate string if longer than small string capacity, copy otherwise + template + void basic_string::_copy_construct_string(const_pointer data, size_type len, size_type cap) + noexcept(noexcept(this->allocate(0))) + { + if(cap > this->get_short_capacity()){ + this->set_islong_flag(true); + pointer raw = this->set_long_ptr(this->allocate(sizeof(value_type)*(cap+1))); + if(data) + memcpy(raw, data, sizeof(value_type)*len); + raw[len] = 0; + this->set_long_length(len); + this->set_long_capacity(cap); + }else{ + this->set_islong_flag(false); + pointer raw = this->set_short_ptr(); + if(data) + memcpy(raw, data, sizeof(value_type)*len); + raw[len] = 0; + this->set_short_length(len); + this->set_short_capacity(cap); + } + } + template constexpr basic_string::basic_string(void)noexcept{} template constexpr basic_string::basic_string(rexy::steal data)noexcept: - string_base(data.value() ? cx::strlen(data.value()) : 0) - { - m_data = data.value(); - m_length = m_cap; - } + basic_string(data.value(), data.value() ? cx::strlen(data.value()) : 0){} template constexpr basic_string::basic_string(rexy::steal data, size_type len)noexcept: string_base(data.value(), len, len){} @@ -46,65 +66,47 @@ namespace rexy{ constexpr basic_string::basic_string(rexy::steal data, size_type len, size_type cap)noexcept: string_base(data.value(), len, cap){} template + basic_string::basic_string(const_pointer data, size_type len, size_type cap) + noexcept(noexcept(this->allocate(0))) + { + _copy_construct_string(data, len, cap); + } + template basic_string::basic_string(const_pointer data, size_type len) noexcept(noexcept(this->allocate(0))): - string_base(len ? this->allocate(sizeof(Char)*(len+1)) : nullptr, len, len) - { - if(len){ - memcpy(m_data, data, len*sizeof(Char)); - m_data[len] = 0; - } - } + basic_string(data, len, len){} template basic_string::basic_string(const_pointer data) noexcept(noexcept(this->allocate(0))): - string_base(data ? cx::strlen(data) : 0) - { - if(m_cap){ - m_data = this->allocate(sizeof(Char)*(m_cap+1)); - memcpy(m_data, data, sizeof(Char)*m_cap); - m_length = m_cap; - } - } + basic_string(data, data ? cx::strlen(data) : 0){} template - basic_string::basic_string(size_type len) + basic_string::basic_string(size_type cap) noexcept(noexcept(this->allocate(0))): - string_base(len ? this->allocate(sizeof(Char)*(len+1)) : nullptr, len) - { - if(len) - m_data[len] = 0; - } + basic_string(size_type{0}, cap){} template basic_string::basic_string(size_type len, size_type cap) - noexcept(noexcept(this->allocate(0))): - string_base(len ? this->allocate(sizeof(Char)*(len+1)) : nullptr, len, cap) + noexcept(noexcept(this->allocate(0))) { - if(len) - m_data[len] = 0; + _copy_construct_string(nullptr, len, cap); } //normal copy and move ctors template basic_string::basic_string(const basic_string& b) noexcept(noexcept(this->allocate(0))): - string_base(b.m_length ? this->allocate(sizeof(Char)*(b.m_length+1)) : nullptr, b.m_length, b.m_length) + detail::hasallocator(b) { - if(b.m_length) - memcpy(m_data, b.m_data, sizeof(Char)*(b.m_length+1)); + _copy_construct_string(b.get(), b.length(), b.capacity()); } template - constexpr basic_string::basic_string(basic_string&& s) - noexcept(noexcept(cx::exchange(s.m_data, nullptr))): - string_base(cx::exchange(s.m_data, nullptr), s.m_length, s.m_cap){} - + constexpr basic_string::basic_string(basic_string&& s)noexcept: + detail::hasallocator(std::move(s)), + string_base(std::move(s)){} template - template - basic_string::basic_string(const string_base& b) - noexcept(noexcept(this->allocate(0))): - string_base(b.length() ? this->allocate(sizeof(Char)*(b.length()+1)) : nullptr, b.length(), b.length()) + basic_string::basic_string(const string_base& b) + noexcept(noexcept(this->allocate(0))) { - if(b.length()) - memcpy(m_data, b.get(), sizeof(Char)*(b.length()+1)); + _copy_construct_string(b.get(), b.length(), b.capacity()); } //dtor @@ -112,7 +114,8 @@ namespace rexy{ basic_string::~basic_string(void) noexcept(noexcept(this->deallocate(nullptr, 0))) { - this->deallocate(m_data, sizeof(Char)*(m_cap+1)); + if(this->islong()) + this->deallocate(this->get_pointer(), sizeof(value_type)*(this->get_long_capacity()+1)); } template @@ -120,9 +123,9 @@ namespace rexy{ noexcept(noexcept(this->allocate(0)) && noexcept(this->deallocate(nullptr, 0))) { - if(s.m_length < m_cap){ - memcpy(m_data, s.m_data, sizeof(Char)*(s.m_length+1)); - m_length = s.m_length; + if(s.length() < this->capacity()){ + memcpy(this->get_pointer(), s.get_pointer(), sizeof(value_type)*(s.length()+1)); + this->set_length(s.length()); return *this; } basic_string tmp(s); @@ -130,11 +133,16 @@ namespace rexy{ } template constexpr basic_string& basic_string::operator=(basic_string&& s)noexcept{ - cx::swap(m_data, s.m_data); - m_length = s.m_length; - m_cap = s.m_cap; + string_base::operator=(std::move(s)); return *this; } + template + basic_string& basic_string::operator=(const string_base& s) + noexcept(noexcept(this->allocate(0)) && + noexcept(this->deallocate(nullptr,0))) + { + return (*this = basic_string(s)); + } //Copy from c string template basic_string& basic_string::operator=(const_pointer c) @@ -143,61 +151,53 @@ namespace rexy{ { return _copy_string(c, cx::strlen(c)); } - //Copy from other string_base - template - template - basic_string& basic_string::operator=(const string_base& s) - noexcept(noexcept(this->allocate(0)) && - noexcept(this->deallocate(nullptr,0))) - { - return _copy_string(s.get(), s.length()); - } //Replace managed pointer. Frees existing value template void basic_string::reset(pointer val) noexcept(noexcept(this->deallocate(nullptr,0))) { - this->deallocate(m_data,sizeof(Char)*(m_cap+1)); - m_data = val; - m_length = val ? cx::strlen(val) : 0; - m_cap = m_length; + reset(val, val ? cx::strlen(val) : 0); } template void basic_string::reset(pointer val, size_type len) noexcept(noexcept(this->deallocate(nullptr,0))) { - this->deallocate(m_data,sizeof(Char)*(m_cap+1)); - m_data = val; - m_length = len; - m_cap = len; + if(this->islong()) + this->deallocate(this->get_long_ptr(),sizeof(value_type)*(this->get_long_capacity()+1)); + this->set_islong_flag(true); + this->set_long_ptr(val); + this->set_long_length(len); + this->set_long_capacity(len); } template bool basic_string::resize(size_type newsize) noexcept(noexcept(this->allocate(0)) && noexcept(this->deallocate(nullptr,0))) { - if(newsize < m_cap) + if(newsize < this->capacity()) return false; - return (*this = basic_string(m_data, newsize)); + if(!this->islong() && newsize < this->get_short_capacity()) + return false; + return (*this = basic_string(this->get_pointer(), newsize)); } template void basic_string::append(const_pointer data, size_type len) noexcept(noexcept(this->allocate(0)) && noexcept(this->deallocate(nullptr,0))) { - if(len+m_length <= m_cap){ - memcpy(m_data+m_length, data, sizeof(Char)*len); - m_length += len; - m_data[m_length] = 0; - }else if(!m_data){ - *this = basic_string(len, len); - memcpy(m_data, data, sizeof(Char)*len); - m_data[len] = 0; + size_type mylen = this->length(); + size_type mycap = this->capacity(); + pointer raw = this->get_pointer(); + + if(mylen+len <= mycap){ + memcpy(raw+mylen, data, sizeof(value_type)*len); + this->set_length(mylen+len); + raw[mylen+len] = 0; }else{ - auto newsize = cx::max(m_length+len, m_cap*2); + auto newsize = cx::max(mylen+len, mycap*2); basic_string tmp(newsize); - tmp.append(m_data, m_length); + tmp.append(raw, mylen); tmp.append(data, len); *this = std::move(tmp); } @@ -210,13 +210,24 @@ namespace rexy{ if(data) append(data, cx::strlen(data)); } + template - template - void basic_string::append(const string_base& s) - noexcept(noexcept(this->allocate(0)) && - noexcept(this->deallocate(nullptr,0))) - { - append(s.get(), s.length()); + auto basic_string::release(void)noexcept(noexcept(this->allocate(0))) -> pointer{ + if(this->islong()){ + pointer raw = this->get_long_ptr(); + this->set_islong_flag(false); + this->set_short_ptr(); + this->set_short_length(0); + return raw; + } + size_type len = this->get_short_length(); + pointer raw = this->get_short_ptr(); + pointer retval = this->allocate(sizeof(value_type)*len+1); + memcpy(retval, raw, sizeof(value_type)*len); + retval[len] = 0; + raw[0] = 0; + this->set_short_length(0); + return retval; } template @@ -226,10 +237,11 @@ namespace rexy{ { if(!len) return (*this = basic_string(rexy::steal(nullptr), 0, 0)); - if(len <= m_length){ - m_length = len; - memcpy(m_data, s, sizeof(Char)*len); - m_data[len] = 0; + if(len <= this->length()){ + this->set_length(len); + pointer raw = this->get_pointer(); + memcpy(raw, s, sizeof(value_type)*len); + raw[len] = 0; return *this; } return (*this = basic_string(s, len)); @@ -255,19 +267,29 @@ namespace rexy{ template - constexpr static_string::static_string(const_pointer str, size_type len)noexcept: - string_base(const_cast(str), len, len){} + constexpr static_string::static_string(void)noexcept: + static_string(nullptr, 0){} + + template + constexpr static_string::static_string(const_pointer str, size_type len)noexcept{ + if(!str) + this->set_long_ptr(nullptr); + else + this->set_long_ptr(const_cast(str)); + this->set_long_length(len); + this->set_long_capacity(len); + } template constexpr static_string::static_string(const static_string& s)noexcept: - string_base(s.m_data, s.m_length, s.m_length){} + static_string(s.get_long_ptr(), s.get_long_length()){} template constexpr static_string::static_string(static_string&& s)noexcept: - string_base(s.m_data, s.m_length, s.m_length){} + static_string(s.get_long_ptr(), s.get_long_length()){} template constexpr static_string& static_string::operator=(const static_string& s)noexcept{ - m_data = s.m_data; - m_length = s.m_length; - m_cap = s.m_cap; + this->set_long_ptr(const_cast(s.get_long_ptr())); + this->set_long_length(s.get_long_length()); + this->set_long_capacity(s.get_long_capacity()); return *this; } template @@ -275,16 +297,17 @@ namespace rexy{ static_string(c, cx::strlen(c)){} template constexpr static_string& static_string::operator=(const_pointer c)noexcept{ - m_data = const_cast(c); - m_length = cx::strlen(c); - m_cap = m_length; + size_type len = cx::strlen(c); + this->set_long_ptr(const_cast(c)); + this->set_long_length(len); + this->set_long_capacity(len); return *this; } template constexpr static_string& static_string::operator=(static_string&& s)noexcept{ - m_data = s.m_data; - m_length = s.m_length; - m_cap = s.m_cap; + this->set_long_ptr(s.get_long_ptr()); + this->set_long_length(s.get_long_length()); + this->set_long_capacity(s.get_long_capacity()); return *this; }