A new string scanner thats faster than bufio.Scanner and has zero-allocation
This commit is contained in:
parent
ebc1a0f0aa
commit
b8ce441453
@ -3,7 +3,6 @@
|
||||
package utils
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"fmt"
|
||||
"strings"
|
||||
"unicode/utf8"
|
||||
@ -23,7 +22,7 @@ func Capitalize(x string) string {
|
||||
type ScanLines struct {
|
||||
entries []string
|
||||
|
||||
scanner *bufio.Scanner
|
||||
scanner *StringScanner
|
||||
}
|
||||
|
||||
func NewScanLines(entries ...string) *ScanLines {
|
||||
@ -35,7 +34,7 @@ func (self *ScanLines) Scan() bool {
|
||||
if len(self.entries) == 0 {
|
||||
return false
|
||||
}
|
||||
self.scanner = bufio.NewScanner(strings.NewReader(self.entries[0]))
|
||||
self.scanner = NewLineScanner(self.entries[0])
|
||||
self.entries = self.entries[1:]
|
||||
return self.Scan()
|
||||
} else {
|
||||
@ -54,15 +53,86 @@ func (self *ScanLines) Text() string {
|
||||
return self.scanner.Text()
|
||||
}
|
||||
|
||||
func Splitlines(x string, expected_number_of_lines ...int) (ans []string) {
|
||||
if len(expected_number_of_lines) > 0 {
|
||||
ans = make([]string, 0, expected_number_of_lines[0])
|
||||
} else {
|
||||
ans = make([]string, 0, 8)
|
||||
type StringScannerScanFunc = func(data string) (remaining_data, token string)
|
||||
type StringScannerPostprocessFunc = func(token string) string
|
||||
|
||||
func ScanFuncForSeparator(sep string) StringScannerScanFunc {
|
||||
if len(sep) == 1 {
|
||||
sb := sep[0]
|
||||
return func(data string) (remaining_data, token string) {
|
||||
idx := strings.IndexByte(data, sb)
|
||||
if idx < 0 {
|
||||
return "", data
|
||||
}
|
||||
return data[idx+len(sep):], data[:idx]
|
||||
}
|
||||
|
||||
}
|
||||
scanner := bufio.NewScanner(strings.NewReader(x))
|
||||
for scanner.Scan() {
|
||||
ans = append(ans, scanner.Text())
|
||||
return func(data string) (remaining_data, token string) {
|
||||
idx := strings.Index(data, sep)
|
||||
if idx < 0 {
|
||||
return "", data
|
||||
}
|
||||
return data[idx+len(sep):], data[:idx]
|
||||
}
|
||||
return ans
|
||||
}
|
||||
|
||||
// Faster, better designed, zero-allocation version of bufio.Scanner for strings
|
||||
type StringScanner struct {
|
||||
ScanFunc StringScannerScanFunc
|
||||
PostProcessTokenFunc StringScannerPostprocessFunc
|
||||
|
||||
data string
|
||||
token string
|
||||
}
|
||||
|
||||
func (self *StringScanner) Scan() bool {
|
||||
if self.data == "" {
|
||||
self.token = ""
|
||||
return false
|
||||
}
|
||||
self.data, self.token = self.ScanFunc(self.data)
|
||||
if self.PostProcessTokenFunc != nil {
|
||||
self.token = self.PostProcessTokenFunc(self.token)
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
func (self *StringScanner) Text() string {
|
||||
return self.token
|
||||
}
|
||||
|
||||
func (self *StringScanner) Split(data string, expected_number ...int) (ans []string) {
|
||||
if len(expected_number) != 0 {
|
||||
ans = make([]string, 0, expected_number[0])
|
||||
} else {
|
||||
ans = []string{}
|
||||
}
|
||||
self.data = data
|
||||
for self.Scan() {
|
||||
ans = append(ans, self.Text())
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func NewLineScanner(text string) *StringScanner {
|
||||
return &StringScanner{
|
||||
data: text, ScanFunc: ScanFuncForSeparator("\n"),
|
||||
PostProcessTokenFunc: func(s string) string {
|
||||
if len(s) > 0 && s[len(s)-1] == '\r' {
|
||||
s = s[:len(s)-1]
|
||||
}
|
||||
return s
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
func NewSeparatorScanner(text, separator string) *StringScanner {
|
||||
return &StringScanner{
|
||||
data: text, ScanFunc: ScanFuncForSeparator(separator),
|
||||
}
|
||||
}
|
||||
|
||||
func Splitlines(x string, expected_number_of_lines ...int) (ans []string) {
|
||||
return NewLineScanner("").Split(x, expected_number_of_lines...)
|
||||
}
|
||||
|
||||
35
tools/utils/strings_test.go
Normal file
35
tools/utils/strings_test.go
Normal file
@ -0,0 +1,35 @@
|
||||
// License: GPLv3 Copyright: 2023, Kovid Goyal, <kovid at kovidgoyal.net>
|
||||
|
||||
package utils
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"fmt"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/google/go-cmp/cmp"
|
||||
)
|
||||
|
||||
var _ = fmt.Print
|
||||
|
||||
func TestStringScanner(t *testing.T) {
|
||||
for _, text := range []string{
|
||||
"a\nb\nc",
|
||||
"a\nb\nc\r",
|
||||
"a\n\n\nb\nc",
|
||||
"a\r\r\nb\r\nc\n",
|
||||
"\n1",
|
||||
"",
|
||||
} {
|
||||
actual := Splitlines(text)
|
||||
expected := make([]string, 0, len(actual))
|
||||
s := bufio.NewScanner(strings.NewReader(text))
|
||||
for s.Scan() {
|
||||
expected = append(expected, s.Text())
|
||||
}
|
||||
if diff := cmp.Diff(expected, actual); diff != "" {
|
||||
t.Fatalf("Failed for: %#v\n%s", text, diff)
|
||||
}
|
||||
}
|
||||
}
|
||||
Loading…
x
Reference in New Issue
Block a user