Report stream start position for tokens

Kovid Goyal 2022-11-10 21:06:58 +05:30
parent eae4899df4
commit a4de4b7c6f
2 changed files with 48 additions and 73 deletions
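
Each Token now carries the byte offset in the stream at which it started, so callers can map tokens back to their position in the input. A minimal sketch of how a consumer might use the new Pos field (the import path is a placeholder; Tokenizer.Next is assumed, as in upstream shlex, to return io.EOF once the input is exhausted):

	package main

	import (
		"fmt"
		"io"
		"strings"

		"example.com/shlex" // placeholder import path for this package
	)

	func main() {
		tok := shlex.NewTokenizer(strings.NewReader(`one "two three"`))
		for {
			t, err := tok.Next()
			if err == io.EOF {
				break // end of input
			}
			if err != nil {
				panic(err)
			}
			// Pos is the offset of the token's first byte in the stream.
			fmt.Printf("%3d %q\n", t.Pos, t.Value)
		}
	}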

View File

@@ -56,21 +56,9 @@ type lexerState int
 
 // Token is a (type, value) pair representing a lexographical token.
 type Token struct {
-	tokenType TokenType
-	value     string
-}
-
-// Equal reports whether tokens a, and b, are equal.
-// Two tokens are equal if both their types and values are equal. A nil token can
-// never be equal to another token.
-func (a *Token) Equal(b *Token) bool {
-	if a == nil || b == nil {
-		return false
-	}
-	if a.tokenType != b.tokenType {
-		return false
-	}
-	return a.value == b.value
+	Type  TokenType
+	Value string
+	Pos   int64
 }
 
 // Named classes of UTF-8 runes
@@ -161,13 +149,13 @@ func (l *Lexer) Next() (string, error) {
 		if err != nil {
 			return "", err
 		}
-		switch token.tokenType {
+		switch token.Type {
 		case WordToken:
-			return token.value, nil
+			return token.Value, nil
 		case SpaceToken:
 			// skip spaces
 		default:
-			return "", fmt.Errorf("Unknown token type: %v", token.tokenType)
+			return "", fmt.Errorf("Unknown token type: %s", token.Type)
 		}
 	}
 }
@@ -207,6 +195,7 @@ func (t *Tokenizer) scanStream() (*Token, error) {
 	var err error
 	var sz int
 	value := strings.Builder{}
+	pos_at_start := t.pos
 
 	unread_rune := func() {
 		t.redo_rune.sz = sz
@@ -215,6 +204,10 @@ func (t *Tokenizer) scanStream() (*Token, error) {
 		t.pos -= int64(sz)
 	}
 
+	token := func() *Token {
+		return &Token{tokenType, value.String(), pos_at_start}
+	}
+
 	for {
 		if t.redo_rune.sz > 0 {
 			nextRune, sz = t.redo_rune.char, t.redo_rune.sz
@@ -279,11 +272,8 @@ func (t *Tokenizer) scanStream() (*Token, error) {
 					}
 				default:
 					{
-						token := &Token{
-							tokenType: tokenType,
-							value:     value.String()}
 						unread_rune()
-						return token, err
+						return token(), err
 					}
 				}
 			}
@@ -292,18 +282,12 @@ func (t *Tokenizer) scanStream() (*Token, error) {
 				switch nextRuneType {
 				case eofRuneClass:
 					{
-						token := &Token{
-							tokenType: tokenType,
-							value:     value.String()}
-						return token, err
+						return token(), err
 					}
 				case spaceRuneClass:
 					{
-						token := &Token{
-							tokenType: tokenType,
-							value:     value.String()}
 						unread_rune()
-						return token, err
+						return token(), err
 					}
 				case escapingQuoteRuneClass:
 					{
@@ -329,10 +313,7 @@ func (t *Tokenizer) scanStream() (*Token, error) {
 				case eofRuneClass:
 					{
 						err = ErrTrailingEscape
-						token := &Token{
-							tokenType: tokenType,
-							value:     value.String()}
-						return token, err
+						return token(), err
 					}
 				default:
 					{
@@ -347,10 +328,7 @@ func (t *Tokenizer) scanStream() (*Token, error) {
 				case eofRuneClass:
 					{
 						err = ErrTrailingQuoteEscape
-						token := &Token{
-							tokenType: tokenType,
-							value:     value.String()}
-						return token, err
+						return token(), err
 					}
 				default:
 					{
@@ -365,10 +343,7 @@ func (t *Tokenizer) scanStream() (*Token, error) {
 				case eofRuneClass:
 					{
 						err = ErrUnclosedDoubleQuote
-						token := &Token{
-							tokenType: tokenType,
-							value:     value.String()}
-						return token, err
+						return token(), err
 					}
 				case escapingQuoteRuneClass:
 					{
@@ -390,10 +365,7 @@ func (t *Tokenizer) scanStream() (*Token, error) {
 				case eofRuneClass:
 					{
 						err = ErrUnclosedSingleQuote
-						token := &Token{
-							tokenType: tokenType,
-							value:     value.String()}
-						return token, err
+						return token(), err
 					}
 				case nonEscapingQuoteRuneClass:
 					{

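The shape of the refactor above is worth noting: previously every exit path in scanStream rebuilt a &Token{...} literal by hand, and adding the Pos field would have meant editing each of those seven sites. Instead a single token() closure captures tokenType, value, and the newly recorded pos_at_start, so every return site collapses to return token(), err. A reduced, self-contained sketch of the same pattern (scanWord is a hypothetical helper, not code from this repository):

	package main

	import (
		"fmt"
		"strings"
	)

	type TokenType int

	type Token struct {
		Type  TokenType
		Value string
		Pos   int64
	}

	// scanWord reads one space-delimited word starting at offset start.
	func scanWord(input string, start int64) *Token {
		var tokenType TokenType
		value := strings.Builder{}
		pos_at_start := start // recorded once, before any rune is consumed

		// token captures the enclosing variables, so each exit path
		// returns the current state without restating the fields.
		token := func() *Token {
			return &Token{tokenType, value.String(), pos_at_start}
		}

		for _, r := range input[start:] {
			if r == ' ' {
				return token() // early exit still sees the latest state
			}
			value.WriteRune(r)
		}
		return token()
	}

	func main() {
		fmt.Printf("%+v\n", *scanWord("one two", 4)) // {Type:0 Value:two Pos:4}
	}
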
View File

@@ -19,6 +19,8 @@ package shlex
 import (
 	"strings"
 	"testing"
+
+	"github.com/google/go-cmp/cmp"
 )
 
 var (
@@ -44,31 +46,32 @@ func TestClassifier(t *testing.T) {
 func TestTokenizer(t *testing.T) {
 	testInput := testString
 	expectedTokens := []*Token{
-		{WordToken, "one"},
-		{SpaceToken, " "},
-		{WordToken, "two"},
-		{SpaceToken, " "},
-		{WordToken, "three four"},
-		{SpaceToken, " "},
-		{WordToken, "five \"six\""},
-		{SpaceToken, " "},
-		{WordToken, "seven#eight"},
-		{SpaceToken, " "},
-		{WordToken, "#"},
-		{SpaceToken, " "},
-		{WordToken, "nine"},
-		{SpaceToken, " "},
-		{WordToken, "#"},
-		{SpaceToken, " "},
-		{WordToken, "ten"},
-		{SpaceToken, " "},
-		{WordToken, "eleven"},
-		{SpaceToken, " "},
-		{WordToken, "twelve\\"},
-		{SpaceToken, " "},
-		{WordToken, "thirteen=13"},
-		{SpaceToken, " "},
-		{WordToken, "fourteen/14"}}
+		{WordToken, "one", 0},
+		{SpaceToken, " ", 3},
+		{WordToken, "two", 4},
+		{SpaceToken, " ", 7},
+		{WordToken, "three four", 8},
+		{SpaceToken, " ", 20},
+		{WordToken, "five \"six\"", 21},
+		{SpaceToken, " ", 35},
+		{WordToken, "seven#eight", 36},
+		{SpaceToken, " ", 47},
+		{WordToken, "#", 48},
+		{SpaceToken, " ", 49},
+		{WordToken, "nine", 50},
+		{SpaceToken, " ", 54},
+		{WordToken, "#", 55},
+		{SpaceToken, " ", 56},
+		{WordToken, "ten", 57},
+		{SpaceToken, " ", 60},
+		{WordToken, "eleven", 61},
+		{SpaceToken, " ", 67},
+		{WordToken, "twelve\\", 68},
+		{SpaceToken, " ", 77},
+		{WordToken, "thirteen=13", 78},
+		{SpaceToken, " ", 89},
+		{WordToken, "fourteen/14", 90},
+	}
 
 	tokenizer := NewTokenizer(strings.NewReader(testInput))
 	for i, want := range expectedTokens {
@@ -76,8 +79,8 @@ func TestTokenizer(t *testing.T) {
 		if err != nil {
 			t.Error(err)
 		}
-		if !got.Equal(want) {
-			t.Errorf("Tokenizer.Next()[%v] of %q -> %#v. Want: %#v", i, testString, got, want)
+		if diff := cmp.Diff(want, got); diff != "" {
+			t.Fatalf("Tokenizer.Next()[%v] of: %s:\n%s", i, testString, diff)
 		}
 	}
 }
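
With Token's fields exported, the hand-rolled Equal method becomes redundant: go-cmp compares the values structurally and, unlike the old boolean check, reports exactly which field differs. A quick standalone illustration of what cmp.Diff produces (with a local Token type standing in for the package's):

	package main

	import (
		"fmt"

		"github.com/google/go-cmp/cmp"
	)

	type TokenType int

	type Token struct {
		Type  TokenType
		Value string
		Pos   int64
	}

	func main() {
		want := &Token{0, "one", 0}
		got := &Token{0, "one", 3}
		// cmp.Diff returns "" when the values are equal; otherwise it
		// yields a human-readable report naming the differing field
		// (Pos here), which t.Fatalf prints in the test above.
		if diff := cmp.Diff(want, got); diff != "" {
			fmt.Print(diff)
		}
	}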