Report stream start position for tokens

Kovid Goyal 2022-11-10 21:06:58 +05:30
parent eae4899df4
commit a4de4b7c6f
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
2 changed files with 48 additions and 73 deletions


@@ -56,21 +56,9 @@ type lexerState int
 // Token is a (type, value) pair representing a lexographical token.
 type Token struct {
-    tokenType TokenType
-    value     string
-}
-
-// Equal reports whether tokens a, and b, are equal.
-// Two tokens are equal if both their types and values are equal. A nil token can
-// never be equal to another token.
-func (a *Token) Equal(b *Token) bool {
-    if a == nil || b == nil {
-        return false
-    }
-    if a.tokenType != b.tokenType {
-        return false
-    }
-    return a.value == b.value
+    Type  TokenType
+    Value string
+    Pos   int64
 }
 
 // Named classes of UTF-8 runes
@@ -161,13 +149,13 @@ func (l *Lexer) Next() (string, error) {
         if err != nil {
             return "", err
         }
-        switch token.tokenType {
+        switch token.Type {
         case WordToken:
-            return token.value, nil
+            return token.Value, nil
         case SpaceToken:
             // skip spaces
         default:
-            return "", fmt.Errorf("Unknown token type: %v", token.tokenType)
+            return "", fmt.Errorf("Unknown token type: %s", token.Type)
         }
     }
 }
@@ -207,6 +195,7 @@ func (t *Tokenizer) scanStream() (*Token, error) {
     var err error
     var sz int
     value := strings.Builder{}
+    pos_at_start := t.pos
 
     unread_rune := func() {
         t.redo_rune.sz = sz
@@ -215,6 +204,10 @@ func (t *Tokenizer) scanStream() (*Token, error) {
         t.pos -= int64(sz)
     }
+    token := func() *Token {
+        return &Token{tokenType, value.String(), pos_at_start}
+    }
 
     for {
         if t.redo_rune.sz > 0 {
             nextRune, sz = t.redo_rune.char, t.redo_rune.sz
@ -279,11 +272,8 @@ func (t *Tokenizer) scanStream() (*Token, error) {
} }
default: default:
{ {
token := &Token{
tokenType: tokenType,
value: value.String()}
unread_rune() unread_rune()
return token, err return token(), err
} }
} }
} }
@@ -292,18 +282,12 @@ func (t *Tokenizer) scanStream() (*Token, error) {
         switch nextRuneType {
         case eofRuneClass:
             {
-                token := &Token{
-                    tokenType: tokenType,
-                    value:     value.String()}
-                return token, err
+                return token(), err
             }
         case spaceRuneClass:
             {
-                token := &Token{
-                    tokenType: tokenType,
-                    value:     value.String()}
                 unread_rune()
-                return token, err
+                return token(), err
             }
         case escapingQuoteRuneClass:
             {
@@ -329,10 +313,7 @@ func (t *Tokenizer) scanStream() (*Token, error) {
         case eofRuneClass:
             {
                 err = ErrTrailingEscape
-                token := &Token{
-                    tokenType: tokenType,
-                    value:     value.String()}
-                return token, err
+                return token(), err
             }
         default:
             {
@@ -347,10 +328,7 @@ func (t *Tokenizer) scanStream() (*Token, error) {
         case eofRuneClass:
             {
                 err = ErrTrailingQuoteEscape
-                token := &Token{
-                    tokenType: tokenType,
-                    value:     value.String()}
-                return token, err
+                return token(), err
             }
         default:
             {
@@ -365,10 +343,7 @@ func (t *Tokenizer) scanStream() (*Token, error) {
         case eofRuneClass:
             {
                 err = ErrUnclosedDoubleQuote
-                token := &Token{
-                    tokenType: tokenType,
-                    value:     value.String()}
-                return token, err
+                return token(), err
             }
         case escapingQuoteRuneClass:
             {
@@ -390,10 +365,7 @@ func (t *Tokenizer) scanStream() (*Token, error) {
         case eofRuneClass:
             {
                 err = ErrUnclosedSingleQuote
-                token := &Token{
-                    tokenType: tokenType,
-                    value:     value.String()}
-                return token, err
+                return token(), err
             }
         case nonEscapingQuoteRuneClass:
             {

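With the start position recorded on every token, callers can map each token back to the byte offset in the input stream at which it began. Below is a minimal sketch, not part of this commit, of how the new Pos field might be used from inside the package; wordsWithOffsets is a hypothetical helper name, and the sketch assumes the tokenizer keeps upstream google/shlex's convention of signalling end of input with io.EOF.

package shlex

import "io"

// wordsWithOffsets is a hypothetical helper (not in this commit): it collects
// every word token from r together with the byte offset at which it started.
func wordsWithOffsets(r io.Reader) ([]string, []int64, error) {
    var words []string
    var offsets []int64
    t := NewTokenizer(r)
    for {
        tok, err := t.Next()
        if err == io.EOF { // end of stream, assuming upstream's io.EOF convention
            return words, offsets, nil
        }
        if err != nil {
            return nil, nil, err
        }
        if tok.Type == WordToken {
            words = append(words, tok.Value)
            // Pos is the byte offset in the stream at which this token's
            // first rune was read.
            offsets = append(offsets, tok.Pos)
        }
    }
}

The second changed file, shown next, updates the package tests to expect the new field.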

@@ -19,6 +19,8 @@ package shlex
 import (
     "strings"
     "testing"
+
+    "github.com/google/go-cmp/cmp"
 )
 
 var (
@@ -44,31 +46,32 @@ func TestClassifier(t *testing.T) {
 func TestTokenizer(t *testing.T) {
     testInput := testString
     expectedTokens := []*Token{
-        {WordToken, "one"},
-        {SpaceToken, " "},
-        {WordToken, "two"},
-        {SpaceToken, " "},
-        {WordToken, "three four"},
-        {SpaceToken, " "},
-        {WordToken, "five \"six\""},
-        {SpaceToken, " "},
-        {WordToken, "seven#eight"},
-        {SpaceToken, " "},
-        {WordToken, "#"},
-        {SpaceToken, " "},
-        {WordToken, "nine"},
-        {SpaceToken, " "},
-        {WordToken, "#"},
-        {SpaceToken, " "},
-        {WordToken, "ten"},
-        {SpaceToken, " "},
-        {WordToken, "eleven"},
-        {SpaceToken, " "},
-        {WordToken, "twelve\\"},
-        {SpaceToken, " "},
-        {WordToken, "thirteen=13"},
-        {SpaceToken, " "},
-        {WordToken, "fourteen/14"}}
+        {WordToken, "one", 0},
+        {SpaceToken, " ", 3},
+        {WordToken, "two", 4},
+        {SpaceToken, " ", 7},
+        {WordToken, "three four", 8},
+        {SpaceToken, " ", 20},
+        {WordToken, "five \"six\"", 21},
+        {SpaceToken, " ", 35},
+        {WordToken, "seven#eight", 36},
+        {SpaceToken, " ", 47},
+        {WordToken, "#", 48},
+        {SpaceToken, " ", 49},
+        {WordToken, "nine", 50},
+        {SpaceToken, " ", 54},
+        {WordToken, "#", 55},
+        {SpaceToken, " ", 56},
+        {WordToken, "ten", 57},
+        {SpaceToken, " ", 60},
+        {WordToken, "eleven", 61},
+        {SpaceToken, " ", 67},
+        {WordToken, "twelve\\", 68},
+        {SpaceToken, " ", 77},
+        {WordToken, "thirteen=13", 78},
+        {SpaceToken, " ", 89},
+        {WordToken, "fourteen/14", 90},
+    }
 
     tokenizer := NewTokenizer(strings.NewReader(testInput))
     for i, want := range expectedTokens {
@@ -76,8 +79,8 @@ func TestTokenizer(t *testing.T) {
         if err != nil {
             t.Error(err)
         }
-        if !got.Equal(want) {
-            t.Errorf("Tokenizer.Next()[%v] of %q -> %#v. Want: %#v", i, testString, got, want)
+        if diff := cmp.Diff(want, got); diff != "" {
+            t.Fatalf("Tokenizer.Next()[%v] of: %s:\n%s", i, testString, diff)
         }
     }
 }
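With Token.Equal removed, the test compares tokens via go-cmp's cmp.Diff, which works here because the commit also exports Token's Type/Value/Pos fields (cmp panics on unexported fields unless given an option). A minimal standalone sketch, not from the commit, of what a cmp.Diff failure report looks like:

package main

import (
    "fmt"

    "github.com/google/go-cmp/cmp"
)

// Token is a local stand-in mirroring the exported shape of shlex's Token.
type Token struct {
    Type  int
    Value string
    Pos   int64
}

func main() {
    want := &Token{Type: 0, Value: "one", Pos: 0}
    got := &Token{Type: 0, Value: "one", Pos: 3}
    // cmp.Diff returns "" when the values are equal; on a mismatch it
    // returns a human-readable report in -want/+got form.
    if diff := cmp.Diff(want, got); diff != "" {
        fmt.Printf("token mismatch (-want +got):\n%s", diff)
    }
}

Switching from t.Errorf to t.Fatalf also stops the loop at the first mismatched token, which is sensible here: once one token's position is wrong, every later expected Pos is likely wrong too.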