Report stream start position for tokens
parent eae4899df4
commit a4de4b7c6f
@@ -56,21 +56,9 @@ type lexerState int
 
 // Token is a (type, value) pair representing a lexographical token.
 type Token struct {
-	tokenType TokenType
-	value     string
-}
-
-// Equal reports whether tokens a, and b, are equal.
-// Two tokens are equal if both their types and values are equal. A nil token can
-// never be equal to another token.
-func (a *Token) Equal(b *Token) bool {
-	if a == nil || b == nil {
-		return false
-	}
-	if a.tokenType != b.tokenType {
-		return false
-	}
-	return a.value == b.value
+	Type  TokenType
+	Value string
+	Pos   int64
 }
 
 // Named classes of UTF-8 runes
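Note: with the fields exported and the new Pos offset, callers can report where in the input each token began. A minimal sketch of consuming the field; the import path and input string are placeholders, but NewTokenizer and Next returning (*Token, error) are the package's existing API:

package main

import (
	"fmt"
	"strings"

	"github.com/google/shlex" // placeholder import path for this fork
)

func main() {
	tok := shlex.NewTokenizer(strings.NewReader(`one "two three"`))
	for {
		t, err := tok.Next()
		if err != nil {
			break // io.EOF once the input is exhausted
		}
		// Pos is the byte offset of the token's first raw byte.
		fmt.Printf("%v %q at byte %d\n", t.Type, t.Value, t.Pos)
	}
}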
@@ -161,13 +149,13 @@ func (l *Lexer) Next() (string, error) {
 		if err != nil {
 			return "", err
 		}
-		switch token.tokenType {
+		switch token.Type {
 		case WordToken:
-			return token.value, nil
+			return token.Value, nil
 		case SpaceToken:
 			// skip spaces
 		default:
-			return "", fmt.Errorf("Unknown token type: %v", token.tokenType)
+			return "", fmt.Errorf("Unknown token type: %s", token.Type)
 		}
 	}
 }
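The %v-to-%s switch in the error message assumes TokenType satisfies fmt.Stringer; on a plain integer type, %s would render as %!s(TokenType=…). A hypothetical sketch of such a method, not taken from this commit (the fork presumably defines its own):

// Hypothetical Stringer for TokenType; names invented for illustration.
func (t TokenType) String() string {
	switch t {
	case WordToken:
		return "WORD"
	case SpaceToken:
		return "SPACE"
	default:
		return fmt.Sprintf("TokenType(%d)", int(t))
	}
}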
@@ -207,6 +195,7 @@ func (t *Tokenizer) scanStream() (*Token, error) {
 	var err error
 	var sz int
 	value := strings.Builder{}
+	pos_at_start := t.pos
 
 	unread_rune := func() {
 		t.redo_rune.sz = sz
@@ -215,6 +204,10 @@ func (t *Tokenizer) scanStream() (*Token, error) {
 		t.pos -= int64(sz)
 	}
 
+	token := func() *Token {
+		return &Token{tokenType, value.String(), pos_at_start}
+	}
+
 	for {
 		if t.redo_rune.sz > 0 {
 			nextRune, sz = t.redo_rune.char, t.redo_rune.sz
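These two additions work together: pos_at_start snapshots t.pos before the token's first rune is consumed (unread_rune above rewinds t.pos, so pushed-back runes don't skew it), and the token() closure stamps every return site with that offset. Because Go closures capture variables rather than copies, token() also sees the final tokenType and the fully accumulated value. A standalone illustration of that capture behavior:

package main

import (
	"fmt"
	"strings"
)

func main() {
	// A closure captures the variable itself, not its value at creation,
	// so mk() observes writes made afterwards -- the same reason token()
	// returns the fully built value even though it is defined up front.
	var b strings.Builder
	mk := func() string { return b.String() }
	b.WriteString("abc")
	fmt.Println(mk()) // "abc"
}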
@@ -279,11 +272,8 @@ func (t *Tokenizer) scanStream() (*Token, error) {
 			}
 		default:
 			{
-				token := &Token{
-					tokenType: tokenType,
-					value:     value.String()}
 				unread_rune()
-				return token, err
+				return token(), err
 			}
 		}
 	}
@@ -292,18 +282,12 @@ func (t *Tokenizer) scanStream() (*Token, error) {
 		switch nextRuneType {
 		case eofRuneClass:
 			{
-				token := &Token{
-					tokenType: tokenType,
-					value:     value.String()}
-				return token, err
+				return token(), err
 			}
 		case spaceRuneClass:
 			{
-				token := &Token{
-					tokenType: tokenType,
-					value:     value.String()}
 				unread_rune()
-				return token, err
+				return token(), err
 			}
 		case escapingQuoteRuneClass:
 			{
@@ -329,10 +313,7 @@ func (t *Tokenizer) scanStream() (*Token, error) {
 		case eofRuneClass:
 			{
 				err = ErrTrailingEscape
-				token := &Token{
-					tokenType: tokenType,
-					value:     value.String()}
-				return token, err
+				return token(), err
 			}
 		default:
 			{
@@ -347,10 +328,7 @@ func (t *Tokenizer) scanStream() (*Token, error) {
 		case eofRuneClass:
 			{
 				err = ErrTrailingQuoteEscape
-				token := &Token{
-					tokenType: tokenType,
-					value:     value.String()}
-				return token, err
+				return token(), err
 			}
 		default:
 			{
@@ -365,10 +343,7 @@ func (t *Tokenizer) scanStream() (*Token, error) {
 		case eofRuneClass:
 			{
 				err = ErrUnclosedDoubleQuote
-				token := &Token{
-					tokenType: tokenType,
-					value:     value.String()}
-				return token, err
+				return token(), err
 			}
 		case escapingQuoteRuneClass:
 			{
@@ -390,10 +365,7 @@ func (t *Tokenizer) scanStream() (*Token, error) {
 		case eofRuneClass:
 			{
 				err = ErrUnclosedSingleQuote
-				token := &Token{
-					tokenType: tokenType,
-					value:     value.String()}
-				return token, err
+				return token(), err
 			}
 		case nonEscapingQuoteRuneClass:
 			{
@@ -19,6 +19,8 @@ package shlex
 import (
 	"strings"
 	"testing"
+
+	"github.com/google/go-cmp/cmp"
 )
 
 var (
@@ -44,31 +46,32 @@ func TestClassifier(t *testing.T) {
 func TestTokenizer(t *testing.T) {
 	testInput := testString
 	expectedTokens := []*Token{
-		{WordToken, "one"},
-		{SpaceToken, " "},
-		{WordToken, "two"},
-		{SpaceToken, " "},
-		{WordToken, "three four"},
-		{SpaceToken, " "},
-		{WordToken, "five \"six\""},
-		{SpaceToken, " "},
-		{WordToken, "seven#eight"},
-		{SpaceToken, " "},
-		{WordToken, "#"},
-		{SpaceToken, " "},
-		{WordToken, "nine"},
-		{SpaceToken, " "},
-		{WordToken, "#"},
-		{SpaceToken, " "},
-		{WordToken, "ten"},
-		{SpaceToken, " "},
-		{WordToken, "eleven"},
-		{SpaceToken, " "},
-		{WordToken, "twelve\\"},
-		{SpaceToken, " "},
-		{WordToken, "thirteen=13"},
-		{SpaceToken, " "},
-		{WordToken, "fourteen/14"}}
+		{WordToken, "one", 0},
+		{SpaceToken, " ", 3},
+		{WordToken, "two", 4},
+		{SpaceToken, " ", 7},
+		{WordToken, "three four", 8},
+		{SpaceToken, " ", 20},
+		{WordToken, "five \"six\"", 21},
+		{SpaceToken, " ", 35},
+		{WordToken, "seven#eight", 36},
+		{SpaceToken, " ", 47},
+		{WordToken, "#", 48},
+		{SpaceToken, " ", 49},
+		{WordToken, "nine", 50},
+		{SpaceToken, " ", 54},
+		{WordToken, "#", 55},
+		{SpaceToken, " ", 56},
+		{WordToken, "ten", 57},
+		{SpaceToken, " ", 60},
+		{WordToken, "eleven", 61},
+		{SpaceToken, " ", 67},
+		{WordToken, "twelve\\", 68},
+		{SpaceToken, " ", 77},
+		{WordToken, "thirteen=13", 78},
+		{SpaceToken, " ", 89},
+		{WordToken, "fourteen/14", 90},
+	}
 
 	tokenizer := NewTokenizer(strings.NewReader(testInput))
 	for i, want := range expectedTokens {
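The expected offsets double as a check that Pos counts raw input bytes, quotes included: the token decoded as "three four" starts at byte 8 and the following space at byte 20, so its raw spelling occupies 12 bytes. testString is defined elsewhere in the file, so the raw form below is inferred:

package main

import "fmt"

func main() {
	raw := `"three four"`         // inferred raw spelling inside testString
	fmt.Println(len(raw) == 20-8) // true: Pos deltas span the raw bytes
}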
@@ -76,8 +79,8 @@ func TestTokenizer(t *testing.T) {
 		if err != nil {
 			t.Error(err)
 		}
-		if !got.Equal(want) {
-			t.Errorf("Tokenizer.Next()[%v] of %q -> %#v. Want: %#v", i, testString, got, want)
+		if diff := cmp.Diff(want, got); diff != "" {
+			t.Fatalf("Tokenizer.Next()[%v] of: %s:\n%s", i, testString, diff)
 		}
 	}
 }
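Dropping Token.Equal in favor of cmp.Diff works only because the fields are now exported; go-cmp panics on unexported fields unless given an option such as cmp.AllowUnexported(Token{}). The assertion pattern in isolation (values invented; assumes the test's t and the package's Token and WordToken):

want := &Token{Type: WordToken, Value: "one", Pos: 0}
got := &Token{Type: WordToken, Value: "one", Pos: 0}
// cmp.Diff returns "" on a match and a readable -want/+got report
// otherwise, so the failure message pinpoints the differing field.
if diff := cmp.Diff(want, got); diff != "" {
	t.Fatalf("Token mismatch (-want +got):\n%s", diff)
}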