Get rid of comment parsing from shlex
POSIX does not define comments
parent 64156fd6e6
commit 87b4800fdf
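For context: after this change an unquoted "#" no longer starts a comment and is treated like any other word rune. A minimal before/after sketch (illustrative only, not part of the commit; the import path is assumed):

	package main

	import (
		"fmt"

		"github.com/google/shlex" // import path assumed for illustration
	)

	func main() {
		tokens, err := shlex.Split("echo hello # world")
		if err != nil {
			panic(err)
		}
		// Before this commit: [echo hello]         (the "# world" tail was parsed as a comment and dropped)
		// After this commit:  [echo hello # world] ("#" is an ordinary word rune)
		fmt.Println(tokens)
	}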
@@ -16,7 +16,7 @@ limitations under the License.
 
 /*
 Package shlex implements a simple lexer which splits input in to tokens using
-shell-style rules for quoting and commenting.
+shell-style rules for quoting.
 
 The basic use case uses the default ASCII lexer to split a string into sub-strings:
 
@@ -29,7 +29,7 @@ To process a stream of strings:
 		// process token
 	}
 
-To access the raw token stream (which includes tokens for comments):
+To access the raw token stream (which includes tokens for spaces):
 
 	t := NewTokenizer(os.Stdin)
 	for ; token, err := t.Next(); err != nil {
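Aside: the raw-token loop in the doc comment above is unchanged by this commit, but as written it is not valid Go (a short variable declaration cannot appear in a for statement's condition slot, and the condition would loop while err != nil rather than until an error occurs). A working equivalent, written as a hedged sketch in the same fragment style as the doc comment, might look like:

	t := NewTokenizer(os.Stdin)
	for {
		token, err := t.Next()
		if err != nil { // io.EOF once the input is exhausted
			break
		}
		fmt.Println(token) // process token
	}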
@@ -45,7 +45,7 @@ import (
 	"strings"
 )
 
-// TokenType is a top-level token classification: A word, space, comment, unknown.
+// TokenType is a top-level token classification: A word, space, unknown.
 type TokenType int
 
 // runeTokenClass is the type of a UTF-8 character classification: A quote, space, escape.
@@ -79,7 +79,6 @@ const (
 	escapingQuoteRunes    = `"`
 	nonEscapingQuoteRunes = "'"
 	escapeRunes           = `\`
-	commentRunes          = "#"
 )
 
 // Classes of rune token
@@ -89,7 +88,6 @@ const (
 	escapingQuoteRuneClass
 	nonEscapingQuoteRuneClass
 	escapeRuneClass
-	commentRuneClass
 	eofRuneClass
 )
 
@@ -98,7 +96,6 @@ const (
 	UnknownToken TokenType = iota
 	WordToken
 	SpaceToken
-	CommentToken
 )
 
 func (t TokenType) String() string {
@@ -109,8 +106,6 @@ func (t TokenType) String() string {
 		return "WordToken"
 	case SpaceToken:
 		return "SpaceToken"
-	case CommentToken:
-		return "CommentToken"
 	}
 }
 
@@ -123,7 +118,6 @@ const (
 	escapingQuotedState  // we have just consumed an escape rune within a quoted string
 	quotingEscapingState // we are within a quoted string that supports escaping ("...")
 	quotingState         // we are within a string that does not support escaping ('...')
-	commentState         // we are within a comment (everything following an unquoted or unescaped #
 )
 
 // tokenClassifier is used for classifying rune characters.
@@ -142,7 +136,6 @@ func newDefaultClassifier() tokenClassifier {
 	t.addRuneClass(escapingQuoteRunes, escapingQuoteRuneClass)
 	t.addRuneClass(nonEscapingQuoteRunes, nonEscapingQuoteRuneClass)
 	t.addRuneClass(escapeRunes, escapeRuneClass)
-	t.addRuneClass(commentRunes, commentRuneClass)
 	return t
 }
 
@@ -151,7 +144,7 @@ func (t tokenClassifier) ClassifyRune(runeVal rune) runeTokenClass {
 	return t[runeVal]
 }
 
-// Lexer turns an input stream into a sequence of tokens. Whitespace and comments are skipped.
+// Lexer turns an input stream into a sequence of tokens. Whitespace is skipped.
 type Lexer Tokenizer
 
 // NewLexer creates a new lexer from an input stream.
@@ -171,8 +164,8 @@ func (l *Lexer) Next() (string, error) {
 		switch token.tokenType {
 		case WordToken:
 			return token.value, nil
-		case CommentToken, SpaceToken:
-			// skip comments and spaces
+		case SpaceToken:
+			// skip spaces
 		default:
 			return "", fmt.Errorf("Unknown token type: %v", token.tokenType)
 		}
@@ -269,11 +262,6 @@ func (t *Tokenizer) scanStream() (*Token, error) {
						tokenType = WordToken
						state = escapingState
					}
-				case commentRuneClass:
-					{
-						tokenType = CommentToken
-						state = commentState
-					}
 				default:
 					{
 						tokenType = WordToken
@@ -417,34 +405,6 @@ func (t *Tokenizer) scanStream() (*Token, error) {
 					}
 				}
 			}
-		case commentState: // in a comment
-			{
-				switch nextRuneType {
-				case eofRuneClass:
-					{
-						token := &Token{
-							tokenType: tokenType,
-							value:     string(value)}
-						return token, err
-					}
-				case spaceRuneClass:
-					{
-						if nextRune == '\n' {
-							state = startState
-							token := &Token{
-								tokenType: tokenType,
-								value:     string(value)}
-							return token, err
-						} else {
-							value = append(value, nextRune)
-						}
-					}
-				default:
-					{
-						value = append(value, nextRune)
-					}
-				}
-			}
 		default:
 			{
 				return nil, fmt.Errorf("Unexpected state: %v", state)
@@ -24,7 +24,7 @@ import (
 var (
 	// one two "three four" "five \"six\"" seven#eight # nine # ten
 	// eleven 'twelve\'
-	testString = "one two \"three four\" \"five \\\"six\\\"\" seven#eight # nine # ten\n eleven 'twelve\\' thirteen=13 fourteen/14"
+	testString = "one two \"three four\" \"five \\\"six\\\"\" seven#eight # nine # ten eleven 'twelve\\' thirteen=13 fourteen/14"
 )
 
 func TestClassifier(t *testing.T) {
@@ -32,8 +32,7 @@ func TestClassifier(t *testing.T) {
 	tests := map[rune]runeTokenClass{
 		' ':  spaceRuneClass,
 		'"':  escapingQuoteRuneClass,
-		'\'': nonEscapingQuoteRuneClass,
-		'#':  commentRuneClass}
+		'\'': nonEscapingQuoteRuneClass}
 	for runeChar, want := range tests {
 		got := classifier.ClassifyRune(runeChar)
 		if got != want {
@@ -55,7 +54,13 @@ func TestTokenizer(t *testing.T) {
 		{SpaceToken, " "},
 		{WordToken, "seven#eight"},
 		{SpaceToken, " "},
-		{CommentToken, " nine # ten"},
+		{WordToken, "#"},
+		{SpaceToken, " "},
+		{WordToken, "nine"},
+		{SpaceToken, " "},
+		{WordToken, "#"},
+		{SpaceToken, " "},
+		{WordToken, "ten"},
 		{SpaceToken, " "},
 		{WordToken, "eleven"},
 		{SpaceToken, " "},
@@ -79,7 +84,7 @@ func TestTokenizer(t *testing.T) {
 
 func TestLexer(t *testing.T) {
 	testInput := testString
-	expectedStrings := []string{"one", "two", "three four", "five \"six\"", "seven#eight", "eleven", "twelve\\", "thirteen=13", "fourteen/14"}
+	expectedStrings := []string{"one", "two", "three four", "five \"six\"", "seven#eight", "#", "nine", "#", "ten", "eleven", "twelve\\", "thirteen=13", "fourteen/14"}
 
 	lexer := NewLexer(strings.NewReader(testInput))
 	for i, want := range expectedStrings {
@@ -94,7 +99,7 @@ func TestLexer(t *testing.T) {
 }
 
 func TestSplit(t *testing.T) {
-	want := []string{"one", "two", "three four", "five \"six\"", "seven#eight", "eleven", "twelve\\", "thirteen=13", "fourteen/14"}
+	want := []string{"one", "two", "three four", "five \"six\"", "seven#eight", "#", "nine", "#", "ten", "eleven", "twelve\\", "thirteen=13", "fourteen/14"}
 	got, err := Split(testString)
 	if err != nil {
 		t.Error(err)
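The updated expectations can be sanity-checked from outside the package via Split; a small sketch mirroring the new TestSplit data (again illustrative, with the import path assumed):

	package main

	import (
		"fmt"
		"reflect"

		"github.com/google/shlex" // import path assumed for illustration
	)

	func main() {
		got, err := shlex.Split("seven#eight # nine # ten")
		if err != nil {
			panic(err)
		}
		want := []string{"seven#eight", "#", "nine", "#", "ten"}
		fmt.Println(reflect.DeepEqual(got, want)) // prints true under the new behavior
	}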