Get rid of comment parsing from shlex
POSIX does not define comments
This commit is contained in:
parent
64156fd6e6
commit
87b4800fdf
@ -16,7 +16,7 @@ limitations under the License.
|
||||
|
||||
/*
|
||||
Package shlex implements a simple lexer which splits input into tokens using
|
||||
shell-style rules for quoting and commenting.
|
||||
shell-style rules for quoting.
|
||||
|
||||
The basic use case uses the default ASCII lexer to split a string into sub-strings:
|
||||
|
||||
@ -29,7 +29,7 @@ To process a stream of strings:
|
||||
// process token
|
||||
}
|
||||
|
||||
To access the raw token stream (which includes tokens for comments):
|
||||
To access the raw token stream (which includes tokens for spaces):
|
||||
|
||||
t := NewTokenizer(os.Stdin)
|
||||
for token, err := t.Next(); err == nil; token, err = t.Next() {
|
||||
@ -45,7 +45,7 @@ import (
|
||||
"strings"
|
||||
)
|
||||
|
||||
// TokenType is a top-level token classification: A word, space, comment, unknown.
|
||||
// TokenType is a top-level token classification: A word, space, unknown.
|
||||
type TokenType int
|
||||
|
||||
// runeTokenClass is the type of a UTF-8 character classification: A quote, space, escape.
|
||||
@ -79,7 +79,6 @@ const (
|
||||
escapingQuoteRunes = `"`
|
||||
nonEscapingQuoteRunes = "'"
|
||||
escapeRunes = `\`
|
||||
commentRunes = "#"
|
||||
)
|
||||
|
||||
// Classes of rune token
|
||||
@ -89,7 +88,6 @@ const (
|
||||
escapingQuoteRuneClass
|
||||
nonEscapingQuoteRuneClass
|
||||
escapeRuneClass
|
||||
commentRuneClass
|
||||
eofRuneClass
|
||||
)
|
||||
|
||||
@ -98,7 +96,6 @@ const (
|
||||
UnknownToken TokenType = iota
|
||||
WordToken
|
||||
SpaceToken
|
||||
CommentToken
|
||||
)
|
||||
|
||||
func (t TokenType) String() string {
|
||||
@ -109,8 +106,6 @@ func (t TokenType) String() string {
|
||||
return "WordToken"
|
||||
case SpaceToken:
|
||||
return "SpaceToken"
|
||||
case CommentToken:
|
||||
return "CommentToken"
|
||||
}
|
||||
}
|
||||
|
||||
@ -123,7 +118,6 @@ const (
|
||||
escapingQuotedState // we have just consumed an escape rune within a quoted string
|
||||
quotingEscapingState // we are within a quoted string that supports escaping ("...")
|
||||
quotingState // we are within a string that does not support escaping ('...')
|
||||
commentState // we are within a comment (everything following an unquoted or unescaped #)
|
||||
)
|
||||
|
||||
// tokenClassifier is used for classifying rune characters.
|
||||
@ -142,7 +136,6 @@ func newDefaultClassifier() tokenClassifier {
|
||||
t.addRuneClass(escapingQuoteRunes, escapingQuoteRuneClass)
|
||||
t.addRuneClass(nonEscapingQuoteRunes, nonEscapingQuoteRuneClass)
|
||||
t.addRuneClass(escapeRunes, escapeRuneClass)
|
||||
t.addRuneClass(commentRunes, commentRuneClass)
|
||||
return t
|
||||
}
|
||||
|
||||
@ -151,7 +144,7 @@ func (t tokenClassifier) ClassifyRune(runeVal rune) runeTokenClass {
|
||||
return t[runeVal]
|
||||
}
|
||||
|
||||
// Lexer turns an input stream into a sequence of tokens. Whitespace and comments are skipped.
|
||||
// Lexer turns an input stream into a sequence of tokens. Whitespace is skipped.
|
||||
type Lexer Tokenizer
|
||||
|
||||
// NewLexer creates a new lexer from an input stream.
|
||||
@ -171,8 +164,8 @@ func (l *Lexer) Next() (string, error) {
|
||||
switch token.tokenType {
|
||||
case WordToken:
|
||||
return token.value, nil
|
||||
case CommentToken, SpaceToken:
|
||||
// skip comments and spaces
|
||||
case SpaceToken:
|
||||
// skip spaces
|
||||
default:
|
||||
return "", fmt.Errorf("Unknown token type: %v", token.tokenType)
|
||||
}
|
||||
@ -269,11 +262,6 @@ func (t *Tokenizer) scanStream() (*Token, error) {
|
||||
tokenType = WordToken
|
||||
state = escapingState
|
||||
}
|
||||
case commentRuneClass:
|
||||
{
|
||||
tokenType = CommentToken
|
||||
state = commentState
|
||||
}
|
||||
default:
|
||||
{
|
||||
tokenType = WordToken
|
||||
@ -417,34 +405,6 @@ func (t *Tokenizer) scanStream() (*Token, error) {
|
||||
}
|
||||
}
|
||||
}
|
||||
case commentState: // in a comment
|
||||
{
|
||||
switch nextRuneType {
|
||||
case eofRuneClass:
|
||||
{
|
||||
token := &Token{
|
||||
tokenType: tokenType,
|
||||
value: string(value)}
|
||||
return token, err
|
||||
}
|
||||
case spaceRuneClass:
|
||||
{
|
||||
if nextRune == '\n' {
|
||||
state = startState
|
||||
token := &Token{
|
||||
tokenType: tokenType,
|
||||
value: string(value)}
|
||||
return token, err
|
||||
} else {
|
||||
value = append(value, nextRune)
|
||||
}
|
||||
}
|
||||
default:
|
||||
{
|
||||
value = append(value, nextRune)
|
||||
}
|
||||
}
|
||||
}
|
||||
default:
|
||||
{
|
||||
return nil, fmt.Errorf("Unexpected state: %v", state)
|
||||
|
||||
@ -24,7 +24,7 @@ import (
|
||||
var (
|
||||
// one two "three four" "five \"six\"" seven#eight # nine # ten
|
||||
// eleven 'twelve\'
|
||||
testString = "one two \"three four\" \"five \\\"six\\\"\" seven#eight # nine # ten\n eleven 'twelve\\' thirteen=13 fourteen/14"
|
||||
testString = "one two \"three four\" \"five \\\"six\\\"\" seven#eight # nine # ten eleven 'twelve\\' thirteen=13 fourteen/14"
|
||||
)
|
||||
|
||||
func TestClassifier(t *testing.T) {
|
||||
@ -32,8 +32,7 @@ func TestClassifier(t *testing.T) {
|
||||
tests := map[rune]runeTokenClass{
|
||||
' ': spaceRuneClass,
|
||||
'"': escapingQuoteRuneClass,
|
||||
'\'': nonEscapingQuoteRuneClass,
|
||||
'#': commentRuneClass}
|
||||
'\'': nonEscapingQuoteRuneClass}
|
||||
for runeChar, want := range tests {
|
||||
got := classifier.ClassifyRune(runeChar)
|
||||
if got != want {
|
||||
@ -55,7 +54,13 @@ func TestTokenizer(t *testing.T) {
|
||||
{SpaceToken, " "},
|
||||
{WordToken, "seven#eight"},
|
||||
{SpaceToken, " "},
|
||||
{CommentToken, " nine # ten"},
|
||||
{WordToken, "#"},
|
||||
{SpaceToken, " "},
|
||||
{WordToken, "nine"},
|
||||
{SpaceToken, " "},
|
||||
{WordToken, "#"},
|
||||
{SpaceToken, " "},
|
||||
{WordToken, "ten"},
|
||||
{SpaceToken, " "},
|
||||
{WordToken, "eleven"},
|
||||
{SpaceToken, " "},
|
||||
@ -79,7 +84,7 @@ func TestTokenizer(t *testing.T) {
|
||||
|
||||
func TestLexer(t *testing.T) {
|
||||
testInput := testString
|
||||
expectedStrings := []string{"one", "two", "three four", "five \"six\"", "seven#eight", "eleven", "twelve\\", "thirteen=13", "fourteen/14"}
|
||||
expectedStrings := []string{"one", "two", "three four", "five \"six\"", "seven#eight", "#", "nine", "#", "ten", "eleven", "twelve\\", "thirteen=13", "fourteen/14"}
|
||||
|
||||
lexer := NewLexer(strings.NewReader(testInput))
|
||||
for i, want := range expectedStrings {
|
||||
@ -94,7 +99,7 @@ func TestLexer(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestSplit(t *testing.T) {
|
||||
want := []string{"one", "two", "three four", "five \"six\"", "seven#eight", "eleven", "twelve\\", "thirteen=13", "fourteen/14"}
|
||||
want := []string{"one", "two", "three four", "five \"six\"", "seven#eight", "#", "nine", "#", "ten", "eleven", "twelve\\", "thirteen=13", "fourteen/14"}
|
||||
got, err := Split(testString)
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user