Get rid of comment parsing from shlex

POSIX does not define comments
This commit is contained in:
Kovid Goyal 2022-11-10 20:38:10 +05:30
parent 64156fd6e6
commit 87b4800fdf
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
2 changed files with 17 additions and 52 deletions

View File

@ -16,7 +16,7 @@ limitations under the License.
/* /*
Package shlex implements a simple lexer which splits input into tokens using Package shlex implements a simple lexer which splits input into tokens using
shell-style rules for quoting and commenting. shell-style rules for quoting.
The basic use case uses the default ASCII lexer to split a string into sub-strings: The basic use case uses the default ASCII lexer to split a string into sub-strings:
@ -29,7 +29,7 @@ To process a stream of strings:
// process token // process token
} }
To access the raw token stream (which includes tokens for comments): To access the raw token stream (which includes tokens for spaces):
t := NewTokenizer(os.Stdin) t := NewTokenizer(os.Stdin)
for ; token, err := t.Next(); err != nil { for ; token, err := t.Next(); err != nil {
@ -45,7 +45,7 @@ import (
"strings" "strings"
) )
// TokenType is a top-level token classification: A word, space, comment, unknown. // TokenType is a top-level token classification: A word, space, unknown.
type TokenType int type TokenType int
// runeTokenClass is the type of a UTF-8 character classification: A quote, space, escape. // runeTokenClass is the type of a UTF-8 character classification: A quote, space, escape.
@ -79,7 +79,6 @@ const (
escapingQuoteRunes = `"` escapingQuoteRunes = `"`
nonEscapingQuoteRunes = "'" nonEscapingQuoteRunes = "'"
escapeRunes = `\` escapeRunes = `\`
commentRunes = "#"
) )
// Classes of rune token // Classes of rune token
@ -89,7 +88,6 @@ const (
escapingQuoteRuneClass escapingQuoteRuneClass
nonEscapingQuoteRuneClass nonEscapingQuoteRuneClass
escapeRuneClass escapeRuneClass
commentRuneClass
eofRuneClass eofRuneClass
) )
@ -98,7 +96,6 @@ const (
UnknownToken TokenType = iota UnknownToken TokenType = iota
WordToken WordToken
SpaceToken SpaceToken
CommentToken
) )
func (t TokenType) String() string { func (t TokenType) String() string {
@ -109,8 +106,6 @@ func (t TokenType) String() string {
return "WordToken" return "WordToken"
case SpaceToken: case SpaceToken:
return "SpaceToken" return "SpaceToken"
case CommentToken:
return "CommentToken"
} }
} }
@ -123,7 +118,6 @@ const (
escapingQuotedState // we have just consumed an escape rune within a quoted string escapingQuotedState // we have just consumed an escape rune within a quoted string
quotingEscapingState // we are within a quoted string that supports escaping ("...") quotingEscapingState // we are within a quoted string that supports escaping ("...")
quotingState // we are within a string that does not support escaping ('...') quotingState // we are within a string that does not support escaping ('...')
commentState // we are within a comment (everything following an unquoted or unescaped #)
) )
// tokenClassifier is used for classifying rune characters. // tokenClassifier is used for classifying rune characters.
@ -142,7 +136,6 @@ func newDefaultClassifier() tokenClassifier {
t.addRuneClass(escapingQuoteRunes, escapingQuoteRuneClass) t.addRuneClass(escapingQuoteRunes, escapingQuoteRuneClass)
t.addRuneClass(nonEscapingQuoteRunes, nonEscapingQuoteRuneClass) t.addRuneClass(nonEscapingQuoteRunes, nonEscapingQuoteRuneClass)
t.addRuneClass(escapeRunes, escapeRuneClass) t.addRuneClass(escapeRunes, escapeRuneClass)
t.addRuneClass(commentRunes, commentRuneClass)
return t return t
} }
@ -151,7 +144,7 @@ func (t tokenClassifier) ClassifyRune(runeVal rune) runeTokenClass {
return t[runeVal] return t[runeVal]
} }
// Lexer turns an input stream into a sequence of tokens. Whitespace and comments are skipped. // Lexer turns an input stream into a sequence of tokens. Whitespace is skipped.
type Lexer Tokenizer type Lexer Tokenizer
// NewLexer creates a new lexer from an input stream. // NewLexer creates a new lexer from an input stream.
@ -171,8 +164,8 @@ func (l *Lexer) Next() (string, error) {
switch token.tokenType { switch token.tokenType {
case WordToken: case WordToken:
return token.value, nil return token.value, nil
case CommentToken, SpaceToken: case SpaceToken:
// skip comments and spaces // skip spaces
default: default:
return "", fmt.Errorf("Unknown token type: %v", token.tokenType) return "", fmt.Errorf("Unknown token type: %v", token.tokenType)
} }
@ -269,11 +262,6 @@ func (t *Tokenizer) scanStream() (*Token, error) {
tokenType = WordToken tokenType = WordToken
state = escapingState state = escapingState
} }
case commentRuneClass:
{
tokenType = CommentToken
state = commentState
}
default: default:
{ {
tokenType = WordToken tokenType = WordToken
@ -417,34 +405,6 @@ func (t *Tokenizer) scanStream() (*Token, error) {
} }
} }
} }
case commentState: // in a comment
{
switch nextRuneType {
case eofRuneClass:
{
token := &Token{
tokenType: tokenType,
value: string(value)}
return token, err
}
case spaceRuneClass:
{
if nextRune == '\n' {
state = startState
token := &Token{
tokenType: tokenType,
value: string(value)}
return token, err
} else {
value = append(value, nextRune)
}
}
default:
{
value = append(value, nextRune)
}
}
}
default: default:
{ {
return nil, fmt.Errorf("Unexpected state: %v", state) return nil, fmt.Errorf("Unexpected state: %v", state)

View File

@ -24,7 +24,7 @@ import (
var ( var (
// one two "three four" "five \"six\"" seven#eight # nine # ten // one two "three four" "five \"six\"" seven#eight # nine # ten
// eleven 'twelve\' // eleven 'twelve\'
testString = "one two \"three four\" \"five \\\"six\\\"\" seven#eight # nine # ten\n eleven 'twelve\\' thirteen=13 fourteen/14" testString = "one two \"three four\" \"five \\\"six\\\"\" seven#eight # nine # ten eleven 'twelve\\' thirteen=13 fourteen/14"
) )
func TestClassifier(t *testing.T) { func TestClassifier(t *testing.T) {
@ -32,8 +32,7 @@ func TestClassifier(t *testing.T) {
tests := map[rune]runeTokenClass{ tests := map[rune]runeTokenClass{
' ': spaceRuneClass, ' ': spaceRuneClass,
'"': escapingQuoteRuneClass, '"': escapingQuoteRuneClass,
'\'': nonEscapingQuoteRuneClass, '\'': nonEscapingQuoteRuneClass}
'#': commentRuneClass}
for runeChar, want := range tests { for runeChar, want := range tests {
got := classifier.ClassifyRune(runeChar) got := classifier.ClassifyRune(runeChar)
if got != want { if got != want {
@ -55,7 +54,13 @@ func TestTokenizer(t *testing.T) {
{SpaceToken, " "}, {SpaceToken, " "},
{WordToken, "seven#eight"}, {WordToken, "seven#eight"},
{SpaceToken, " "}, {SpaceToken, " "},
{CommentToken, " nine # ten"}, {WordToken, "#"},
{SpaceToken, " "},
{WordToken, "nine"},
{SpaceToken, " "},
{WordToken, "#"},
{SpaceToken, " "},
{WordToken, "ten"},
{SpaceToken, " "}, {SpaceToken, " "},
{WordToken, "eleven"}, {WordToken, "eleven"},
{SpaceToken, " "}, {SpaceToken, " "},
@ -79,7 +84,7 @@ func TestTokenizer(t *testing.T) {
func TestLexer(t *testing.T) { func TestLexer(t *testing.T) {
testInput := testString testInput := testString
expectedStrings := []string{"one", "two", "three four", "five \"six\"", "seven#eight", "eleven", "twelve\\", "thirteen=13", "fourteen/14"} expectedStrings := []string{"one", "two", "three four", "five \"six\"", "seven#eight", "#", "nine", "#", "ten", "eleven", "twelve\\", "thirteen=13", "fourteen/14"}
lexer := NewLexer(strings.NewReader(testInput)) lexer := NewLexer(strings.NewReader(testInput))
for i, want := range expectedStrings { for i, want := range expectedStrings {
@ -94,7 +99,7 @@ func TestLexer(t *testing.T) {
} }
func TestSplit(t *testing.T) { func TestSplit(t *testing.T) {
want := []string{"one", "two", "three four", "five \"six\"", "seven#eight", "eleven", "twelve\\", "thirteen=13", "fourteen/14"} want := []string{"one", "two", "three four", "five \"six\"", "seven#eight", "#", "nine", "#", "ten", "eleven", "twelve\\", "thirteen=13", "fourteen/14"}
got, err := Split(testString) got, err := Split(testString)
if err != nil { if err != nil {
t.Error(err) t.Error(err)