Get rid of comment parsing from shlex

POSIX does not define comments
Kovid Goyal 2022-11-10 20:38:10 +05:30
parent 64156fd6e6
commit 87b4800fdf
2 changed files with 17 additions and 52 deletions
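
After this change an unquoted "#" is treated like any other word character instead of starting a comment. A minimal sketch of the user-visible difference, using the package's Split function (the import path is illustrative, not the real one):

package main

import (
	"fmt"

	"example.com/shlex" // illustrative import path for this fork
)

func main() {
	// Before this commit: ["echo" "hello"] ("# world" was swallowed as a comment).
	// After this commit:  ["echo" "hello" "#" "world"] ("#" is just another word).
	words, err := shlex.Split("echo hello # world")
	if err != nil {
		panic(err)
	}
	fmt.Printf("%q\n", words)
}

Words such as "seven#eight" are unaffected either way, since a "#" inside a word never started a comment; the updated tests below exercise both cases.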

shlex.go

@@ -16,7 +16,7 @@ limitations under the License.
 /*
 Package shlex implements a simple lexer which splits input in to tokens using
-shell-style rules for quoting and commenting.
+shell-style rules for quoting.
 The basic use case uses the default ASCII lexer to split a string into sub-strings:
@@ -29,7 +29,7 @@ To process a stream of strings:
 		// process token
 	}
-To access the raw token stream (which includes tokens for comments):
+To access the raw token stream (which includes tokens for spaces):
 	t := NewTokenizer(os.Stdin)
 	for ; token, err := t.Next(); err != nil {
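
The raw-stream loop in the doc comment above, inherited from upstream shlex, is not valid Go as written. A corrected sketch of the same loop, assuming Tokenizer.Next reports io.EOF at end of input as it does upstream (import path again illustrative):

package main

import (
	"io"
	"os"

	"example.com/shlex" // illustrative import path for this fork
)

func main() {
	t := shlex.NewTokenizer(os.Stdin)
	for {
		token, err := t.Next()
		if err == io.EOF {
			break // end of input
		}
		if err != nil {
			panic(err)
		}
		_ = token // process token here
	}
}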
@@ -45,7 +45,7 @@ import (
 	"strings"
 )
-// TokenType is a top-level token classification: A word, space, comment, unknown.
+// TokenType is a top-level token classification: A word, space, unknown.
 type TokenType int
 // runeTokenClass is the type of a UTF-8 character classification: A quote, space, escape.
@@ -79,7 +79,6 @@ const (
 	escapingQuoteRunes    = `"`
 	nonEscapingQuoteRunes = "'"
 	escapeRunes           = `\`
-	commentRunes          = "#"
 )
 // Classes of rune token
@@ -89,7 +88,6 @@ const (
 	escapingQuoteRuneClass
 	nonEscapingQuoteRuneClass
 	escapeRuneClass
-	commentRuneClass
 	eofRuneClass
 )
@@ -98,7 +96,6 @@ const (
 	UnknownToken TokenType = iota
 	WordToken
 	SpaceToken
-	CommentToken
 )
 func (t TokenType) String() string {
@@ -109,8 +106,6 @@ func (t TokenType) String() string {
 		return "WordToken"
 	case SpaceToken:
 		return "SpaceToken"
-	case CommentToken:
-		return "CommentToken"
 	}
 }
@@ -123,7 +118,6 @@ const (
 	escapingQuotedState  // we have just consumed an escape rune within a quoted string
 	quotingEscapingState // we are within a quoted string that supports escaping ("...")
 	quotingState         // we are within a string that does not support escaping ('...')
-	commentState         // we are within a comment (everything following an unquoted or unescaped #
 )
 // tokenClassifier is used for classifying rune characters.
@@ -142,7 +136,6 @@ func newDefaultClassifier() tokenClassifier {
 	t.addRuneClass(escapingQuoteRunes, escapingQuoteRuneClass)
 	t.addRuneClass(nonEscapingQuoteRunes, nonEscapingQuoteRuneClass)
 	t.addRuneClass(escapeRunes, escapeRuneClass)
-	t.addRuneClass(commentRunes, commentRuneClass)
 	return t
 }
@@ -151,7 +144,7 @@ func (t tokenClassifier) ClassifyRune(runeVal rune) runeTokenClass {
 	return t[runeVal]
 }
-// Lexer turns an input stream into a sequence of tokens. Whitespace and comments are skipped.
+// Lexer turns an input stream into a sequence of tokens. Whitespace is skipped.
 type Lexer Tokenizer
 // NewLexer creates a new lexer from an input stream.
@@ -171,8 +164,8 @@ func (l *Lexer) Next() (string, error) {
 		switch token.tokenType {
 		case WordToken:
 			return token.value, nil
-		case CommentToken, SpaceToken:
-			// skip comments and spaces
+		case SpaceToken:
+			// skip spaces
 		default:
 			return "", fmt.Errorf("Unknown token type: %v", token.tokenType)
 		}
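
Lexer.Next keeps looping until it sees a WordToken, so callers still receive only words; the difference is that "#" and anything after it now come through as words instead of being dropped with the comment. A short usage sketch (import path illustrative):

package main

import (
	"fmt"
	"io"
	"strings"

	"example.com/shlex" // illustrative import path for this fork
)

func main() {
	lexer := shlex.NewLexer(strings.NewReader("one two # three"))
	for {
		word, err := lexer.Next()
		if err == io.EOF {
			break
		}
		if err != nil {
			panic(err)
		}
		fmt.Println(word) // after this commit prints: one, two, #, three
	}
}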
@@ -269,11 +262,6 @@ func (t *Tokenizer) scanStream() (*Token, error) {
 					tokenType = WordToken
 					state = escapingState
 				}
-			case commentRuneClass:
-				{
-					tokenType = CommentToken
-					state = commentState
-				}
 			default:
 				{
 					tokenType = WordToken
@@ -417,34 +405,6 @@ func (t *Tokenizer) scanStream() (*Token, error) {
 				}
 			}
 		}
-	case commentState: // in a comment
-		{
-			switch nextRuneType {
-			case eofRuneClass:
-				{
-					token := &Token{
-						tokenType: tokenType,
-						value:     string(value)}
-					return token, err
-				}
-			case spaceRuneClass:
-				{
-					if nextRune == '\n' {
-						state = startState
-						token := &Token{
-							tokenType: tokenType,
-							value:     string(value)}
-						return token, err
-					} else {
-						value = append(value, nextRune)
-					}
-				}
-			default:
-				{
-					value = append(value, nextRune)
-				}
-			}
-		}
 	default:
 		{
 			return nil, fmt.Errorf("Unexpected state: %v", state)

shlex_test.go

@@ -24,7 +24,7 @@ import (
 var (
 	// one two "three four" "five \"six\"" seven#eight # nine # ten
 	// eleven 'twelve\'
-	testString = "one two \"three four\" \"five \\\"six\\\"\" seven#eight # nine # ten\n eleven 'twelve\\' thirteen=13 fourteen/14"
+	testString = "one two \"three four\" \"five \\\"six\\\"\" seven#eight # nine # ten eleven 'twelve\\' thirteen=13 fourteen/14"
 )
 func TestClassifier(t *testing.T) {
@@ -32,8 +32,7 @@ func TestClassifier(t *testing.T) {
 	tests := map[rune]runeTokenClass{
 		' ':  spaceRuneClass,
 		'"':  escapingQuoteRuneClass,
-		'\'': nonEscapingQuoteRuneClass,
-		'#':  commentRuneClass}
+		'\'': nonEscapingQuoteRuneClass}
 	for runeChar, want := range tests {
 		got := classifier.ClassifyRune(runeChar)
 		if got != want {
@@ -55,7 +54,13 @@ func TestTokenizer(t *testing.T) {
 		{SpaceToken, " "},
 		{WordToken, "seven#eight"},
 		{SpaceToken, " "},
-		{CommentToken, " nine # ten"},
+		{WordToken, "#"},
+		{SpaceToken, " "},
+		{WordToken, "nine"},
+		{SpaceToken, " "},
+		{WordToken, "#"},
+		{SpaceToken, " "},
+		{WordToken, "ten"},
 		{SpaceToken, " "},
 		{WordToken, "eleven"},
 		{SpaceToken, " "},
@@ -79,7 +84,7 @@ func TestTokenizer(t *testing.T) {
 func TestLexer(t *testing.T) {
 	testInput := testString
-	expectedStrings := []string{"one", "two", "three four", "five \"six\"", "seven#eight", "eleven", "twelve\\", "thirteen=13", "fourteen/14"}
+	expectedStrings := []string{"one", "two", "three four", "five \"six\"", "seven#eight", "#", "nine", "#", "ten", "eleven", "twelve\\", "thirteen=13", "fourteen/14"}
 	lexer := NewLexer(strings.NewReader(testInput))
 	for i, want := range expectedStrings {
@@ -94,7 +99,7 @@ func TestLexer(t *testing.T) {
 }
 func TestSplit(t *testing.T) {
-	want := []string{"one", "two", "three four", "five \"six\"", "seven#eight", "eleven", "twelve\\", "thirteen=13", "fourteen/14"}
+	want := []string{"one", "two", "three four", "five \"six\"", "seven#eight", "#", "nine", "#", "ten", "eleven", "twelve\\", "thirteen=13", "fourteen/14"}
 	got, err := Split(testString)
 	if err != nil {
 		t.Error(err)