Get rid of comment parsing from shlex

POSIX does not define comments
2022-11-10 20:38:10 +05:30 · 2022-11-10 20:38:10 +05:30 · 87b4800fdf
commit 87b4800fdf
parent 64156fd6e6
2 changed files with 17 additions and 52 deletions
--- a/tools/utils/shlex/shlex.go
+++ b/tools/utils/shlex/shlex.go
@ -16,7 +16,7 @@ limitations under the License.
 /*
 Package shlex implements a simple lexer which splits input in to tokens using
-shell-style rules for quoting and commenting.
+shell-style rules for quoting.
 The basic use case uses the default ASCII lexer to split a string into sub-strings:
@ -29,7 +29,7 @@ To process a stream of strings:
 		// process token
 	}
-To access the raw token stream (which includes tokens for comments):
+To access the raw token stream (which includes tokens for spaces):
 	  t := NewTokenizer(os.Stdin)
 	  for ; token, err := t.Next(); err != nil {
@ -45,7 +45,7 @@ import (
 	"strings"
 )
-// TokenType is a top-level token classification: A word, space, comment, unknown.
+// TokenType is a top-level token classification: A word, space, unknown.
 type TokenType int
 // runeTokenClass is the type of a UTF-8 character classification: A quote, space, escape.
@ -79,7 +79,6 @@ const (
 	escapingQuoteRunes    = `"`
 	nonEscapingQuoteRunes = "'"
 	escapeRunes           = `\`
-	commentRunes          = "#"
 )
 // Classes of rune token
@ -89,7 +88,6 @@ const (
 	escapingQuoteRuneClass
 	nonEscapingQuoteRuneClass
 	escapeRuneClass
-	commentRuneClass
 	eofRuneClass
 )
@ -98,7 +96,6 @@ const (
 	UnknownToken TokenType = iota
 	WordToken
 	SpaceToken
-	CommentToken
 )
 func (t TokenType) String() string {
@ -109,8 +106,6 @@ func (t TokenType) String() string {
 		return "WordToken"
 	case SpaceToken:
 		return "SpaceToken"
-	case CommentToken:
-		return "CommentToken"
 	}
 }
@ -123,7 +118,6 @@ const (
 	escapingQuotedState                    // we have just consumed an escape rune within a quoted string
 	quotingEscapingState                   // we are within a quoted string that supports escaping ("...")
 	quotingState                           // we are within a string that does not support escaping ('...')
-	commentState                           // we are within a comment (everything following an unquoted or unescaped #
 )
 // tokenClassifier is used for classifying rune characters.
@ -142,7 +136,6 @@ func newDefaultClassifier() tokenClassifier {
 	t.addRuneClass(escapingQuoteRunes, escapingQuoteRuneClass)
 	t.addRuneClass(nonEscapingQuoteRunes, nonEscapingQuoteRuneClass)
 	t.addRuneClass(escapeRunes, escapeRuneClass)
-	t.addRuneClass(commentRunes, commentRuneClass)
 	return t
 }
@ -151,7 +144,7 @@ func (t tokenClassifier) ClassifyRune(runeVal rune) runeTokenClass {
 	return t[runeVal]
 }
-// Lexer turns an input stream into a sequence of tokens. Whitespace and comments are skipped.
+// Lexer turns an input stream into a sequence of tokens. Whitespace is skipped.
 type Lexer Tokenizer
 // NewLexer creates a new lexer from an input stream.
@ -171,8 +164,8 @@ func (l *Lexer) Next() (string, error) {
 		switch token.tokenType {
 		case WordToken:
 			return token.value, nil
-		case CommentToken, SpaceToken:
+		case SpaceToken:
-			// skip comments and spaces
+			// skip spaces
 		default:
 			return "", fmt.Errorf("Unknown token type: %v", token.tokenType)
 		}
@ -269,11 +262,6 @@ func (t *Tokenizer) scanStream() (*Token, error) {
 						tokenType = WordToken
 						state = escapingState
 					}
-				case commentRuneClass:
-					{
-						tokenType = CommentToken
-						state = commentState
-					}
 				default:
 					{
 						tokenType = WordToken
@ -417,34 +405,6 @@ func (t *Tokenizer) scanStream() (*Token, error) {
 					}
 				}
 			}
-		case commentState: // in a comment
-			{
-				switch nextRuneType {
-				case eofRuneClass:
-					{
-						token := &Token{
-							tokenType: tokenType,
-							value:     string(value)}
-						return token, err
-					}
-				case spaceRuneClass:
-					{
-						if nextRune == '\n' {
-							state = startState
-							token := &Token{
-								tokenType: tokenType,
-								value:     string(value)}
-							return token, err
-						} else {
-							value = append(value, nextRune)
-						}
-					}
-				default:
-					{
-						value = append(value, nextRune)
-					}
-				}
-			}
 		default:
 			{
 				return nil, fmt.Errorf("Unexpected state: %v", state)
--- a/tools/utils/shlex/shlex_test.go
+++ b/tools/utils/shlex/shlex_test.go
@ -24,7 +24,7 @@ import (
 var (
 	// one two "three four" "five \"six\"" seven#eight # nine # ten
 	// eleven 'twelve\'
-	testString = "one two \"three four\" \"five \\\"six\\\"\" seven#eight # nine # ten\n eleven 'twelve\\' thirteen=13 fourteen/14"
+	testString = "one two \"three four\" \"five \\\"six\\\"\" seven#eight # nine # ten eleven 'twelve\\' thirteen=13 fourteen/14"
 )
 func TestClassifier(t *testing.T) {
@ -32,8 +32,7 @@ func TestClassifier(t *testing.T) {
 	tests := map[rune]runeTokenClass{
 		' ':  spaceRuneClass,
 		'"':  escapingQuoteRuneClass,
-		'\'': nonEscapingQuoteRuneClass,
+		'\'': nonEscapingQuoteRuneClass}
-		'#':  commentRuneClass}
 	for runeChar, want := range tests {
 		got := classifier.ClassifyRune(runeChar)
 		if got != want {
@ -55,7 +54,13 @@ func TestTokenizer(t *testing.T) {
 		{SpaceToken, " "},
 		{WordToken, "seven#eight"},
 		{SpaceToken, " "},
-		{CommentToken, " nine # ten"},
+		{WordToken, "#"},
+		{SpaceToken, " "},
+		{WordToken, "nine"},
+		{SpaceToken, " "},
+		{WordToken, "#"},
+		{SpaceToken, " "},
+		{WordToken, "ten"},
 		{SpaceToken, " "},
 		{WordToken, "eleven"},
 		{SpaceToken, " "},
@ -79,7 +84,7 @@ func TestTokenizer(t *testing.T) {
 func TestLexer(t *testing.T) {
 	testInput := testString
-	expectedStrings := []string{"one", "two", "three four", "five \"six\"", "seven#eight", "eleven", "twelve\\", "thirteen=13", "fourteen/14"}
+	expectedStrings := []string{"one", "two", "three four", "five \"six\"", "seven#eight", "#", "nine", "#", "ten", "eleven", "twelve\\", "thirteen=13", "fourteen/14"}
 	lexer := NewLexer(strings.NewReader(testInput))
 	for i, want := range expectedStrings {
@ -94,7 +99,7 @@ func TestLexer(t *testing.T) {
 }
 func TestSplit(t *testing.T) {
-	want := []string{"one", "two", "three four", "five \"six\"", "seven#eight", "eleven", "twelve\\", "thirteen=13", "fourteen/14"}
+	want := []string{"one", "two", "three four", "five \"six\"", "seven#eight", "#", "nine", "#", "ten", "eleven", "twelve\\", "thirteen=13", "fourteen/14"}
 	got, err := Split(testString)
 	if err != nil {
 		t.Error(err)