Commit 22ac6a64 authored by Dawid Dziurla's avatar Dawid Dziurla

New upstream version 0.0~git20161002.648efa6

parents
shlex.test
Copyright (c) anmitsu <anmitsu.s@gmail.com>
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
# go-shlex
go-shlex is a Go library that performs shell-like lexical analysis
(word splitting) of command strings.
## Install
go get -u "github.com/anmitsu/go-shlex"
## Usage
```go
package main
import (
"fmt"
"log"
"github.com/anmitsu/go-shlex"
)
func main() {
cmd := `cp -Rdp "file name" 'file name2' dir\ name`
words, err := shlex.Split(cmd, true)
if err != nil {
log.Fatal(err)
}
for _, w := range words {
fmt.Println(w)
}
}
```
## Documentation
http://godoc.org/github.com/anmitsu/go-shlex
package shlex_test
import (
"fmt"
"log"
"github.com/anmitsu/go-shlex"
flynn_shlex "github.com/flynn/go-shlex"
)
// ExampleSplit demonstrates splitting the same command line in both
// POSIX mode (quotes and escapes interpreted) and non-POSIX mode
// (quotes and escapes preserved verbatim).
func ExampleSplit() {
	cmd := `cp -Rdp "file name" 'file name2' dir\ name`

	// POSIX-mode split of cmd.
	posixWords, err := shlex.Split(cmd, true)
	if err != nil {
		log.Fatal(err)
	}

	// Non-POSIX-mode split of cmd.
	rawWords, err := shlex.Split(cmd, false)
	if err != nil {
		log.Fatal(err)
	}

	fmt.Println("Source command:")
	fmt.Println(`cp -Rdp "file name" 'file name2' dir\ name`)
	fmt.Println()

	fmt.Println("POSIX mode:")
	for _, w := range posixWords {
		fmt.Println(w)
	}
	fmt.Println()

	fmt.Println("Non-POSIX mode:")
	for _, w := range rawWords {
		fmt.Println(w)
	}
	// Output:
	// Source command:
	// cp -Rdp "file name" 'file name2' dir\ name
	//
	// POSIX mode:
	// cp
	// -Rdp
	// file name
	// file name2
	// dir name
	//
	// Non-POSIX mode:
	// cp
	// -Rdp
	// "file name"
	// 'file name2'
	// dir\
	// name
}
// ExampleSplit_compareFlynn contrasts this package with
// github.com/flynn/go-shlex, which rejects non-ASCII runes that this
// package accepts.
func ExampleSplit_compareFlynn() {
	cmd := `English and 日本語`
	// Split for github.com/flynn/go-shlex imported as flynn_shlex
	words_flynn, err1 := flynn_shlex.Split(cmd)
	// Split for github.com/anmitsu/go-shlex
	words_anmitsu, err2 := shlex.Split(cmd, true)
	fmt.Println("Source string:")
	fmt.Println(cmd)
	fmt.Println()
	fmt.Println("Result of github.com/flynn/go-shlex:")
	for _, word := range words_flynn {
		fmt.Println(word)
	}
	// Fix: guard the nil case before calling Error(). The original
	// called err1.Error() unconditionally, which would panic with a
	// nil pointer dereference if flynn/go-shlex ever accepted this
	// input (err2 below was already guarded this way).
	if err1 != nil {
		fmt.Println(err1.Error())
	}
	fmt.Println()
	fmt.Println("Result of github.com/anmitsu/go-shlex:")
	for _, word := range words_anmitsu {
		fmt.Println(word)
	}
	if err2 != nil {
		fmt.Println(err2.Error())
	}
	// Output:
	// Source string:
	// English and 日本語
	//
	// Result of github.com/flynn/go-shlex:
	// English
	// and
	// Unknown rune: 26085
	//
	// Result of github.com/anmitsu/go-shlex:
	// English
	// and
	// 日本語
}
// Package shlex provides a simple lexical analysis like Unix shell.
package shlex
import (
"bufio"
"errors"
"io"
"strings"
"unicode"
)
var (
	// ErrNoClosing is returned when input ends inside a quoted region
	// that was never closed.
	ErrNoClosing = errors.New("No closing quotation")
	// ErrNoEscaped is returned when input ends immediately after an
	// escape character, leaving nothing to escape.
	ErrNoEscaped = errors.New("No escaped character")
)
// Tokenizer is the interface that classifies a token according to
// words, whitespaces, quotations, escapes and escaped quotations.
type Tokenizer interface {
	// IsWord reports whether the rune may appear in an unquoted word.
	IsWord(rune) bool
	// IsWhitespace reports whether the rune separates tokens.
	IsWhitespace(rune) bool
	// IsQuote reports whether the rune opens/closes a quoted region.
	IsQuote(rune) bool
	// IsEscape reports whether the rune escapes the following rune.
	IsEscape(rune) bool
	// IsEscapedQuote reports whether a quoted region opened by this
	// rune honors escape characters inside it.
	IsEscapedQuote(rune) bool
}
// DefaultTokenizer implements a simple tokenizer like Unix shell.
type DefaultTokenizer struct{}

// IsWord reports whether r may appear in an unquoted word:
// letters, numbers, and the underscore.
func (t *DefaultTokenizer) IsWord(r rune) bool {
	if r == '_' {
		return true
	}
	return unicode.IsLetter(r) || unicode.IsNumber(r)
}

// IsQuote reports whether r is a single or double quote.
func (t *DefaultTokenizer) IsQuote(r rune) bool {
	return r == '\'' || r == '"'
}

// IsWhitespace reports whether r is a Unicode space character.
func (t *DefaultTokenizer) IsWhitespace(r rune) bool {
	return unicode.IsSpace(r)
}

// IsEscape reports whether r is the backslash escape character.
func (t *DefaultTokenizer) IsEscape(r rune) bool {
	return r == '\\'
}

// IsEscapedQuote reports whether a quote of kind r honors escapes;
// only double quotes do, matching shell semantics.
func (t *DefaultTokenizer) IsEscapedQuote(r rune) bool {
	return r == '"'
}
// Lexer represents a lexical analyzer.
type Lexer struct {
	reader          *bufio.Reader // buffered source of input runes
	tokenizer       Tokenizer     // rune classifier; DefaultTokenizer unless replaced via SetTokenizer
	posix           bool          // POSIX mode: interpret (strip) quotes and escapes instead of keeping them
	whitespacesplit bool          // split only on whitespace, keeping punctuation inside words
}
// NewLexer creates a new Lexer reading from io.Reader. This Lexer
// has a DefaultTokenizer according to posix and whitespacesplit
// rules.
func NewLexer(r io.Reader, posix, whitespacesplit bool) *Lexer {
	l := new(Lexer)
	l.reader = bufio.NewReader(r)
	l.tokenizer = &DefaultTokenizer{}
	l.posix = posix
	l.whitespacesplit = whitespacesplit
	return l
}
// NewLexerString creates a new Lexer reading from a string. This
// Lexer has a DefaultTokenizer according to posix and whitespacesplit
// rules.
func NewLexerString(s string, posix, whitespacesplit bool) *Lexer {
	reader := strings.NewReader(s)
	return NewLexer(reader, posix, whitespacesplit)
}
// Split splits a string according to posix or non-posix rules,
// always using whitespace-only splitting.
func Split(s string, posix bool) ([]string, error) {
	lexer := NewLexerString(s, posix, true)
	return lexer.Split()
}
// SetTokenizer sets a Tokenizer, replacing the DefaultTokenizer so
// callers can customize which runes count as words, quotes, escapes
// and whitespace.
func (l *Lexer) SetTokenizer(t Tokenizer) {
	l.tokenizer = t
}
// Split reads the entire input and returns every token found. On a
// lexing error, the tokens collected so far are returned alongside
// the error; io.EOF itself is not treated as an error.
func (l *Lexer) Split() ([]string, error) {
	result := make([]string, 0)
	for {
		token, err := l.readToken()
		if len(token) > 0 {
			result = append(result, token)
		}
		switch {
		case err == io.EOF:
			return result, nil
		case err != nil:
			return result, err
		}
	}
}
// readToken scans runes and returns the next token. It is a small
// state machine where state is one of:
//   - ' '           : consuming leading whitespace (initial state)
//   - 'a'           : inside an ordinary (unquoted) word
//   - a quote rune  : inside a quoted region opened by that rune
//   - an escape rune: the previous rune was an escape character
//
// EOF inside a quote yields ErrNoClosing; EOF right after an escape
// yields ErrNoEscaped; otherwise the read error (normally io.EOF) is
// returned together with whatever token has accumulated.
func (l *Lexer) readToken() (string, error) {
	t := l.tokenizer
	token := ""
	quoted := false // true once any quoting has been seen in this token (POSIX)
	state := ' '
	escapedstate := ' ' // state to restore after an escape sequence completes
scanning:
	for {
		next, _, err := l.reader.ReadRune()
		if err != nil {
			// Input ended while still inside a quote or escape: that
			// is a syntax error in the input string.
			if t.IsQuote(state) {
				return token, ErrNoClosing
			} else if t.IsEscape(state) {
				return token, ErrNoEscaped
			}
			return token, err
		}
		switch {
		case t.IsWhitespace(state):
			// Between tokens: decide what kind of token `next` starts.
			switch {
			case t.IsWhitespace(next):
				break scanning
			case l.posix && t.IsEscape(next):
				escapedstate = 'a'
				state = next
			case t.IsWord(next):
				token += string(next)
				state = 'a'
			case t.IsQuote(next):
				// Non-POSIX mode keeps the quote characters in the token.
				if !l.posix {
					token += string(next)
				}
				state = next
			default:
				// Punctuation or other rune starts a token on its own.
				token = string(next)
				if l.whitespacesplit {
					state = 'a'
				} else if token != "" || (l.posix && quoted) {
					break scanning
				}
			}
		case t.IsQuote(state):
			// Inside a quoted region opened by the rune held in `state`.
			quoted = true
			switch {
			case next == state:
				// Matching closing quote.
				if !l.posix {
					token += string(next)
					break scanning
				} else {
					state = 'a'
				}
			case l.posix && t.IsEscape(next) && t.IsEscapedQuote(state):
				// Escapes are honored only inside escapable quotes (").
				escapedstate = state
				state = next
			default:
				token += string(next)
			}
		case t.IsEscape(state):
			// Rune immediately following an escape character.
			if t.IsQuote(escapedstate) && next != state && next != escapedstate {
				// Inside quotes a backslash only escapes itself or the
				// quote character; otherwise it is kept literally.
				token += string(state)
			}
			token += string(next)
			state = escapedstate
		case t.IsWord(state):
			// Inside an ordinary word.
			switch {
			case t.IsWhitespace(next):
				if token != "" || (l.posix && quoted) {
					break scanning
				}
			case l.posix && t.IsQuote(next):
				state = next
			case l.posix && t.IsEscape(next):
				escapedstate = 'a'
				state = next
			case t.IsWord(next) || t.IsQuote(next):
				token += string(next)
			default:
				if l.whitespacesplit {
					token += string(next)
				} else if token != "" {
					// Punctuation ends the word: push the rune back so the
					// next call starts a fresh token with it.
					l.reader.UnreadRune()
					break scanning
				}
			}
		}
	}
	return token, nil
}
package shlex
import (
"fmt"
"testing"
)
// datanonposix holds table-driven cases for non-POSIX mode: quote and
// escape characters are preserved verbatim in the resulting tokens.
var datanonposix = []struct {
	in  string   // input string to split
	out []string // expected tokens
	err error    // expected lexing error (nil on success)
}{
	{`This string has an embedded apostrophe, doesn't it?`,
		[]string{
			"This",
			"string",
			"has",
			"an",
			"embedded",
			"apostrophe",
			",",
			"doesn't",
			"it",
			"?",
		},
		nil,
	},
	{"This string has embedded \"double quotes\" and 'single quotes' in it,\nand even \"a 'nested example'\".\n",
		[]string{
			"This",
			"string",
			"has",
			"embedded",
			`"double quotes"`,
			"and",
			`'single quotes'`,
			"in",
			"it",
			",",
			"and",
			"even",
			`"a 'nested example'"`,
			".",
		},
		nil,
	},
	{`Hello world!, こんにちは 世界!`,
		[]string{
			"Hello",
			"world",
			"!",
			",",
			"こんにちは",
			"世界",
			"!",
		},
		nil,
	},
	{`Do"Not"Separate`,
		[]string{`Do"Not"Separate`},
		nil,
	},
	{`"Do"Separate`,
		[]string{`"Do"`, "Separate"},
		nil,
	},
	{`Escaped \e Character not in quotes`,
		[]string{
			"Escaped",
			`\`,
			"e",
			"Character",
			"not",
			"in",
			"quotes",
		},
		nil,
	},
	{`Escaped "\e" Character in double quotes`,
		[]string{
			"Escaped",
			`"\e"`,
			"Character",
			"in",
			"double",
			"quotes",
		},
		nil,
	},
	{`Escaped '\e' Character in single quotes`,
		[]string{
			"Escaped",
			`'\e'`,
			"Character",
			"in",
			"single",
			"quotes",
		},
		nil,
	},
	{`Escaped '\'' \"\'\" single quote`,
		[]string{
			"Escaped",
			`'\'`,
			`' \"\'`,
			`\`,
			`" single quote`,
		},
		ErrNoClosing,
	},
	{`Escaped "\"" \'\"\' double quote`,
		[]string{
			"Escaped",
			`"\"`,
			`" \'\"`,
			`\`,
			`' double quote`,
		},
		ErrNoClosing,
	},
	{`"'Strip extra layer of quotes'"`,
		[]string{`"'Strip extra layer of quotes'"`},
		nil,
	},
}
// dataposix holds table-driven cases for POSIX mode: quotes are
// stripped and escape characters are interpreted, as in a Unix shell.
var dataposix = []struct {
	in  string   // input string to split
	out []string // expected tokens
	err error    // expected lexing error (nil on success)
}{
	{`This string has an embedded apostrophe, doesn't it?`,
		[]string{
			"This",
			"string",
			"has",
			"an",
			"embedded",
			"apostrophe",
			",",
			"doesnt it?",
		},
		ErrNoClosing,
	},
	{"This string has embedded \"double quotes\" and 'single quotes' in it,\nand even \"a 'nested example'\".\n",
		[]string{
			"This",
			"string",
			"has",
			"embedded",
			`double quotes`,
			"and",
			`single quotes`,
			"in",
			"it",
			",",
			"and",
			"even",
			`a 'nested example'`,
			".",
		},
		nil,
	},
	{`Hello world!, こんにちは 世界!`,
		[]string{
			"Hello",
			"world",
			"!",
			",",
			"こんにちは",
			"世界",
			"!",
		},
		nil,
	},
	{`Do"Not"Separate`,
		[]string{`DoNotSeparate`},
		nil,
	},
	{`"Do"Separate`,
		[]string{"DoSeparate"},
		nil,
	},
	{`Escaped \e Character not in quotes`,
		[]string{
			"Escaped",
			"e",
			"Character",
			"not",
			"in",
			"quotes",
		},
		nil,
	},
	{`Escaped "\e" Character in double quotes`,
		[]string{
			"Escaped",
			`\e`,
			"Character",
			"in",
			"double",
			"quotes",
		},
		nil,
	},
	{`Escaped '\e' Character in single quotes`,
		[]string{
			"Escaped",
			`\e`,
			"Character",
			"in",
			"single",
			"quotes",
		},
		nil,
	},
	{`Escaped '\'' \"\'\" single quote`,
		[]string{
			"Escaped",
			`\ \"\"`,
			"single",
			"quote",
		},
		nil,
	},
	{`Escaped "\"" \'\"\' double quote`,
		[]string{
			"Escaped",
			`"`,
			`'"'`,
			"double",
			"quote",
		},
		nil,
	},
	{`"'Strip extra layer of quotes'"`,
		[]string{`'Strip extra layer of quotes'`},
		nil,
	},
}
// TestSplitNonPOSIX runs the shared table tests in non-POSIX mode.
func TestSplitNonPOSIX(t *testing.T) {
	testSplit(t, false)
}
// TestSplitPOSIX runs the shared table tests in POSIX mode.
func TestSplitPOSIX(t *testing.T) {
	testSplit(t, true)
}
// testSplit runs the table-driven cases for the selected mode and
// checks the returned error, the token count, and each token's value.
// Fixes typos in the log/failure messages ("Spliting", "expeced",
// "catched", "founds", "splited").
func testSplit(t *testing.T, posix bool) {
	var data []struct {
		in  string
		out []string
		err error
	}
	if posix {
		data = dataposix
	} else {
		data = datanonposix
	}
	for _, d := range data {
		t.Logf("Splitting: `%s'", d.in)
		result, err := NewLexerString(d.in, posix, false).Split()
		// check closing and escaped error
		if err != d.err {
			printToken(result)
			t.Fatalf("Error expected: `%v', but result caught: `%v'",
				d.err, err)
		}
		// check number of split tokens
		if len(result) != len(d.out) {
			printToken(result)
			t.Fatalf("Split expected: `%d', but result found: `%d'",
				len(d.out), len(result))
		}
		// check words
		for j, out := range d.out {
			if result[j] != out {
				printToken(result)
				t.Fatalf("Word expected: `%s', but result found: `%s' in %d",
					out, result[j], j)
			}
		}
		t.Log("ok")
	}
}
// printToken dumps each token on its own line; used to show the
// actual split result when a test assertion fails.
func printToken(s []string) {
	for i := range s {
		fmt.Println(s[i])
	}
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment