lexer.go
package caddyfile

import (
	"bufio"
	"io"
	"unicode"
)

type (
	// lexer is a utility which can get values, token by
	// token, from a Reader. A token is a word, and tokens
	// are separated by whitespace. A word can be enclosed
	// in quotes if it contains whitespace.
	lexer struct {
		reader *bufio.Reader
		token  Token
		line   int
	}

	// Token represents a single parsable unit.
	Token struct {
		File string
		Line int
		Text string
	}
)
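
// Illustrative example (not part of the original file): for the input
//
//	root "/var/www/my site"   # serve from here
//
// the lexer produces two tokens, `root` and `/var/www/my site`: the
// quoted path stays a single token without its enclosing quotes, and
// everything after the "#" is skipped as a comment.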

// load prepares the lexer to scan an input for tokens.
// It discards any leading byte order mark.
func (l *lexer) load(input io.Reader) error {
	l.reader = bufio.NewReader(input)
	l.line = 1

	// discard byte order mark, if present
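	// (a failed first read, such as io.EOF on empty input, is not
	// treated as an error here; the condition is left for next to
	// encounter)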
	firstCh, _, err := l.reader.ReadRune()
	if err == nil && firstCh != 0xFEFF {
		err := l.reader.UnreadRune()
		if err != nil {
			return err
		}
	}

	return nil
}

// next loads the next token into the lexer.
// A token is delimited by whitespace, unless
// it starts with a quote character ("), in
// which case the token continues until the
// closing quote (the enclosing quotes are not
// included in the token). Inside quoted strings,
// a quote may be escaped with a preceding \
// character; no other characters may be escaped.
// Outside of quotes, a "#" character causes the
// rest of the line to be skipped as a comment.
// Returns true if a token was loaded; false
// otherwise.
func (l *lexer) next() bool {
	var val []rune
	var comment, quoted, escaped bool

	makeToken := func() bool {
		l.token.Text = string(val)
		return true
	}

	for {
		ch, _, err := l.reader.ReadRune()
		if err != nil {
			if len(val) > 0 {
				return makeToken()
			}
			if err == io.EOF {
				return false
			}
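			// a read error other than io.EOF is unexpected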
			panic(err)
		}

		if quoted {
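			// in a quoted token: handle backslash escapes, track
			// newlines, and end the token at an unescaped closing quote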
			if !escaped {
				if ch == '\\' {
					escaped = true
					continue
				} else if ch == '"' {
					quoted = false
					return makeToken()
				}
			}
			if ch == '\n' {
				l.line++
			}
			if escaped {
				// only quotes can be escaped; otherwise keep the backslash literal
				if ch != '"' {
					val = append(val, '\\')
				}
			}
			val = append(val, ch)
			escaped = false
			continue
		}

		if unicode.IsSpace(ch) {
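			// whitespace ends any token in progress; a newline also
			// ends a comment and bumps the line counter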
			if ch == '\r' {
				continue
			}
			if ch == '\n' {
				l.line++
				comment = false
			}
			if len(val) > 0 {
				return makeToken()
			}
			continue
		}

		if ch == '#' {
			comment = true
		}

		if comment {
			continue
		}

		if len(val) == 0 {
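			// first character of a new token: record its starting line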
			l.token = Token{Line: l.line}
			if ch == '"' {
				quoted = true
				continue
			}
		}

		val = append(val, ch)
	}
}
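
// Usage sketch (illustrative; allTokens is a hypothetical helper, not
// part of this file): load an input, then pull tokens until next
// reports that none remain.
//
//	func allTokens(input io.Reader) ([]Token, error) {
//		l := new(lexer)
//		if err := l.load(input); err != nil {
//			return nil, err
//		}
//		var tokens []Token
//		for l.next() {
//			tokens = append(tokens, l.token)
//		}
//		return tokens, nil
//	}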