| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276 |
- // Copyright 2015-2019 Brett Vickers.
- // Use of this source code is governed by a BSD-style
- // license that can be found in the LICENSE file.
- package etree
- import (
- "bufio"
- "io"
- "strings"
- "unicode/utf8"
- )
// stack is a minimal last-in-first-out container of arbitrary values.
// The zero value is an empty stack that is ready to use.
type stack struct {
	data []interface{}
}

// empty reports whether the stack currently holds no values.
func (s *stack) empty() bool {
	return len(s.data) < 1
}

// push places value on top of the stack.
func (s *stack) push(value interface{}) {
	s.data = append(s.data, value)
}

// pop removes the value on top of the stack and returns it. Calling pop
// on an empty stack panics with an out-of-range index.
func (s *stack) pop() interface{} {
	top := len(s.data) - 1
	value := s.data[top]
	// Clear the vacated slot so the backing array no longer references
	// the popped value.
	s.data[top] = nil
	s.data = s.data[:top]
	return value
}

// peek returns the value on top of the stack without removing it.
// Calling peek on an empty stack panics with an out-of-range index.
func (s *stack) peek() interface{} {
	top := len(s.data) - 1
	return s.data[top]
}
// fifo is a first-in-first-out queue of arbitrary values stored in a
// circular buffer. head is the index of the oldest value and tail is the
// index of the slot the next value will be written to. The zero value is
// an empty queue that is ready to use.
type fifo struct {
	data       []interface{}
	head, tail int
}

// add appends value to the back of the queue, enlarging the buffer first
// when necessary.
func (f *fifo) add(value interface{}) {
	// One slot is always left unused so that head == tail can only mean
	// "empty", never "full".
	if f.len()+1 >= len(f.data) {
		f.grow()
	}
	f.data[f.tail] = value
	f.tail++
	if f.tail == len(f.data) {
		f.tail = 0 // wrap around to the start of the buffer
	}
}

// remove takes the oldest value off the front of the queue and returns
// it. It does not check for an empty queue; callers are expected to
// consult len first.
func (f *fifo) remove() interface{} {
	value := f.data[f.head]
	// Drop the buffer's reference to the removed value.
	f.data[f.head] = nil
	f.head++
	if f.head == len(f.data) {
		f.head = 0 // wrap around to the start of the buffer
	}
	return value
}

// len returns the number of values currently held in the queue.
func (f *fifo) len() int {
	n := f.tail - f.head
	if n < 0 {
		// The occupied region wraps past the end of the buffer.
		n += len(f.data)
	}
	return n
}

// grow doubles the buffer size (or allocates four slots when there is no
// buffer yet) and moves the queued values to the front of the new buffer
// in queue order.
func (f *fifo) grow() {
	size := 4
	if len(f.data) > 0 {
		size = 2 * len(f.data)
	}
	count := f.len()
	buf := make([]interface{}, size)
	if f.head <= f.tail {
		// Contiguous region: a single copy suffices.
		copy(buf, f.data[f.head:f.tail])
	} else {
		// Wrapped region: copy the segment from head to the end of the
		// old buffer, then the segment from its start up to tail.
		moved := copy(buf, f.data[f.head:])
		copy(buf[moved:], f.data[:f.tail])
	}
	f.data, f.head, f.tail = buf, 0, count
}
// countReader is a proxy io.Reader that keeps a running total of the
// bytes read through it from the reader it wraps.
type countReader struct {
	r     io.Reader
	bytes int64
}

// newCountReader returns a countReader wrapping r, with its byte total
// starting at zero.
func newCountReader(r io.Reader) *countReader {
	cr := new(countReader)
	cr.r = r
	return cr
}

// Read reads from the wrapped reader into p, adds the number of bytes
// read to the running total, and returns the wrapped reader's results
// unchanged.
func (cr *countReader) Read(p []byte) (n int, err error) {
	n, err = cr.r.Read(p)
	cr.bytes += int64(n)
	return n, err
}
// countWriter is a proxy io.Writer that keeps a running total of the
// bytes written through it to the writer it wraps.
type countWriter struct {
	w     io.Writer
	bytes int64
}

// newCountWriter returns a countWriter wrapping w, with its byte total
// starting at zero.
func newCountWriter(w io.Writer) *countWriter {
	cw := new(countWriter)
	cw.w = w
	return cw
}

// Write writes p to the wrapped writer, adds the number of bytes the
// wrapped writer reports as written to the running total, and returns
// the wrapped writer's results unchanged.
func (cw *countWriter) Write(p []byte) (n int, err error) {
	n, err = cw.w.Write(p)
	cw.bytes += int64(n)
	return n, err
}
// isWhitespace returns true if the string s consists solely of
// whitespace characters: space, tab, line feed and carriage return.
// An empty string yields true.
func isWhitespace(s string) bool {
	for i := 0; i < len(s); i++ {
		switch s[i] {
		case ' ', '\t', '\n', '\r':
			// Whitespace; keep scanning.
		default:
			return false
		}
	}
	return true
}
// spaceMatch returns true if namespace a is the empty string, which
// matches any namespace, or if namespace a is identical to namespace b.
func spaceMatch(a, b string) bool {
	return a == "" || a == b
}
// spaceDecompose splits a namespace:tag identifier at its first ':' and
// returns the part before it as space and the part after it as key.
// When str contains no ':', space is empty and key is str itself.
func spaceDecompose(str string) (space, key string) {
	if i := strings.IndexByte(str, ':'); i >= 0 {
		return str[:i], str[i+1:]
	}
	return "", str
}
// Indentation templates used by indentCRLF and indentLF. Each template
// is a CRLF newline followed by a run of a single indent character, so
// that short indents can be produced by slicing the template instead of
// building a new string. indentSpaces carries sixteen spaces so that it
// offers the same slicing headroom as indentTabs; longer indents are
// still produced correctly by the indent functions, which extend the
// template on demand.
const (
	indentSpaces = "\r\n                "
	indentTabs   = "\r\n\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t"
)
// indentCRLF returns a CRLF newline followed by n copies of the first
// non-CRLF character in the source string. source must begin with
// "\r\n" followed by at least one indent character. A negative n yields
// just the newline.
func indentCRLF(n int, source string) string {
	if n < 0 {
		return source[:2]
	}
	if n < len(source)-1 {
		// The template is long enough: slice it rather than allocate.
		return source[:n+2]
	}
	// The template is too short: extend it with additional copies of its
	// indent character.
	extra := n - len(source) + 2
	return source + strings.Repeat(source[2:3], extra)
}
// indentLF returns a LF newline followed by n copies of the first non-LF
// character in the source string. source has the same layout as for
// indentCRLF ("\r\n" followed by at least one indent character); the
// leading carriage return is skipped. A negative n yields just the
// newline.
func indentLF(n int, source string) string {
	if n < 0 {
		return source[1:2]
	}
	if n < len(source)-1 {
		// The template is long enough: slice it rather than allocate.
		return source[1 : n+2]
	}
	// The template is too short: extend it with additional copies of its
	// indent character.
	extra := n - len(source) + 2
	return source[1:] + strings.Repeat(source[2:3], extra)
}
// nextIndex returns the index within s of the first occurrence of sep at
// or after offset. It returns -1 if sep does not occur there. offset
// must not exceed len(s).
func nextIndex(s, sep string, offset int) int {
	rel := strings.Index(s[offset:], sep)
	if rel < 0 {
		return -1
	}
	// Translate the position within the sub-string back to a position
	// within s.
	return offset + rel
}
// isInteger returns true if the string s contains an integer: an
// optional leading '-' followed by one or more ASCII decimal digits.
// The empty string and a lone "-" contain no digits and are therefore
// not integers.
func isInteger(s string) bool {
	digits := s
	if len(digits) > 0 && digits[0] == '-' {
		digits = digits[1:]
	}
	// At least one digit is required.
	if len(digits) == 0 {
		return false
	}
	for i := 0; i < len(digits); i++ {
		if digits[i] < '0' || digits[i] > '9' {
			return false
		}
	}
	return true
}
// escapeMode selects the set of characters that escapeString replaces.
type escapeMode byte

const (
	// escapeNormal escapes &, <, >, ' and ".
	escapeNormal escapeMode = iota
	// escapeCanonicalText escapes &, <, > and carriage return.
	escapeCanonicalText
	// escapeCanonicalAttr escapes &, <, ", tab, line feed and carriage
	// return.
	escapeCanonicalAttr
)

// escapeString writes an escaped version of the string s to the writer
// w. The escape mode m determines which characters are replaced by XML
// entity or character references; all other characters are written
// unchanged. In every mode, invalid UTF-8 bytes and characters outside
// the range accepted by isInCharacterRange are replaced by U+FFFD.
//
// Write errors are not checked here; a bufio.Writer retains the first
// error it encounters and reports it from subsequent writes and Flush.
func escapeString(w *bufio.Writer, s string, m escapeMode) {
	var esc string
	last := 0 // start of the pending run of characters needing no escape
	for i := 0; i < len(s); {
		r, width := utf8.DecodeRuneInString(s[i:])
		i += width
		switch r {
		case '&':
			esc = "&amp;"
		case '<':
			esc = "&lt;"
		case '>':
			if m == escapeCanonicalAttr {
				continue
			}
			esc = "&gt;"
		case '\'':
			if m != escapeNormal {
				continue
			}
			esc = "&apos;"
		case '"':
			if m == escapeCanonicalText {
				continue
			}
			esc = "&quot;"
		case '\t':
			if m != escapeCanonicalAttr {
				continue
			}
			esc = "&#x9;"
		case '\n':
			if m != escapeCanonicalAttr {
				continue
			}
			esc = "&#xA;"
		case '\r':
			if m == escapeNormal {
				continue
			}
			esc = "&#xD;"
		default:
			// DecodeRuneInString reports an invalid byte as U+FFFD with
			// width 1; a genuine U+FFFD in the input is three bytes wide
			// and is passed through untouched.
			if isInCharacterRange(r) && !(r == utf8.RuneError && width == 1) {
				continue
			}
			esc = "\uFFFD"
		}
		// Flush the unescaped run preceding this character, then the
		// replacement for the character itself.
		w.WriteString(s[last : i-width])
		w.WriteString(esc)
		last = i
	}
	w.WriteString(s[last:])
}

// isInCharacterRange returns true if the rune r is one of tab, line
// feed or carriage return, or lies within one of the ranges
// U+0020-U+D7FF, U+E000-U+FFFD or U+10000-U+10FFFF.
func isInCharacterRange(r rune) bool {
	return r == 0x09 ||
		r == 0x0A ||
		r == 0x0D ||
		r >= 0x20 && r <= 0xD7FF ||
		r >= 0xE000 && r <= 0xFFFD ||
		r >= 0x10000 && r <= 0x10FFFF
}
|