helpers.go 5.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276
  1. // Copyright 2015-2019 Brett Vickers.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. package etree
  5. import (
  6. "bufio"
  7. "io"
  8. "strings"
  9. "unicode/utf8"
  10. )
  11. // A simple stack
  12. type stack struct {
  13. data []interface{}
  14. }
  15. func (s *stack) empty() bool {
  16. return len(s.data) == 0
  17. }
  18. func (s *stack) push(value interface{}) {
  19. s.data = append(s.data, value)
  20. }
  21. func (s *stack) pop() interface{} {
  22. value := s.data[len(s.data)-1]
  23. s.data[len(s.data)-1] = nil
  24. s.data = s.data[:len(s.data)-1]
  25. return value
  26. }
  27. func (s *stack) peek() interface{} {
  28. return s.data[len(s.data)-1]
  29. }
  30. // A fifo is a simple first-in-first-out queue.
  31. type fifo struct {
  32. data []interface{}
  33. head, tail int
  34. }
  35. func (f *fifo) add(value interface{}) {
  36. if f.len()+1 >= len(f.data) {
  37. f.grow()
  38. }
  39. f.data[f.tail] = value
  40. if f.tail++; f.tail == len(f.data) {
  41. f.tail = 0
  42. }
  43. }
  44. func (f *fifo) remove() interface{} {
  45. value := f.data[f.head]
  46. f.data[f.head] = nil
  47. if f.head++; f.head == len(f.data) {
  48. f.head = 0
  49. }
  50. return value
  51. }
  52. func (f *fifo) len() int {
  53. if f.tail >= f.head {
  54. return f.tail - f.head
  55. }
  56. return len(f.data) - f.head + f.tail
  57. }
  58. func (f *fifo) grow() {
  59. c := len(f.data) * 2
  60. if c == 0 {
  61. c = 4
  62. }
  63. buf, count := make([]interface{}, c), f.len()
  64. if f.tail >= f.head {
  65. copy(buf[0:count], f.data[f.head:f.tail])
  66. } else {
  67. hindex := len(f.data) - f.head
  68. copy(buf[0:hindex], f.data[f.head:])
  69. copy(buf[hindex:count], f.data[:f.tail])
  70. }
  71. f.data, f.head, f.tail = buf, 0, count
  72. }
  73. // countReader implements a proxy reader that counts the number of
  74. // bytes read from its encapsulated reader.
  75. type countReader struct {
  76. r io.Reader
  77. bytes int64
  78. }
  79. func newCountReader(r io.Reader) *countReader {
  80. return &countReader{r: r}
  81. }
  82. func (cr *countReader) Read(p []byte) (n int, err error) {
  83. b, err := cr.r.Read(p)
  84. cr.bytes += int64(b)
  85. return b, err
  86. }
  87. // countWriter implements a proxy writer that counts the number of
  88. // bytes written by its encapsulated writer.
  89. type countWriter struct {
  90. w io.Writer
  91. bytes int64
  92. }
  93. func newCountWriter(w io.Writer) *countWriter {
  94. return &countWriter{w: w}
  95. }
  96. func (cw *countWriter) Write(p []byte) (n int, err error) {
  97. b, err := cw.w.Write(p)
  98. cw.bytes += int64(b)
  99. return b, err
  100. }
  101. // isWhitespace returns true if the byte slice contains only
  102. // whitespace characters.
  103. func isWhitespace(s string) bool {
  104. for i := 0; i < len(s); i++ {
  105. if c := s[i]; c != ' ' && c != '\t' && c != '\n' && c != '\r' {
  106. return false
  107. }
  108. }
  109. return true
  110. }
  111. // spaceMatch returns true if namespace a is the empty string
  112. // or if namespace a equals namespace b.
  113. func spaceMatch(a, b string) bool {
  114. switch {
  115. case a == "":
  116. return true
  117. default:
  118. return a == b
  119. }
  120. }
  121. // spaceDecompose breaks a namespace:tag identifier at the ':'
  122. // and returns the two parts.
  123. func spaceDecompose(str string) (space, key string) {
  124. colon := strings.IndexByte(str, ':')
  125. if colon == -1 {
  126. return "", str
  127. }
  128. return str[:colon], str[colon+1:]
  129. }
// Strings used by indentCRLF and indentLF (presumably passed as their
// source argument; confirm against callers). Each one is a CRLF pair
// followed by a run of a single indent character: both helpers treat
// the first two bytes as the line ending and the third byte as the
// character to repeat.
const (
	// indentSpaces indents with the space character.
	indentSpaces = "\r\n "
	// indentTabs indents with the tab character.
	indentTabs = "\r\n\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t"
)
  135. // indentCRLF returns a CRLF newline followed by n copies of the first
  136. // non-CRLF character in the source string.
  137. func indentCRLF(n int, source string) string {
  138. switch {
  139. case n < 0:
  140. return source[:2]
  141. case n < len(source)-1:
  142. return source[:n+2]
  143. default:
  144. return source + strings.Repeat(source[2:3], n-len(source)+2)
  145. }
  146. }
  147. // indentLF returns a LF newline followed by n copies of the first non-LF
  148. // character in the source string.
  149. func indentLF(n int, source string) string {
  150. switch {
  151. case n < 0:
  152. return source[1:2]
  153. case n < len(source)-1:
  154. return source[1 : n+2]
  155. default:
  156. return source[1:] + strings.Repeat(source[2:3], n-len(source)+2)
  157. }
  158. }
  159. // nextIndex returns the index of the next occurrence of sep in s,
  160. // starting from offset. It returns -1 if the sep string is not found.
  161. func nextIndex(s, sep string, offset int) int {
  162. switch i := strings.Index(s[offset:], sep); i {
  163. case -1:
  164. return -1
  165. default:
  166. return offset + i
  167. }
  168. }
  169. // isInteger returns true if the string s contains an integer.
  170. func isInteger(s string) bool {
  171. for i := 0; i < len(s); i++ {
  172. if (s[i] < '0' || s[i] > '9') && !(i == 0 && s[i] == '-') {
  173. return false
  174. }
  175. }
  176. return true
  177. }
  178. type escapeMode byte
  179. const (
  180. escapeNormal escapeMode = iota
  181. escapeCanonicalText
  182. escapeCanonicalAttr
  183. )
  184. // escapeString writes an escaped version of a string to the writer.
  185. func escapeString(w *bufio.Writer, s string, m escapeMode) {
  186. var esc []byte
  187. last := 0
  188. for i := 0; i < len(s); {
  189. r, width := utf8.DecodeRuneInString(s[i:])
  190. i += width
  191. switch r {
  192. case '&':
  193. esc = []byte("&amp;")
  194. case '<':
  195. esc = []byte("&lt;")
  196. case '>':
  197. if m == escapeCanonicalAttr {
  198. continue
  199. }
  200. esc = []byte("&gt;")
  201. case '\'':
  202. if m != escapeNormal {
  203. continue
  204. }
  205. esc = []byte("&apos;")
  206. case '"':
  207. if m == escapeCanonicalText {
  208. continue
  209. }
  210. esc = []byte("&quot;")
  211. case '\t':
  212. if m != escapeCanonicalAttr {
  213. continue
  214. }
  215. esc = []byte("&#x9;")
  216. case '\n':
  217. if m != escapeCanonicalAttr {
  218. continue
  219. }
  220. esc = []byte("&#xA;")
  221. case '\r':
  222. if m == escapeNormal {
  223. continue
  224. }
  225. esc = []byte("&#xD;")
  226. default:
  227. if !isInCharacterRange(r) || (r == 0xFFFD && width == 1) {
  228. esc = []byte("\uFFFD")
  229. break
  230. }
  231. continue
  232. }
  233. w.WriteString(s[last : i-width])
  234. w.Write(esc)
  235. last = i
  236. }
  237. w.WriteString(s[last:])
  238. }
  239. func isInCharacterRange(r rune) bool {
  240. return r == 0x09 ||
  241. r == 0x0A ||
  242. r == 0x0D ||
  243. r >= 0x20 && r <= 0xD7FF ||
  244. r >= 0xE000 && r <= 0xFFFD ||
  245. r >= 0x10000 && r <= 0x10FFFF
  246. }