lex.go 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874
  1. package toml
  2. import (
  3. "fmt"
  4. "strings"
  5. "unicode/utf8"
  6. )
  7. type itemType int
  8. const (
  9. itemError itemType = iota
  10. itemNIL // used in the parser to indicate no type
  11. itemEOF
  12. itemText
  13. itemString
  14. itemRawString
  15. itemMultilineString
  16. itemRawMultilineString
  17. itemBool
  18. itemInteger
  19. itemFloat
  20. itemDatetime
  21. itemArray // the start of an array
  22. itemArrayEnd
  23. itemTableStart
  24. itemTableEnd
  25. itemArrayTableStart
  26. itemArrayTableEnd
  27. itemKeyStart
  28. itemCommentStart
  29. )
  30. const (
  31. eof = 0
  32. tableStart = '['
  33. tableEnd = ']'
  34. arrayTableStart = '['
  35. arrayTableEnd = ']'
  36. tableSep = '.'
  37. keySep = '='
  38. arrayStart = '['
  39. arrayEnd = ']'
  40. arrayValTerm = ','
  41. commentStart = '#'
  42. stringStart = '"'
  43. stringEnd = '"'
  44. rawStringStart = '\''
  45. rawStringEnd = '\''
  46. )
  47. type stateFn func(lx *lexer) stateFn
  48. type lexer struct {
  49. input string
  50. start int
  51. pos int
  52. width int
  53. line int
  54. state stateFn
  55. items chan item
  56. // A stack of state functions used to maintain context.
  57. // The idea is to reuse parts of the state machine in various places.
  58. // For example, values can appear at the top level or within arbitrarily
  59. // nested arrays. The last state on the stack is used after a value has
  60. // been lexed. Similarly for comments.
  61. stack []stateFn
  62. }
  63. type item struct {
  64. typ itemType
  65. val string
  66. line int
  67. }
  68. func (lx *lexer) nextItem() item {
  69. for {
  70. select {
  71. case item := <-lx.items:
  72. return item
  73. default:
  74. lx.state = lx.state(lx)
  75. }
  76. }
  77. }
  78. func lex(input string) *lexer {
  79. lx := &lexer{
  80. input: input + "\n",
  81. state: lexTop,
  82. line: 1,
  83. items: make(chan item, 10),
  84. stack: make([]stateFn, 0, 10),
  85. }
  86. return lx
  87. }
  88. func (lx *lexer) push(state stateFn) {
  89. lx.stack = append(lx.stack, state)
  90. }
  91. func (lx *lexer) pop() stateFn {
  92. if len(lx.stack) == 0 {
  93. return lx.errorf("BUG in lexer: no states to pop.")
  94. }
  95. last := lx.stack[len(lx.stack)-1]
  96. lx.stack = lx.stack[0 : len(lx.stack)-1]
  97. return last
  98. }
  99. func (lx *lexer) current() string {
  100. return lx.input[lx.start:lx.pos]
  101. }
  102. func (lx *lexer) emit(typ itemType) {
  103. lx.items <- item{typ, lx.current(), lx.line}
  104. lx.start = lx.pos
  105. }
  106. func (lx *lexer) emitTrim(typ itemType) {
  107. lx.items <- item{typ, strings.TrimSpace(lx.current()), lx.line}
  108. lx.start = lx.pos
  109. }
  110. func (lx *lexer) next() (r rune) {
  111. if lx.pos >= len(lx.input) {
  112. lx.width = 0
  113. return eof
  114. }
  115. if lx.input[lx.pos] == '\n' {
  116. lx.line++
  117. }
  118. r, lx.width = utf8.DecodeRuneInString(lx.input[lx.pos:])
  119. lx.pos += lx.width
  120. return r
  121. }
  122. // ignore skips over the pending input before this point.
  123. func (lx *lexer) ignore() {
  124. lx.start = lx.pos
  125. }
  126. // backup steps back one rune. Can be called only once per call of next.
  127. func (lx *lexer) backup() {
  128. lx.pos -= lx.width
  129. if lx.pos < len(lx.input) && lx.input[lx.pos] == '\n' {
  130. lx.line--
  131. }
  132. }
  133. // accept consumes the next rune if it's equal to `valid`.
  134. func (lx *lexer) accept(valid rune) bool {
  135. if lx.next() == valid {
  136. return true
  137. }
  138. lx.backup()
  139. return false
  140. }
  141. // peek returns but does not consume the next rune in the input.
  142. func (lx *lexer) peek() rune {
  143. r := lx.next()
  144. lx.backup()
  145. return r
  146. }
  147. // errorf stops all lexing by emitting an error and returning `nil`.
  148. // Note that any value that is a character is escaped if it's a special
  149. // character (new lines, tabs, etc.).
  150. func (lx *lexer) errorf(format string, values ...interface{}) stateFn {
  151. lx.items <- item{
  152. itemError,
  153. fmt.Sprintf(format, values...),
  154. lx.line,
  155. }
  156. return nil
  157. }
  158. // lexTop consumes elements at the top level of TOML data.
  159. func lexTop(lx *lexer) stateFn {
  160. r := lx.next()
  161. if isWhitespace(r) || isNL(r) {
  162. return lexSkip(lx, lexTop)
  163. }
  164. switch r {
  165. case commentStart:
  166. lx.push(lexTop)
  167. return lexCommentStart
  168. case tableStart:
  169. return lexTableStart
  170. case eof:
  171. if lx.pos > lx.start {
  172. return lx.errorf("Unexpected EOF.")
  173. }
  174. lx.emit(itemEOF)
  175. return nil
  176. }
  177. // At this point, the only valid item can be a key, so we back up
  178. // and let the key lexer do the rest.
  179. lx.backup()
  180. lx.push(lexTopEnd)
  181. return lexKeyStart
  182. }
  183. // lexTopEnd is entered whenever a top-level item has been consumed. (A value
  184. // or a table.) It must see only whitespace, and will turn back to lexTop
  185. // upon a new line. If it sees EOF, it will quit the lexer successfully.
  186. func lexTopEnd(lx *lexer) stateFn {
  187. r := lx.next()
  188. switch {
  189. case r == commentStart:
  190. // a comment will read to a new line for us.
  191. lx.push(lexTop)
  192. return lexCommentStart
  193. case isWhitespace(r):
  194. return lexTopEnd
  195. case isNL(r):
  196. lx.ignore()
  197. return lexTop
  198. case r == eof:
  199. lx.ignore()
  200. return lexTop
  201. }
  202. return lx.errorf("Expected a top-level item to end with a new line, "+
  203. "comment or EOF, but got %q instead.", r)
  204. }
  205. // lexTable lexes the beginning of a table. Namely, it makes sure that
  206. // it starts with a character other than '.' and ']'.
  207. // It assumes that '[' has already been consumed.
  208. // It also handles the case that this is an item in an array of tables.
  209. // e.g., '[[name]]'.
  210. func lexTableStart(lx *lexer) stateFn {
  211. if lx.peek() == arrayTableStart {
  212. lx.next()
  213. lx.emit(itemArrayTableStart)
  214. lx.push(lexArrayTableEnd)
  215. } else {
  216. lx.emit(itemTableStart)
  217. lx.push(lexTableEnd)
  218. }
  219. return lexTableNameStart
  220. }
  221. func lexTableEnd(lx *lexer) stateFn {
  222. lx.emit(itemTableEnd)
  223. return lexTopEnd
  224. }
  225. func lexArrayTableEnd(lx *lexer) stateFn {
  226. if r := lx.next(); r != arrayTableEnd {
  227. return lx.errorf("Expected end of table array name delimiter %q, "+
  228. "but got %q instead.", arrayTableEnd, r)
  229. }
  230. lx.emit(itemArrayTableEnd)
  231. return lexTopEnd
  232. }
  233. func lexTableNameStart(lx *lexer) stateFn {
  234. switch r := lx.peek(); {
  235. case r == tableEnd || r == eof:
  236. return lx.errorf("Unexpected end of table name. (Table names cannot " +
  237. "be empty.)")
  238. case r == tableSep:
  239. return lx.errorf("Unexpected table separator. (Table names cannot " +
  240. "be empty.)")
  241. case r == stringStart || r == rawStringStart:
  242. lx.ignore()
  243. lx.push(lexTableNameEnd)
  244. return lexValue // reuse string lexing
  245. case isWhitespace(r):
  246. return lexTableNameStart
  247. default:
  248. return lexBareTableName
  249. }
  250. }
  251. // lexTableName lexes the name of a table. It assumes that at least one
  252. // valid character for the table has already been read.
  253. func lexBareTableName(lx *lexer) stateFn {
  254. switch r := lx.next(); {
  255. case isBareKeyChar(r):
  256. return lexBareTableName
  257. case r == tableSep || r == tableEnd:
  258. lx.backup()
  259. lx.emitTrim(itemText)
  260. return lexTableNameEnd
  261. default:
  262. return lx.errorf("Bare keys cannot contain %q.", r)
  263. }
  264. }
  265. // lexTableNameEnd reads the end of a piece of a table name, optionally
  266. // consuming whitespace.
  267. func lexTableNameEnd(lx *lexer) stateFn {
  268. switch r := lx.next(); {
  269. case isWhitespace(r):
  270. return lexTableNameEnd
  271. case r == tableSep:
  272. lx.ignore()
  273. return lexTableNameStart
  274. case r == tableEnd:
  275. return lx.pop()
  276. default:
  277. return lx.errorf("Expected '.' or ']' to end table name, but got %q "+
  278. "instead.", r)
  279. }
  280. }
  281. // lexKeyStart consumes a key name up until the first non-whitespace character.
  282. // lexKeyStart will ignore whitespace.
  283. func lexKeyStart(lx *lexer) stateFn {
  284. r := lx.peek()
  285. switch {
  286. case r == keySep:
  287. return lx.errorf("Unexpected key separator %q.", keySep)
  288. case isWhitespace(r) || isNL(r):
  289. lx.next()
  290. return lexSkip(lx, lexKeyStart)
  291. case r == stringStart || r == rawStringStart:
  292. lx.ignore()
  293. lx.emit(itemKeyStart)
  294. lx.push(lexKeyEnd)
  295. return lexValue // reuse string lexing
  296. default:
  297. lx.ignore()
  298. lx.emit(itemKeyStart)
  299. return lexBareKey
  300. }
  301. }
  302. // lexBareKey consumes the text of a bare key. Assumes that the first character
  303. // (which is not whitespace) has not yet been consumed.
  304. func lexBareKey(lx *lexer) stateFn {
  305. switch r := lx.next(); {
  306. case isBareKeyChar(r):
  307. return lexBareKey
  308. case isWhitespace(r):
  309. lx.emitTrim(itemText)
  310. return lexKeyEnd
  311. case r == keySep:
  312. lx.backup()
  313. lx.emitTrim(itemText)
  314. return lexKeyEnd
  315. default:
  316. return lx.errorf("Bare keys cannot contain %q.", r)
  317. }
  318. }
  319. // lexKeyEnd consumes the end of a key and trims whitespace (up to the key
  320. // separator).
  321. func lexKeyEnd(lx *lexer) stateFn {
  322. switch r := lx.next(); {
  323. case r == keySep:
  324. return lexSkip(lx, lexValue)
  325. case isWhitespace(r):
  326. return lexSkip(lx, lexKeyEnd)
  327. default:
  328. return lx.errorf("Expected key separator %q, but got %q instead.",
  329. keySep, r)
  330. }
  331. }
  332. // lexValue starts the consumption of a value anywhere a value is expected.
  333. // lexValue will ignore whitespace.
  334. // After a value is lexed, the last state on the next is popped and returned.
  335. func lexValue(lx *lexer) stateFn {
  336. // We allow whitespace to precede a value, but NOT new lines.
  337. // In array syntax, the array states are responsible for ignoring new
  338. // lines.
  339. r := lx.next()
  340. if isWhitespace(r) {
  341. return lexSkip(lx, lexValue)
  342. }
  343. switch {
  344. case r == arrayStart:
  345. lx.ignore()
  346. lx.emit(itemArray)
  347. return lexArrayValue
  348. case r == stringStart:
  349. if lx.accept(stringStart) {
  350. if lx.accept(stringStart) {
  351. lx.ignore() // Ignore """
  352. return lexMultilineString
  353. }
  354. lx.backup()
  355. }
  356. lx.ignore() // ignore the '"'
  357. return lexString
  358. case r == rawStringStart:
  359. if lx.accept(rawStringStart) {
  360. if lx.accept(rawStringStart) {
  361. lx.ignore() // Ignore """
  362. return lexMultilineRawString
  363. }
  364. lx.backup()
  365. }
  366. lx.ignore() // ignore the "'"
  367. return lexRawString
  368. case r == 't':
  369. return lexTrue
  370. case r == 'f':
  371. return lexFalse
  372. case r == '-':
  373. return lexNumberStart
  374. case isDigit(r):
  375. lx.backup() // avoid an extra state and use the same as above
  376. return lexNumberOrDateStart
  377. case r == '.': // special error case, be kind to users
  378. return lx.errorf("Floats must start with a digit, not '.'.")
  379. }
  380. return lx.errorf("Expected value but found %q instead.", r)
  381. }
  382. // lexArrayValue consumes one value in an array. It assumes that '[' or ','
  383. // have already been consumed. All whitespace and new lines are ignored.
  384. func lexArrayValue(lx *lexer) stateFn {
  385. r := lx.next()
  386. switch {
  387. case isWhitespace(r) || isNL(r):
  388. return lexSkip(lx, lexArrayValue)
  389. case r == commentStart:
  390. lx.push(lexArrayValue)
  391. return lexCommentStart
  392. case r == arrayValTerm:
  393. return lx.errorf("Unexpected array value terminator %q.",
  394. arrayValTerm)
  395. case r == arrayEnd:
  396. return lexArrayEnd
  397. }
  398. lx.backup()
  399. lx.push(lexArrayValueEnd)
  400. return lexValue
  401. }
  402. // lexArrayValueEnd consumes the cruft between values of an array. Namely,
  403. // it ignores whitespace and expects either a ',' or a ']'.
  404. func lexArrayValueEnd(lx *lexer) stateFn {
  405. r := lx.next()
  406. switch {
  407. case isWhitespace(r) || isNL(r):
  408. return lexSkip(lx, lexArrayValueEnd)
  409. case r == commentStart:
  410. lx.push(lexArrayValueEnd)
  411. return lexCommentStart
  412. case r == arrayValTerm:
  413. lx.ignore()
  414. return lexArrayValue // move on to the next value
  415. case r == arrayEnd:
  416. return lexArrayEnd
  417. }
  418. return lx.errorf("Expected an array value terminator %q or an array "+
  419. "terminator %q, but got %q instead.", arrayValTerm, arrayEnd, r)
  420. }
  421. // lexArrayEnd finishes the lexing of an array. It assumes that a ']' has
  422. // just been consumed.
  423. func lexArrayEnd(lx *lexer) stateFn {
  424. lx.ignore()
  425. lx.emit(itemArrayEnd)
  426. return lx.pop()
  427. }
  428. // lexString consumes the inner contents of a string. It assumes that the
  429. // beginning '"' has already been consumed and ignored.
  430. func lexString(lx *lexer) stateFn {
  431. r := lx.next()
  432. switch {
  433. case isNL(r):
  434. return lx.errorf("Strings cannot contain new lines.")
  435. case r == '\\':
  436. lx.push(lexString)
  437. return lexStringEscape
  438. case r == stringEnd:
  439. lx.backup()
  440. lx.emit(itemString)
  441. lx.next()
  442. lx.ignore()
  443. return lx.pop()
  444. }
  445. return lexString
  446. }
  447. // lexMultilineString consumes the inner contents of a string. It assumes that
  448. // the beginning '"""' has already been consumed and ignored.
  449. func lexMultilineString(lx *lexer) stateFn {
  450. r := lx.next()
  451. switch {
  452. case r == '\\':
  453. return lexMultilineStringEscape
  454. case r == stringEnd:
  455. if lx.accept(stringEnd) {
  456. if lx.accept(stringEnd) {
  457. lx.backup()
  458. lx.backup()
  459. lx.backup()
  460. lx.emit(itemMultilineString)
  461. lx.next()
  462. lx.next()
  463. lx.next()
  464. lx.ignore()
  465. return lx.pop()
  466. }
  467. lx.backup()
  468. }
  469. }
  470. return lexMultilineString
  471. }
  472. // lexRawString consumes a raw string. Nothing can be escaped in such a string.
  473. // It assumes that the beginning "'" has already been consumed and ignored.
  474. func lexRawString(lx *lexer) stateFn {
  475. r := lx.next()
  476. switch {
  477. case isNL(r):
  478. return lx.errorf("Strings cannot contain new lines.")
  479. case r == rawStringEnd:
  480. lx.backup()
  481. lx.emit(itemRawString)
  482. lx.next()
  483. lx.ignore()
  484. return lx.pop()
  485. }
  486. return lexRawString
  487. }
  488. // lexMultilineRawString consumes a raw string. Nothing can be escaped in such
  489. // a string. It assumes that the beginning "'" has already been consumed and
  490. // ignored.
  491. func lexMultilineRawString(lx *lexer) stateFn {
  492. r := lx.next()
  493. switch {
  494. case r == rawStringEnd:
  495. if lx.accept(rawStringEnd) {
  496. if lx.accept(rawStringEnd) {
  497. lx.backup()
  498. lx.backup()
  499. lx.backup()
  500. lx.emit(itemRawMultilineString)
  501. lx.next()
  502. lx.next()
  503. lx.next()
  504. lx.ignore()
  505. return lx.pop()
  506. }
  507. lx.backup()
  508. }
  509. }
  510. return lexMultilineRawString
  511. }
  512. // lexMultilineStringEscape consumes an escaped character. It assumes that the
  513. // preceding '\\' has already been consumed.
  514. func lexMultilineStringEscape(lx *lexer) stateFn {
  515. // Handle the special case first:
  516. if isNL(lx.next()) {
  517. lx.next()
  518. return lexMultilineString
  519. } else {
  520. lx.backup()
  521. lx.push(lexMultilineString)
  522. return lexStringEscape(lx)
  523. }
  524. }
  525. func lexStringEscape(lx *lexer) stateFn {
  526. r := lx.next()
  527. switch r {
  528. case 'b':
  529. fallthrough
  530. case 't':
  531. fallthrough
  532. case 'n':
  533. fallthrough
  534. case 'f':
  535. fallthrough
  536. case 'r':
  537. fallthrough
  538. case '"':
  539. fallthrough
  540. case '\\':
  541. return lx.pop()
  542. case 'u':
  543. return lexShortUnicodeEscape
  544. case 'U':
  545. return lexLongUnicodeEscape
  546. }
  547. return lx.errorf("Invalid escape character %q. Only the following "+
  548. "escape characters are allowed: "+
  549. "\\b, \\t, \\n, \\f, \\r, \\\", \\/, \\\\, "+
  550. "\\uXXXX and \\UXXXXXXXX.", r)
  551. }
  552. func lexShortUnicodeEscape(lx *lexer) stateFn {
  553. var r rune
  554. for i := 0; i < 4; i++ {
  555. r = lx.next()
  556. if !isHexadecimal(r) {
  557. return lx.errorf("Expected four hexadecimal digits after '\\u', "+
  558. "but got '%s' instead.", lx.current())
  559. }
  560. }
  561. return lx.pop()
  562. }
  563. func lexLongUnicodeEscape(lx *lexer) stateFn {
  564. var r rune
  565. for i := 0; i < 8; i++ {
  566. r = lx.next()
  567. if !isHexadecimal(r) {
  568. return lx.errorf("Expected eight hexadecimal digits after '\\U', "+
  569. "but got '%s' instead.", lx.current())
  570. }
  571. }
  572. return lx.pop()
  573. }
  574. // lexNumberOrDateStart consumes either a (positive) integer, float or
  575. // datetime. It assumes that NO negative sign has been consumed.
  576. func lexNumberOrDateStart(lx *lexer) stateFn {
  577. r := lx.next()
  578. if !isDigit(r) {
  579. if r == '.' {
  580. return lx.errorf("Floats must start with a digit, not '.'.")
  581. } else {
  582. return lx.errorf("Expected a digit but got %q.", r)
  583. }
  584. }
  585. return lexNumberOrDate
  586. }
  587. // lexNumberOrDate consumes either a (positive) integer, float or datetime.
  588. func lexNumberOrDate(lx *lexer) stateFn {
  589. r := lx.next()
  590. switch {
  591. case r == '-':
  592. if lx.pos-lx.start != 5 {
  593. return lx.errorf("All ISO8601 dates must be in full Zulu form.")
  594. }
  595. return lexDateAfterYear
  596. case isDigit(r):
  597. return lexNumberOrDate
  598. case r == '.':
  599. return lexFloatStart
  600. }
  601. lx.backup()
  602. lx.emit(itemInteger)
  603. return lx.pop()
  604. }
  605. // lexDateAfterYear consumes a full Zulu Datetime in ISO8601 format.
  606. // It assumes that "YYYY-" has already been consumed.
  607. func lexDateAfterYear(lx *lexer) stateFn {
  608. formats := []rune{
  609. // digits are '0'.
  610. // everything else is direct equality.
  611. '0', '0', '-', '0', '0',
  612. 'T',
  613. '0', '0', ':', '0', '0', ':', '0', '0',
  614. 'Z',
  615. }
  616. for _, f := range formats {
  617. r := lx.next()
  618. if f == '0' {
  619. if !isDigit(r) {
  620. return lx.errorf("Expected digit in ISO8601 datetime, "+
  621. "but found %q instead.", r)
  622. }
  623. } else if f != r {
  624. return lx.errorf("Expected %q in ISO8601 datetime, "+
  625. "but found %q instead.", f, r)
  626. }
  627. }
  628. lx.emit(itemDatetime)
  629. return lx.pop()
  630. }
  631. // lexNumberStart consumes either an integer or a float. It assumes that
  632. // a negative sign has already been read, but that *no* digits have been
  633. // consumed. lexNumberStart will move to the appropriate integer or float
  634. // states.
  635. func lexNumberStart(lx *lexer) stateFn {
  636. // we MUST see a digit. Even floats have to start with a digit.
  637. r := lx.next()
  638. if !isDigit(r) {
  639. if r == '.' {
  640. return lx.errorf("Floats must start with a digit, not '.'.")
  641. } else {
  642. return lx.errorf("Expected a digit but got %q.", r)
  643. }
  644. }
  645. return lexNumber
  646. }
  647. // lexNumber consumes an integer or a float after seeing the first digit.
  648. func lexNumber(lx *lexer) stateFn {
  649. r := lx.next()
  650. switch {
  651. case isDigit(r):
  652. return lexNumber
  653. case r == '.':
  654. return lexFloatStart
  655. }
  656. lx.backup()
  657. lx.emit(itemInteger)
  658. return lx.pop()
  659. }
  660. // lexFloatStart starts the consumption of digits of a float after a '.'.
  661. // Namely, at least one digit is required.
  662. func lexFloatStart(lx *lexer) stateFn {
  663. r := lx.next()
  664. if !isDigit(r) {
  665. return lx.errorf("Floats must have a digit after the '.', but got "+
  666. "%q instead.", r)
  667. }
  668. return lexFloat
  669. }
  670. // lexFloat consumes the digits of a float after a '.'.
  671. // Assumes that one digit has been consumed after a '.' already.
  672. func lexFloat(lx *lexer) stateFn {
  673. r := lx.next()
  674. if isDigit(r) {
  675. return lexFloat
  676. }
  677. lx.backup()
  678. lx.emit(itemFloat)
  679. return lx.pop()
  680. }
  681. // lexConst consumes the s[1:] in s. It assumes that s[0] has already been
  682. // consumed.
  683. func lexConst(lx *lexer, s string) stateFn {
  684. for i := range s[1:] {
  685. if r := lx.next(); r != rune(s[i+1]) {
  686. return lx.errorf("Expected %q, but found %q instead.", s[:i+1],
  687. s[:i]+string(r))
  688. }
  689. }
  690. return nil
  691. }
  692. // lexTrue consumes the "rue" in "true". It assumes that 't' has already
  693. // been consumed.
  694. func lexTrue(lx *lexer) stateFn {
  695. if fn := lexConst(lx, "true"); fn != nil {
  696. return fn
  697. }
  698. lx.emit(itemBool)
  699. return lx.pop()
  700. }
  701. // lexFalse consumes the "alse" in "false". It assumes that 'f' has already
  702. // been consumed.
  703. func lexFalse(lx *lexer) stateFn {
  704. if fn := lexConst(lx, "false"); fn != nil {
  705. return fn
  706. }
  707. lx.emit(itemBool)
  708. return lx.pop()
  709. }
  710. // lexCommentStart begins the lexing of a comment. It will emit
  711. // itemCommentStart and consume no characters, passing control to lexComment.
  712. func lexCommentStart(lx *lexer) stateFn {
  713. lx.ignore()
  714. lx.emit(itemCommentStart)
  715. return lexComment
  716. }
  717. // lexComment lexes an entire comment. It assumes that '#' has been consumed.
  718. // It will consume *up to* the first new line character, and pass control
  719. // back to the last state on the stack.
  720. func lexComment(lx *lexer) stateFn {
  721. r := lx.peek()
  722. if isNL(r) || r == eof {
  723. lx.emit(itemText)
  724. return lx.pop()
  725. }
  726. lx.next()
  727. return lexComment
  728. }
  729. // lexSkip ignores all slurped input and moves on to the next state.
  730. func lexSkip(lx *lexer, nextState stateFn) stateFn {
  731. return func(lx *lexer) stateFn {
  732. lx.ignore()
  733. return nextState
  734. }
  735. }
  736. // isWhitespace returns true if `r` is a whitespace character according
  737. // to the spec.
  738. func isWhitespace(r rune) bool {
  739. return r == '\t' || r == ' '
  740. }
  741. func isNL(r rune) bool {
  742. return r == '\n' || r == '\r'
  743. }
  744. func isDigit(r rune) bool {
  745. return r >= '0' && r <= '9'
  746. }
  747. func isHexadecimal(r rune) bool {
  748. return (r >= '0' && r <= '9') ||
  749. (r >= 'a' && r <= 'f') ||
  750. (r >= 'A' && r <= 'F')
  751. }
  752. func isBareKeyChar(r rune) bool {
  753. return (r >= 'A' && r <= 'Z') ||
  754. (r >= 'a' && r <= 'z') ||
  755. (r >= '0' && r <= '9') ||
  756. r == '_' ||
  757. r == '-'
  758. }
  759. func (itype itemType) String() string {
  760. switch itype {
  761. case itemError:
  762. return "Error"
  763. case itemNIL:
  764. return "NIL"
  765. case itemEOF:
  766. return "EOF"
  767. case itemText:
  768. return "Text"
  769. case itemString:
  770. return "String"
  771. case itemRawString:
  772. return "String"
  773. case itemMultilineString:
  774. return "String"
  775. case itemRawMultilineString:
  776. return "String"
  777. case itemBool:
  778. return "Bool"
  779. case itemInteger:
  780. return "Integer"
  781. case itemFloat:
  782. return "Float"
  783. case itemDatetime:
  784. return "DateTime"
  785. case itemTableStart:
  786. return "TableStart"
  787. case itemTableEnd:
  788. return "TableEnd"
  789. case itemKeyStart:
  790. return "KeyStart"
  791. case itemArray:
  792. return "Array"
  793. case itemArrayEnd:
  794. return "ArrayEnd"
  795. case itemCommentStart:
  796. return "CommentStart"
  797. }
  798. panic(fmt.Sprintf("BUG: Unknown type '%d'.", int(itype)))
  799. }
  800. func (item item) String() string {
  801. return fmt.Sprintf("(%s, %s)", item.typ.String(), item.val)
  802. }