parser.py 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342
  1. import re, sys
  2. from .core import TomlError
  3. from .utils import rfc3339_re, parse_rfc3339_re
  4. if sys.version_info[0] == 2:
  5. _chr = unichr
  6. else:
  7. _chr = chr
  8. def load(fin, translate=lambda t, x, v: v, object_pairs_hook=dict):
  9. return loads(fin.read(), translate=translate, object_pairs_hook=object_pairs_hook, filename=getattr(fin, 'name', repr(fin)))
  10. def loads(s, filename='<string>', translate=lambda t, x, v: v, object_pairs_hook=dict):
  11. if isinstance(s, bytes):
  12. s = s.decode('utf-8')
  13. s = s.replace('\r\n', '\n')
  14. root = object_pairs_hook()
  15. tables = object_pairs_hook()
  16. scope = root
  17. src = _Source(s, filename=filename)
  18. ast = _p_toml(src, object_pairs_hook=object_pairs_hook)
  19. def error(msg):
  20. raise TomlError(msg, pos[0], pos[1], filename)
  21. def process_value(v, object_pairs_hook):
  22. kind, text, value, pos = v
  23. if kind == 'array':
  24. if value and any(k != value[0][0] for k, t, v, p in value[1:]):
  25. error('array-type-mismatch')
  26. value = [process_value(item, object_pairs_hook=object_pairs_hook) for item in value]
  27. elif kind == 'table':
  28. value = object_pairs_hook([(k, process_value(value[k], object_pairs_hook=object_pairs_hook)) for k in value])
  29. return translate(kind, text, value)
  30. for kind, value, pos in ast:
  31. if kind == 'kv':
  32. k, v = value
  33. if k in scope:
  34. error('duplicate_keys. Key "{0}" was used more than once.'.format(k))
  35. scope[k] = process_value(v, object_pairs_hook=object_pairs_hook)
  36. else:
  37. is_table_array = (kind == 'table_array')
  38. cur = tables
  39. for name in value[:-1]:
  40. if isinstance(cur.get(name), list):
  41. d, cur = cur[name][-1]
  42. else:
  43. d, cur = cur.setdefault(name, (None, object_pairs_hook()))
  44. scope = object_pairs_hook()
  45. name = value[-1]
  46. if name not in cur:
  47. if is_table_array:
  48. cur[name] = [(scope, object_pairs_hook())]
  49. else:
  50. cur[name] = (scope, object_pairs_hook())
  51. elif isinstance(cur[name], list):
  52. if not is_table_array:
  53. error('table_type_mismatch')
  54. cur[name].append((scope, object_pairs_hook()))
  55. else:
  56. if is_table_array:
  57. error('table_type_mismatch')
  58. old_scope, next_table = cur[name]
  59. if old_scope is not None:
  60. error('duplicate_tables')
  61. cur[name] = (scope, next_table)
  62. def merge_tables(scope, tables):
  63. if scope is None:
  64. scope = object_pairs_hook()
  65. for k in tables:
  66. if k in scope:
  67. error('key_table_conflict')
  68. v = tables[k]
  69. if isinstance(v, list):
  70. scope[k] = [merge_tables(sc, tbl) for sc, tbl in v]
  71. else:
  72. scope[k] = merge_tables(v[0], v[1])
  73. return scope
  74. return merge_tables(root, tables)
  75. class _Source:
  76. def __init__(self, s, filename=None):
  77. self.s = s
  78. self._pos = (1, 1)
  79. self._last = None
  80. self._filename = filename
  81. self.backtrack_stack = []
  82. def last(self):
  83. return self._last
  84. def pos(self):
  85. return self._pos
  86. def fail(self):
  87. return self._expect(None)
  88. def consume_dot(self):
  89. if self.s:
  90. self._last = self.s[0]
  91. self.s = self[1:]
  92. self._advance(self._last)
  93. return self._last
  94. return None
  95. def expect_dot(self):
  96. return self._expect(self.consume_dot())
  97. def consume_eof(self):
  98. if not self.s:
  99. self._last = ''
  100. return True
  101. return False
  102. def expect_eof(self):
  103. return self._expect(self.consume_eof())
  104. def consume(self, s):
  105. if self.s.startswith(s):
  106. self.s = self.s[len(s):]
  107. self._last = s
  108. self._advance(s)
  109. return True
  110. return False
  111. def expect(self, s):
  112. return self._expect(self.consume(s))
  113. def consume_re(self, re):
  114. m = re.match(self.s)
  115. if m:
  116. self.s = self.s[len(m.group(0)):]
  117. self._last = m
  118. self._advance(m.group(0))
  119. return m
  120. return None
  121. def expect_re(self, re):
  122. return self._expect(self.consume_re(re))
  123. def __enter__(self):
  124. self.backtrack_stack.append((self.s, self._pos))
  125. def __exit__(self, type, value, traceback):
  126. if type is None:
  127. self.backtrack_stack.pop()
  128. else:
  129. self.s, self._pos = self.backtrack_stack.pop()
  130. return type == TomlError
  131. def commit(self):
  132. self.backtrack_stack[-1] = (self.s, self._pos)
  133. def _expect(self, r):
  134. if not r:
  135. raise TomlError('msg', self._pos[0], self._pos[1], self._filename)
  136. return r
  137. def _advance(self, s):
  138. suffix_pos = s.rfind('\n')
  139. if suffix_pos == -1:
  140. self._pos = (self._pos[0], self._pos[1] + len(s))
  141. else:
  142. self._pos = (self._pos[0] + s.count('\n'), len(s) - suffix_pos)
  143. _ews_re = re.compile(r'(?:[ \t]|#[^\n]*\n|#[^\n]*\Z|\n)*')
  144. def _p_ews(s):
  145. s.expect_re(_ews_re)
  146. _ws_re = re.compile(r'[ \t]*')
  147. def _p_ws(s):
  148. s.expect_re(_ws_re)
  149. _escapes = { 'b': '\b', 'n': '\n', 'r': '\r', 't': '\t', '"': '"',
  150. '\\': '\\', 'f': '\f' }
  151. _basicstr_re = re.compile(r'[^"\\\000-\037]*')
  152. _short_uni_re = re.compile(r'u([0-9a-fA-F]{4})')
  153. _long_uni_re = re.compile(r'U([0-9a-fA-F]{8})')
  154. _escapes_re = re.compile(r'[btnfr\"\\]')
  155. _newline_esc_re = re.compile('\n[ \t\n]*')
  156. def _p_basicstr_content(s, content=_basicstr_re):
  157. res = []
  158. while True:
  159. res.append(s.expect_re(content).group(0))
  160. if not s.consume('\\'):
  161. break
  162. if s.consume_re(_newline_esc_re):
  163. pass
  164. elif s.consume_re(_short_uni_re) or s.consume_re(_long_uni_re):
  165. v = int(s.last().group(1), 16)
  166. if 0xd800 <= v < 0xe000:
  167. s.fail()
  168. res.append(_chr(v))
  169. else:
  170. s.expect_re(_escapes_re)
  171. res.append(_escapes[s.last().group(0)])
  172. return ''.join(res)
  173. _key_re = re.compile(r'[0-9a-zA-Z-_]+')
  174. def _p_key(s):
  175. with s:
  176. s.expect('"')
  177. r = _p_basicstr_content(s, _basicstr_re)
  178. s.expect('"')
  179. return r
  180. if s.consume('\''):
  181. if s.consume('\'\''):
  182. s.consume('\n')
  183. r = s.expect_re(_litstr_ml_re).group(0)
  184. s.expect('\'\'\'')
  185. else:
  186. r = s.expect_re(_litstr_re).group(0)
  187. s.expect('\'')
  188. return r
  189. return s.expect_re(_key_re).group(0)
  190. _float_re = re.compile(r'[+-]?(?:0|[1-9](?:_?\d)*)(?:\.\d(?:_?\d)*)?(?:[eE][+-]?(?:\d(?:_?\d)*))?')
  191. _basicstr_ml_re = re.compile(r'(?:""?(?!")|[^"\\\000-\011\013-\037])*')
  192. _litstr_re = re.compile(r"[^'\000\010\012-\037]*")
  193. _litstr_ml_re = re.compile(r"(?:(?:|'|'')(?:[^'\000-\010\013-\037]))*")
  194. def _p_value(s, object_pairs_hook):
  195. pos = s.pos()
  196. if s.consume('true'):
  197. return 'bool', s.last(), True, pos
  198. if s.consume('false'):
  199. return 'bool', s.last(), False, pos
  200. if s.consume('"'):
  201. if s.consume('""'):
  202. s.consume('\n')
  203. r = _p_basicstr_content(s, _basicstr_ml_re)
  204. s.expect('"""')
  205. else:
  206. r = _p_basicstr_content(s, _basicstr_re)
  207. s.expect('"')
  208. return 'str', r, r, pos
  209. if s.consume('\''):
  210. if s.consume('\'\''):
  211. s.consume('\n')
  212. r = s.expect_re(_litstr_ml_re).group(0)
  213. s.expect('\'\'\'')
  214. else:
  215. r = s.expect_re(_litstr_re).group(0)
  216. s.expect('\'')
  217. return 'str', r, r, pos
  218. if s.consume_re(rfc3339_re):
  219. m = s.last()
  220. return 'datetime', m.group(0), parse_rfc3339_re(m), pos
  221. if s.consume_re(_float_re):
  222. m = s.last().group(0)
  223. r = m.replace('_','')
  224. if '.' in m or 'e' in m or 'E' in m:
  225. return 'float', m, float(r), pos
  226. else:
  227. return 'int', m, int(r, 10), pos
  228. if s.consume('['):
  229. items = []
  230. with s:
  231. while True:
  232. _p_ews(s)
  233. items.append(_p_value(s, object_pairs_hook=object_pairs_hook))
  234. s.commit()
  235. _p_ews(s)
  236. s.expect(',')
  237. s.commit()
  238. _p_ews(s)
  239. s.expect(']')
  240. return 'array', None, items, pos
  241. if s.consume('{'):
  242. _p_ws(s)
  243. items = object_pairs_hook()
  244. if not s.consume('}'):
  245. k = _p_key(s)
  246. _p_ws(s)
  247. s.expect('=')
  248. _p_ws(s)
  249. items[k] = _p_value(s, object_pairs_hook=object_pairs_hook)
  250. _p_ws(s)
  251. while s.consume(','):
  252. _p_ws(s)
  253. k = _p_key(s)
  254. _p_ws(s)
  255. s.expect('=')
  256. _p_ws(s)
  257. items[k] = _p_value(s, object_pairs_hook=object_pairs_hook)
  258. _p_ws(s)
  259. s.expect('}')
  260. return 'table', None, items, pos
  261. s.fail()
  262. def _p_stmt(s, object_pairs_hook):
  263. pos = s.pos()
  264. if s.consume( '['):
  265. is_array = s.consume('[')
  266. _p_ws(s)
  267. keys = [_p_key(s)]
  268. _p_ws(s)
  269. while s.consume('.'):
  270. _p_ws(s)
  271. keys.append(_p_key(s))
  272. _p_ws(s)
  273. s.expect(']')
  274. if is_array:
  275. s.expect(']')
  276. return 'table_array' if is_array else 'table', keys, pos
  277. key = _p_key(s)
  278. _p_ws(s)
  279. s.expect('=')
  280. _p_ws(s)
  281. value = _p_value(s, object_pairs_hook=object_pairs_hook)
  282. return 'kv', (key, value), pos
  283. _stmtsep_re = re.compile(r'(?:[ \t]*(?:#[^\n]*)?\n)+[ \t]*')
  284. def _p_toml(s, object_pairs_hook):
  285. stmts = []
  286. _p_ews(s)
  287. with s:
  288. stmts.append(_p_stmt(s, object_pairs_hook=object_pairs_hook))
  289. while True:
  290. s.commit()
  291. s.expect_re(_stmtsep_re)
  292. stmts.append(_p_stmt(s, object_pairs_hook=object_pairs_hook))
  293. _p_ews(s)
  294. s.expect_eof()
  295. return stmts