Procházet zdrojové kódy

improved parser code

Torkel Ödegaard před 12 roky
rodič
revize
2ded42e9fe

+ 7 - 119
src/app/services/graphite/lexer.js

@@ -130,22 +130,12 @@ define([
       i2 >= 48 && i2 <= 57;        // 0-9
   }
 
-  var Token = {
-    Identifier: "Identifier",
-    NumericLiteral: "NumericLiteral",
-    StringLiteral: "StringLiteral",
-    Punctuator: "Punctuator"
-  };
-
-
   function Lexer(expression) {
     this.input = expression;
     this.char = 1;
     this.from = 1;
   }
 
-  Lexer.Token = Token;
-
   Lexer.prototype = {
 
     peek: function (i) {
@@ -344,7 +334,7 @@ define([
 
       switch (id) {
       default:
-        type = Token.Identifier;
+        type = "identifier";
       }
 
       return {
@@ -415,7 +405,7 @@ define([
 
             if (value.length <= 2) { // 0x
               return {
-                type: Token.NumericLiteral,
+                type: 'number',
                 value: value,
                 isMalformed: true
               };
@@ -429,7 +419,7 @@ define([
             }
 
             return {
-              type: Token.NumericLiteral,
+              type: 'number',
               value: value,
               base: 16,
               isMalformed: false
@@ -465,7 +455,7 @@ define([
             }
 
             return {
-              type: Token.NumericLiteral,
+              type: 'number',
               value: value,
               base: 8,
               isMalformed: false
@@ -545,7 +535,7 @@ define([
       }
 
       return {
-        type: Token.NumericLiteral,
+        type: 'number',
         value: value,
         base: 10,
         isMalformed: !isFinite(value)
@@ -563,7 +553,7 @@ define([
       case "{":
       case "}":
         return {
-          type: Token.Punctuator,
+          type: ch1,
           value: ch1
         };
       }
@@ -594,93 +584,22 @@ define([
       var value = "";
       var startLine = this.line;
       var startChar = this.char;
-      var allowNewLine = false;
 
       this.skip();
 
       while (this.peek() !== quote) {
-        while (this.peek() === "") { // End Of Line
-
-          // If an EOL is not preceded by a backslash, show a warning
-          // and proceed like it was a legit multi-line string where
-          // author simply forgot to escape the newline symbol.
-          //
-          // Another approach is to implicitly close a string on EOL
-          // but it generates too many false positives.
-
-          if (!allowNewLine) {
-            this.trigger("warning", {
-              code: "W112",
-              line: this.line,
-              character: this.char
-            });
-          } else {
-            allowNewLine = false;
-
-            // Otherwise show a warning if multistr option was not set.
-            // For JSON, show warning no matter what.
-
-            this.triggerAsync("warning", {
-              code: "W043",
-              line: this.line,
-              character: this.char
-            }, checks, function () { return !state.option.multistr; });
-
-            this.triggerAsync("warning", {
-              code: "W042",
-              line: this.line,
-              character: this.char
-            }, checks, function () { return state.jsonMode && state.option.multistr; });
-          }
-
-          // If we get an EOF inside of an unclosed string, show an
-          // error and implicitly close it at the EOF point.
-
-          if (!this.nextLine()) {
-            this.trigger("error", {
-              code: "E029",
-              line: startLine,
-              character: startChar
-            });
-
-            return {
-              type: Token.StringLiteral,
-              value: value,
-              isUnclosed: true,
-              quote: quote
-            };
-          }
-        }
 
-        allowNewLine = false;
         var char = this.peek();
         var jump = 1; // A length of a jump, after we're done
                       // parsing this character.
 
-        if (char < " ") {
-          // Warn about a control character in a string.
-          this.trigger("warning", {
-            code: "W113",
-            line: this.line,
-            character: this.char,
-            data: [ "<non-printable>" ]
-          });
-        }
-
         // Special treatment for some escaped characters.
-
         if (char === "\\") {
           this.skip();
           char = this.peek();
 
           switch (char) {
           case "'":
-            this.triggerAsync("warning", {
-              code: "W114",
-              line: this.line,
-              character: this.char,
-              data: [ "\\'" ]
-            }, checks, function () {return state.jsonMode; });
             break;
           case "b":
             char = "\b";
@@ -699,41 +618,16 @@ define([
             break;
           case "0":
             char = "\0";
-
-            // Octal literals fail in strict mode.
-            // Check if the number is between 00 and 07.
-            var n = parseInt(this.peek(1), 10);
-            this.triggerAsync("warning", {
-              code: "W115",
-              line: this.line,
-              character: this.char
-            }, checks,
-            function () { return n >= 0 && n <= 7 && state.directive["use strict"]; });
             break;
           case "u":
             char = String.fromCharCode(parseInt(this.input.substr(1, 4), 16));
             jump = 5;
             break;
           case "v":
-            this.triggerAsync("warning", {
-              code: "W114",
-              line: this.line,
-              character: this.char,
-              data: [ "\\v" ]
-            }, checks, function () { return state.jsonMode; });
-
             char = "\v";
             break;
           case "x":
             var x = parseInt(this.input.substr(1, 2), 16);
-
-            this.triggerAsync("warning", {
-              code: "W114",
-              line: this.line,
-              character: this.char,
-              data: [ "\\x-" ]
-            }, checks, function () { return state.jsonMode; });
-
             char = String.fromCharCode(x);
             jump = 3;
             break;
@@ -742,7 +636,6 @@ define([
           case "/":
             break;
           case "":
-            allowNewLine = true;
             char = "";
             break;
           case "!":
@@ -753,11 +646,6 @@ define([
             /*falls through */
           default:
             // Weird escaping.
-            this.trigger("warning", {
-              code: "W044",
-              line: this.line,
-              character: this.char
-            });
           }
         }
 
@@ -767,7 +655,7 @@ define([
 
       this.skip();
       return {
-        type: Token.StringLiteral,
+        type: 'string',
         value: value,
         isUnclosed: false,
         quote: quote

+ 128 - 167
src/app/services/graphite/parser.js

@@ -20,186 +20,147 @@ define([
     this.lexer = new Lexer(expression);
     this.state = "start";
     this.error = null;
+    this.tokens = this.lexer.tokenize();
+    this.index = 0;
   }
 
   Parser.Nodes = NodeTypes;
 
   Parser.prototype = {
-      getAst: function () {
-        return this.parse('start');
-      },
 
-      isUnexpectedToken: function (expected, value) {
-        if (this.token === null) {
-          this.error = "Expected token: " + expected + " instead found end of string";
-          return true;
-        }
+    getAst: function () { // Public entry point: parses from the current token position by delegating to start().
+      return this.start(); // Whatever start() yields: a function-call node, a metric node, or null.
+    },
 
-        if (this.token.type === expected) {
-          return false;
-        }
+    start: function () { // Top-level rule: a function call if one matches, otherwise a metric expression.
+      return this.functionCall() || this.metricExpression(); // metricExpression() is only tried when functionCall() returned a falsy value.
+    },
 
-        if (value && this.token.value === value) {
-          return false;
-        }
+    /**
+     * Parses a dot-separated metric path (identifier ('.' identifier)*)
+     * starting at the current token.
+     *
+     * Returns { type: 'metric', segments: [{ type: 'segment', value }, ...] }
+     * and advances this.index past the consumed tokens. Returns null without
+     * consuming anything when the current token is not an identifier.
+     * When a '.' is not followed by another identifier, this.error is set
+     * and null is returned (this.index is left after the '.').
+     */
+    metricExpression: function() {
+      if (!this.match('identifier')) {
+        return null;
+      }
+
+      var node = {
+        type: 'metric',
+        segments: [{
+          type: 'segment',
+          value: this.tokens[this.index].value
+        }]
+      };
+
+      this.index++;
+
+      if (this.match('.')) {
+        this.index++;
+        var rest = this.metricExpression();
+
+        if (!rest) {
+          // Trailing or doubled '.': the recursive call found no identifier,
+          // so there are no segments to append. Report it instead of
+          // dereferencing null.
+          this.error = this.error || 'expected metric segment after "."';
+          return null;
+        }
+
+        node.segments = node.segments.concat(rest.segments);
+      }
+
+      return node;
+    },
+
+    /**
+     * Tests the token at this.index + index against a token type.
+     *
+     * type  - token type to compare with; the empty string '' stands for
+     *         "no token at that position" (end of input).
+     * index - offset from the current position.
+     *
+     * Always returns a strict boolean.
+     */
+    matchToken: function(type, index) {
+      var token = this.tokens[this.index + index];
+
+      if (token === undefined) {
+        // Past the end of the token list: only the '' type matches.
+        return type === '';
+      }
+
+      return Boolean(token) && token.type === type;
+    },
+
+    /**
+     * Looks ahead without consuming: true when the current token has type
+     * token1 and, if token2 is given, the following token has type token2.
+     * Passing '' as a type matches end of input (see matchToken), so
+     * match('identifier', '') tests for an identifier that is the last token.
+     */
+    match: function(token1, token2) {
+      // Compare against null/undefined rather than truthiness so that an
+      // explicit '' look-ahead is not silently skipped.
+      return !!(this.matchToken(token1, 0) &&
+        (token2 == null || this.matchToken(token2, 1)));
+    },
+
+    /**
+     * Parses `name(params...)` starting at the current token.
+     *
+     * Returns null without consuming anything unless the next two tokens
+     * are an identifier followed by '('. Otherwise consumes the call and
+     * returns { type: 'function', name: ..., params: [...] }.
+     * If the closing ')' is missing, this.error is set (an earlier, more
+     * specific error is kept) and null is returned; tokens consumed up to
+     * that point are not rewound.
+     */
+    functionCall: function() {
+      if (!this.match('identifier', '(')) {
+        return null;
+      }
+
+      var node = {
+        type: 'function',
+        name: this.tokens[this.index].value
+      };
+
+      // Skip the function name and the opening parenthesis.
+      this.index += 2;
+
+      node.params = this.functionParameters();
+
+      if (!this.match(')')) {
+        this.error = this.error || 'missing closing parenthesis';
+        return null;
+      }
+
+      this.index++;
+
+      return node;
+    },
 
-        this.error = "Expected  token " + expected +
-            ' instead found token ' + this.token.type +
-            ' ("'  + this.token.value + '")' +
-            " at position: " + this.lexer.char;
+    /**
+     * Parses a comma-separated parameter list up to, but not including,
+     * the closing ')'.
+     *
+     * Each parameter is a nested function call, a metric expression, a
+     * number or a string, tried in that order. Returns an array of nodes,
+     * empty when the list is empty or the input ends. When the current
+     * token starts none of those alternatives, this.error is set (unless
+     * an earlier error is already recorded) and the parameters collected
+     * at this level are dropped rather than containing a null entry.
+     */
+    functionParameters: function () {
+      if (this.match(')') || this.match('')) {
+        return [];
+      }
+
+      var param =
+        this.functionCall() ||
+        this.metricExpression() ||
+        this.numericLiteral() ||
+        this.stringLiteral();
+
+      if (!param) {
+        // e.g. "sum(,)" or "sum(()": nothing parseable at this position.
+        this.error = this.error || 'unexpected token in function parameters';
+        return [];
+      }
+
+      if (!this.match(',')) {
+        return [param];
+      }
+
+      this.index++;
+      return [param].concat(this.functionParameters());
+    },
+
+    /**
+     * Consumes the current token when it is of type 'number' and returns
+     * { type: 'number', value: <token value as produced by the lexer> };
+     * otherwise returns null and leaves this.index unchanged.
+     */
+    numericLiteral: function () {
+      if (this.match('number')) {
+        var literal = {
+          type: 'number',
+          value: this.tokens[this.index].value
+        };
+
+        this.index += 1;
+        return literal;
+      }
+
+      return null;
+    },
 
+    /**
+     * Consumes the current token when it is of type 'string' and returns
+     * { type: 'string', value: <token value as produced by the lexer> };
+     * otherwise returns null and leaves this.index unchanged.
+     */
+    stringLiteral: function () {
+      if (this.match('string')) {
+        var literal = {
+          type: 'string',
+          value: this.tokens[this.index].value
+        };
+
+        this.index += 1;
+        return literal;
+      }
+
+      return null;
+    },
+
+    isUnexpectedToken: function (expected, value) {
+      if (this.token === null) {
+        this.error = "Expected token: " + expected + " instead found end of string";
         return true;
-      },
-
-      parse: function (state, allowParams) {
-        var node = { };
-
-        while(true) {
-          this.token = this.lexer.next();
-
-          switch(state) {
-          case "start":
-            if (allowParams) {
-              if (this.token === null) {
-                return null;
-              }
-
-              if (this.token.type === Lexer.Token.NumericLiteral) {
-                return {
-                  type: NodeTypes.NumericLiteral,
-                  value: parseInt(this.token.value)
-                };
-              }
-
-              if (this.token.type === Lexer.Token.StringLiteral) {
-                return {
-                  type: NodeTypes.StringLiteral,
-                  value: this.token.value
-                };
-              }
-            }
-
-            if (this.isUnexpectedToken(Lexer.Token.Identifier)) {
-              return;
-            }
-
-            state = "identifier";
-            this.prevToken = this.token;
-            break;
-
-          case "identifier":
-            if (this.token == null || (allowParams && this.token.value === ',')) {
-              return {
-                type: NodeTypes.MetricExpression,
-                segments: [{
-                    type: NodeTypes.MetricExpression,
-                    value: this.prevToken.value
-                }]
-              };
-            }
-
-            if (this.isUnexpectedToken(Lexer.Token.Punctuator)) {
-              return null;
-            }
-
-            if (this.token.value === '.') {
-              state = "metricNode";
-              node.type = NodeTypes.MetricExpression;
-              node.segments = [{
-                type: NodeTypes.MetricNode,
-                value: this.prevToken.value
-              }];
-
-              continue;
-            }
-
-            if (this.token.value === '(') {
-              node.type = NodeTypes.FunctionCall;
-              node.name = this.prevToken.value;
-              node.params = this.parseFunc();
-              return node;
-            }
-
-            if (this.token.value === ')') {
-              return node;
-            }
-
-            break;
-
-          case 'metricEnd':
-            if (this.token === null) {
-              return node;
-            }
-
-            if (this.isUnexpectedToken(Lexer.Token.Punctuator)) {
-              return null;
-            }
-
-            if (this.token.value === '.') {
-              state = 'metricNode';
-            }
-
-            if (allowParams && (this.token.value === ',' || this.token.value === ')')) {
-              return node;
-            }
-
-            break;
-          case 'metricNode':
-            if (this.isUnexpectedToken(Lexer.Token.Identifier)) {
-              return null;
-            }
-
-            node.segments.push({
-              type: NodeTypes.MetricNode,
-              value: this.token.value
-            });
-
-            state = 'metricEnd';
-            break;
-          default:
-            this.error = 'unknown token: ' + this.token.type;
-          }
-        }
-      },
-
-      parseFunc: function() {
-        var arguments = [];
-        var arg;
-
-        while(true) {
-
-          arg = this.parse('start', true);
-          if (arg === null) {
-            this.error = "expected function arguments";
-            return null;
-          }
-
-          arguments.push(arg);
-
-          if (this.token === null) {
-            this.error = "expected closing function at position: " + this.lexer.char;
-            return null;
-          }
-
-          if (this.token.value === ')') {
-            return arguments;
-          }
-
-          if (this.token.type === Lexer.Token.NumericLiteral ||
-              this.token.type === Lexer.Token.StringLiteral) {
-            this.token = this.lexer.next();
-          }
-
-          if (this.isUnexpectedToken(Lexer.Token.Punctuator, ',')) {
-            return null;
-          }
-
-          if (this.token.value === ')') {
-            return arguments;
-          }
-        }
+      }
 
+      if (this.token.type === expected) {
+        return false;
       }
+
+      if (value && this.token.value === value) {
+        return false;
+      }
+
+      this.error = "Expected  token " + expected +
+          ' instead found token ' + this.token.type +
+          ' ("'  + this.token.value + '")' +
+          " at position: " + this.lexer.char;
+
+      return true;
+    },
   };
 
   return Parser;

+ 7 - 7
src/test/specs/lexer-specs.js

@@ -9,22 +9,22 @@ define([
       var tokens = lexer.tokenize();
       expect(tokens[0].value).to.be('metric');
       expect(tokens[1].value).to.be('.');
-      expect(tokens[2].type).to.be(Lexer.Token.Identifier);
-      expect(tokens[3].type).to.be(Lexer.Token.Punctuator);
+      expect(tokens[2].type).to.be('identifier');
+      expect(tokens[4].type).to.be('identifier');
     });
 
     it('should tokenize functions and args', function() {
       var lexer = new Lexer("sum(metric.test, 12, 'test')");
       var tokens = lexer.tokenize();
       expect(tokens[0].value).to.be('sum');
-      expect(tokens[0].type).to.be(Lexer.Token.Identifier);
+      expect(tokens[0].type).to.be('identifier');
       expect(tokens[1].value).to.be('(');
-      expect(tokens[1].type).to.be(Lexer.Token.Punctuator);
-      expect(tokens[5].type).to.be(Lexer.Token.Punctuator);
+      expect(tokens[1].type).to.be('(');
+      expect(tokens[5].type).to.be(',');
       expect(tokens[5].value).to.be(',');
-      expect(tokens[6].type).to.be(Lexer.Token.NumericLiteral);
+      expect(tokens[6].type).to.be('number');
       expect(tokens[6].value).to.be('12');
-      expect(tokens[8].type).to.be(Lexer.Token.StringLiteral);
+      expect(tokens[8].type).to.be('string');
       expect(tokens[8].value).to.be('test');
       expect(tokens[tokens.length - 1].value).to.be(')');
     });

+ 13 - 13
src/test/specs/parser-specs.js

@@ -9,7 +9,7 @@ define([
       var rootNode = parser.getAst();
 
       expect(parser.error).to.be(null);
-      expect(rootNode.type).to.be(Parser.Nodes.MetricExpression);
+      expect(rootNode.type).to.be('metric');
       expect(rootNode.segments.length).to.be(5);
       expect(rootNode.segments[0].value).to.be('metric');
 
@@ -19,7 +19,7 @@ define([
       var parser = new Parser('sum(test)');
       var rootNode = parser.getAst();
       expect(parser.error).to.be(null);
-      expect(rootNode.type).to.be(Parser.Nodes.FunctionCall);
+      expect(rootNode.type).to.be('function');
       expect(rootNode.params.length).to.be(1);
     });
 
@@ -28,11 +28,11 @@ define([
       var rootNode = parser.getAst();
 
       expect(parser.error).to.be(null);
-      expect(rootNode.type).to.be(Parser.Nodes.FunctionCall);
+      expect(rootNode.type).to.be('function');
       expect(rootNode.params.length).to.be(3);
-      expect(rootNode.params[0].type).to.be(Parser.Nodes.MetricExpression);
-      expect(rootNode.params[1].type).to.be(Parser.Nodes.NumericLiteral);
-      expect(rootNode.params[2].type).to.be(Parser.Nodes.StringLiteral);
+      expect(rootNode.params[0].type).to.be('metric');
+      expect(rootNode.params[1].type).to.be('number');
+      expect(rootNode.params[2].type).to.be('string');
     });
 
     it('function with nested function', function() {
@@ -40,13 +40,13 @@ define([
       var rootNode = parser.getAst();
 
       expect(parser.error).to.be(null);
-      expect(rootNode.type).to.be(Parser.Nodes.FunctionCall);
+      expect(rootNode.type).to.be('function');
       expect(rootNode.params.length).to.be(1);
-      expect(rootNode.params[0].type).to.be(Parser.Nodes.FunctionCall);
+      expect(rootNode.params[0].type).to.be('function');
       expect(rootNode.params[0].name).to.be('scaleToSeconds');
       expect(rootNode.params[0].params.length).to.be(2);
-      expect(rootNode.params[0].params[0].type).to.be(Parser.Nodes.MetricExpression);
-      expect(rootNode.params[0].params[1].type).to.be(Parser.Nodes.NumericLiteral);
+      expect(rootNode.params[0].params[0].type).to.be('metric');
+      expect(rootNode.params[0].params[1].type).to.be('number');
     });
 
     it('function with multiple series', function() {
@@ -54,10 +54,10 @@ define([
       var rootNode = parser.getAst();
 
       expect(parser.error).to.be(null);
-      expect(rootNode.type).to.be(Parser.Nodes.FunctionCall);
+      expect(rootNode.type).to.be('function');
       expect(rootNode.params.length).to.be(2);
-      expect(rootNode.params[0].type).to.be(Parser.Nodes.MetricExpression);
-      expect(rootNode.params[1].type).to.be(Parser.Nodes.MetricExpression);
+      expect(rootNode.params[0].type).to.be('metric');
+      expect(rootNode.params[1].type).to.be('metric');
     });
 
   });