{ /* ast.syn -- Create an Abstract Syntax Tree Copyright (c) 2002 Parsifal Software. All Rights Reserved. The grammar contained in this file describes the same language as the grammar in pll.syn. The differences can be categorized as follows: . Rules rewritten to make computation easier. In pll.syn all of the arithmetic operators have been factored out. Here the arithmetic operators have been substituted back into the rules for expressions to make computation more straightforward. . Rules rewritten to handle looping. The rules for loop constructs have been rewritten to handle repetitive parsing of loops. . Semantically determined rules (true, false) added to support parse time implementation of loops and if-else statements. The actual parser modules, ast.h and ast.cpp are created from ast.syn by the AnaGram parser generator using the Build Parser command. To build a demonstration program that illustrates the use of this parser in an interpreter that executes from an abstract syntax tree, compile and link as a single single project: astxi.cpp // Execution module demo.cpp // Demo program ast.cpp // This parser, creates abstract syntax tree astdefs.cpp // Node definitions comdefs.cpp // Common script definitions agclib1\src\ag*.cpp // Supporting class library Include files describing the class library are in agclib1\include To build a demonstration program that illustrates the use of this parser in an interpreter that compiles bytecode from an abstract syntax tree, compile and link as a single single project: astci.cpp // Execution module demo.cpp // Demo program ast.cpp // This parser, creates abstract syntax tree astdefs.cpp // Node definitions bcidefs.cpp // Bytecode and compiler definitions comdefs.cpp // Common script definitions agclib1\src\ag*.cpp // Supporting class library Include files describing the class library are in agclib1\include This grammar describes a language that is somewhat similar to Pascal. The language consists of Pascal like statements and expressions. The language also implements Fortran-style exponentiation and simple dump and print statements. The syntax below uses "open" and "closed" statements to eliminate the traditional "dangling else" or "if-else" ambiguity problem. See http://www.parsifalsoft.com/ifelse.html or doc/ifelse.htm for a more detailed discussion. Statement types supported are: assignment statements compound statements if/else statements while statements repeat/until statements for statements dump and print statements There are no declarations. Scalar values may be explicitly cast to (long) or (double). Both integer and floating point values are stored as doubles. Scripts may contain any number of statements. White space may be used freely, including both C and C++ style comments. For further information about this program or the AnaGram parser generator, please contact: Parsifal Software http://www.parsifalsoft.com info@parsifalsoft.com +1-800-879-2577, Voice/Fax +1-508-358-2564 P.O. Box 219 Wayland, Massachusetts 01778 USA */ #include "comdefs.h" #include #include "astdefs.h" // Defines node structure of abstract syntax tree #include // Forward reference for parxer control block struct parse_pcb_struct; struct Tree : public AbstractSyntaxTree { parse_pcb_struct *pcbPointer; FileLocation context() const; }; #define AST PCB.ast } /*** CONFIGURATION SECTION **************************************************/ [ // Grammar adjustments ~case sensitive disregard white space // Skip over white space (defined below) distinguish lexemes lexeme {integer, real, name, string element, character constant} distinguish keywords {'a-z' + 'A-Z'} wrapper {AgString, Value} // Special handling on parser stack wrapper {AgStack} parser name = parse // name parser function parser file name = "#.cpp" // # will be replaced by name of syntax file test file mask = "*.scf" // filter for File Trace no cr //omit carriage returns in output files, for *nix compatibility default token type = AstNode * // Operating modes pointer input // Take input from array in memory pointer type = const unsigned char * reentrant parser // Make parser reentrant context type = FileLocation // Put the following into the parser control block for use during parsing extend pcb { // Declare an abstract syntax tree Tree ast; int loopDepth; // Constructor for pcb parse_pcb_struct(const char *text); // Functions used during parsing void checkLoop(); void reportError(); } ] /*** GRAMMAR ****************************************************************/ /* "script" is marked with a $ to indicate it is the "grammar token", that is, the whole of the input we're intending to parse. */ (void) script $ -> statement list:sl, eof = AST.root = AST.node(typeScript, AST.listNode(typeStatementBlock, sl)); // Zero or more statements. Note that "statement list" is recursively defined. (AgStack) statement list -> =AgStack(); -> statement list:s, statement:x =s.push(x); /* A single statement can be "open" or "closed". An "open" statement is a statement that can legally be followed by an "else" keyword. A "closed" statement is one that cannot. "Open" and "closed" statements are a means for resolving a problem common to many programming languages known as the if-then-else ambiguity or "dangling else". See www.parsifalsoft.com/ifelse.htm for a discussion of the ambiguity and this technique for resolving it. */ statement -> open statement -> closed statement open statement -> if condition:x, statement:s =AST.node(typeIfStatement, x, s); -> if condition:x, closed statement:s1, "else", open statement:s2 = AST.node(typeIfElseStatement, x, s1, s2); -> WHILE, expression:x, "do", open statement:s = PCB.loopDepth--, AST.node(typeWhileStatement, x, s); -> FOR, lvalue:lv, ":=", expression:b, for increment:i, "to", expression:e, "do", open statement:s = AST.node(typeSimpleFor, lv, b, i, e, s); closed statement -> if condition:x, closed statement:s1, "else", closed statement:s2 = AST.node(typeIfElseStatement,x,s1,s2); -> WHILE, expression:x, "do", closed statement:s = AST.node(typeWhileStatement,x,s); -> FOR, lvalue:lv, ":=", expression:b, for increment:i, "to", expression:e, "do", closed statement:s = AST.node(typeSimpleFor, lv, b, i, e, s); -> simple statement:x =x; for increment -> =AST.constantNode(1); -> "by", expression:x =x; /* A "simple statement" is one that does not end with another statement. All simple statements are closed statements and are therefore factored out for clarity. */ simple statement -> REPEAT, statement list:s, UNTIL, expression:x, ';' = AST.node(typeRepeatStatement,AST.listNode(typeStatementBlock, s), x); -> lvalue:lv, ":=", expression:x, ';' =AST.assignmentNode(lv,SAP,x); -> ';' =AST.node(typeNull); -> compound statement -> "return", optional expression:x, ';' =AST.node(typeReturn, x); -> dump statement:ns, ';' =AST.listNode(typeDump, ns); -> print statement:ns, ';' =AST.listNode(typePrint, ns); (void) REPEAT -> "repeat" (void) UNTIL -> "until" (void) WHILE -> "while" (void) FOR -> "for" if condition -> "if", expression:x, "then" =x; compound statement -> "begin", statement list:s, "end" =AST.listNode(typeStatementBlock, s); // The dump statement accepts a comma delimited list of variable names (AgStack) dump statement -> "dump", name:k =AgStack().push(AST.variableNode(k)); -> dump statement:ns, ',', name:k =ns.push(AST.variableNode(k)); // The print statement accepts a comma delimited list of expressions (AgStack) print statement -> "print", expression:x =AgStack().push(x); -> print statement:s, ',', expression:x =s.push(x); optional expression -> =AST.node(typeNull); -> expression // General expression. expression -> simple expression -> simple expression:x, relational op:op, simple expression:y = AST.binaryNode(op,x,y); simple expression -> term -> simple expression:x, additive op:op, term:y =AST.binaryNode(op,x,y); term -> unary expression -> term:x, multiplicative op:op, unary expression:y =AST.binaryNode(op,x,y); unary expression -> factor // next higher precedence level -> '+', unary expression:x =x; -> unary op:op, unary expression:x =AST.unaryNode(op, x); /* Syntactically, we can use ** for exponentiation because we don't have pointers. (In C, ** could be confused with pointer indirection.) */ factor -> primary // next higher precedence level -> primary:x, "**", unary expression:y =AST.binaryNode(POW, x, y); /* Primary expression - bottom level of expression syntax. Variable references, constants, the builtin functions. Also, another expression in parentheses. (This is how you make parentheses work the way they're supposed to.) */ primary -> '(', expression:x, ')' =x; -> constant -> lvalue:x =AST.unaryNode(FETCH, x); -> function call -> '(', "long", ')', primary:x =AST.unaryNode(CAST_LONG, x); -> '(', "double", ')', primary:x =AST.unaryNode(CAST_DOUBLE, x); lvalue -> name:n =AST.variableNode(n); function call -> name:k, '(', optional arg list:args, ')' =AST.functionCallNode(k, args); (AgStack) optional arg list -> =AgStack(); -> arg list (AgStack) arg list -> expression:x =AgStack().push(x); -> arg list:al, ',', expression:x =al.push(x); constant -> integer:x =AST.constantNode(x); -> real:x =AST.constantNode(x); -> string:x =AST.constantNode(x); -> character constant:x =AST.constantNode(x); // Operator definitions (Opcode) relational op -> '=' =EQ; -> "<>" =NE; -> '<' =LT; -> "<=" =LE; -> '>' =GT; -> ">=" =GE; (Opcode) additive op -> '+' =ADD; -> '-' =SUB; -> "or" =IOR; -> "xor" =XOR; (Opcode) multiplicative op -> '*' =MUL; -> '/' =RDIV; -> "div" =IDIV; -> "mod" =MOD; -> "and" =AND; -> "shl" =LS; -> "shr" =RS; (Opcode) unary op -> '-' =NEG; -> "not" =NOT; /*** LEXICAL UNITS **********************************************************/ digit = '0-9' eof = 0 // string null terminator letter = 'a-z' + 'A-Z' + '_' space = ' ' + '\t' + '\f' + '\v' + '\r' + '\n' // blank, tab, etc. (void) white space -> space -> "/*", ~eof?..., "*/" // C style comment -> "//", ~(eof+'\n')?..., '\n' // C++ style comment /* Identifying variable names Characters in a name are accumulated in an AgString structure. */ (AgString) name -> letter:c =AgString().append(tolower(c)); -> name:ns, letter+digit:c =ns.append(tolower(c)); // Parsing and evaluating numeric constants (double) real -> simple real -> simple real:x, 'e'+'E', signed exponent:e =x*pow(10,e); -> integer part:x, 'e'+'E', signed exponent:e =x*pow(10,e); (double) simple real -> integer part:i, '.', fraction part:f =i+f; -> integer part:i, '.' =i; -> '.', fraction part:f =f; (double) integer part -> decimal integer:x =x; -> hybrid integer:x =x; -> octal integer:x =makeDecimal(x); (long) signed exponent -> '+'?, exponent:x =(long)x; -> '-', exponent:x =-(long)x; (double) fraction part -> digit:d =(d-'0')/10.0; -> digit:d, fraction part:f =(d-'0' + f)/10.0; (long) integer -> decimal integer -> octal integer -> hex integer (long) decimal integer -> '1-9':d =d-'0'; -> decimal integer:x, digit:d =10*x + d-'0'; (long) exponent -> '0-9':d =d-'0'; -> exponent:x, digit:d =10*x + d-'0'; (long) hybrid integer -> octal integer:x, '8-9':d =10*makeDecimal(x) + d - '0'; -> hybrid integer:x, digit:d =10*x + d-'0'; (long) octal integer -> '0' =0; -> octal integer:n, '0-7':d =8*n + d - '0'; (long) hex integer -> "0x" =0; -> hex integer:n, hex digit:d =16*n + d; (int) hex digit -> '0-9':x =x - '0'; -> 'a-f' + 'A-F':d =(d & 7) + 9; // string constant (Value) string -> string element -> string:s, string element:e =s+=e; (Value) string element -> '"', s char sequence:b, '"' =Value(b); (AgString) s char sequence -> =AgString(); -> s char sequence:s, s char:c =s.append(c); (int) s char -> ~(eof + '"' + '\n' + '\\') -> escape sequence (int) escape sequence -> simple escape sequence -> octal escape sequence -> hexadecimal escape sequence (int) simple escape sequence -> "\\'" ='\''; -> "\\\"" ='"'; -> "\\?" = '\?'; -> "\\\\" ='\\'; -> "\\a" ='\a'; -> "\\b" ='\b'; -> "\\f" ='\f'; -> "\\n" ='\n'; -> "\\r" ='\r'; -> "\\t" ='\t'; -> "\\v" ='\v'; (int) octal escape sequence -> one octal | two octal | three octal (int) one octal -> '\\', '0-7':d =d-'0'; (int) two octal -> one octal:n, '0-7':d =8*n + d-'0'; (int) three octal -> two octal:n, '0-7':d =8*n + d-'0'; (int) hexadecimal escape sequence -> "\\x", hexadecimal digit:d =d; -> hexadecimal escape sequence:n, hexadecimal digit:d =16*n + d; (int) hexadecimal digit -> '0-9':d =d-'0'; -> 'A-F' + 'a-f':d =9 + (d & 7); [ sticky {one octal, two octal, hexadecimal escape sequence} ] // character constant (int) character constant -> '\'', c char:c, '\'' =c; (int) c char -> ~(eof + '\'' + '\n' + '\\') -> escape sequence /*** SUPPORT CODE ***********************************************************/ { // Begin embedded C++ // Don't use default error handling. #define SYNTAX_ERROR PCB.reportError() #define GET_CONTEXT CONTEXT = FileLocation(PCB.pointer, PCB.line, PCB.column) FileLocation Tree::context() const { return PCONTEXT(*pcbPointer); } // Function definitions for parser control block // Constructor parse_pcb_struct::parse_pcb_struct(const char *text) : pointer((const unsigned char *) text), loopDepth(0) { ast.pcbPointer = this; } void parse_pcb_struct::checkLoop() { if (loopDepth) return; char buf[100]; FileLocation &context = PCONTEXT(*this); sprintf(buf, "Error(%d,%d): No loop active", context.line, context.column); throw ErrorDiagnostic(buf); } // Record syntax error void parse_pcb_struct::reportError() { ag_delete_wrappers(this); char buf[100]; sprintf(buf, "Error(%d,%d): %s", line, column, error_message); throw ErrorDiagnostic(buf); } // External Interface AbstractSyntaxTree buildTree(const char *text) { parse_pcb_type pcb(text); parse(&pcb); return pcb.ast; } } // End of embedded C++ /*** End of syntax file *****************************************************/