{ /* dxi.syn -- Direct Execution Script Interpreter Copyright (c) 2002 Parsifal Software. All Rights Reserved. The grammar contained in this file describes the same language as the grammar in pll.syn. The differences can be categorized as follows: . Rules rewritten to make computation easier. In pll.syn all of the arithmetic operators have been factored out. Here the arithmetic operators have been substituted back into the rules for expressions to make computation more straightforward. . Rules rewritten to handle looping. The rules for loop constructs have been rewritten to handle repetitive parsing of loops. . Semantically determined rules (true, false) added to support parse time implementation of loops and if-else statements. The actual parser modules, dxi.h and dxi.cpp are created from dxi.syn by the AnaGram parser generator using the Build Parser command. To build a demonstration program for this parser, compile and link as a single project: demo.cpp // Demo program comdefs.cpp // Common script definitions dxi.cpp // Direct execution parser agclib1.lib // Supporting class library Include files describing the class library are in agclib1\include This grammar describes a language that is somewhat similar to Pascal. The language consists of Pascal like statements and expressions. The language also implements Fortran-style exponentiation and simple dump and print statements. The syntax below uses "open" and "closed" statements to eliminate the traditional "dangling else" or "if-else" ambiguity problem. See http://www.parsifalsoft.com/ifelse.html or doc/ifelse.htm for a more detailed discussion. Statement types supported are: assignment statements compound statements if/else statements while statements repeat/until statements for statements dump and print statements There are no declarations. Scalar values may be explicitly cast to (long) or (double). Both integer and floating point values are stored as doubles. Scripts may contain any number of statements. 
White space may be used freely, including both C and C++ style comments.

 For further information about this program or the AnaGram parser
 generator, please contact:

   Parsifal Software
   http://www.parsifalsoft.com
   info@parsifalsoft.com
   +1-800-879-2577, Voice/Fax +1-508-358-2564
   P.O. Box 219
   Wayland, Massachusetts 01778 USA
*/

#include "comdefs.h"                   // Contains definitions of support classes
// NOTE(review): the next three directives carry no header name. The names
// appear to have been lost (angle-bracketed text stripped) and must be
// restored before this file can be built. The second is presumably <ctype.h>
// given its tolower() comment -- confirm against the original distribution.
#include
#include                               // to declare tolower()
#include                               // Used by dump and print statements

/*
 It is sometimes necessary to skip over code without executing it. The Mode
 enumeration specifies the various execution modes.
*/
enum Mode {
  executeMode,                         // executing
  continueMode,                        // continue statement encountered, skip to end of loop
  breakMode,                           // break statement encountered, skip to end of loop
  elseMode,                            // Skipping to else statement
  blockMode,                           // skipping a block of code
  scanMode                             // scanning code to determine loop parameters
};

/*
 The parser uses "pointer input". The ParseContext struct is used to store
 the value of the pointer and the execution mode at critical locations in
 the script, such as at the head of a loop, at the exit point of a loop, or
 at the increment and condition fields of a for loop. Since ParseContext
 inherits from FileLocation, it also stores line and column number.

 In the configuration section of the parser, the "context type" is specified
 as ParseContext. The GET_CONTEXT macro constructs a ParseContext object to
 store on the context stack.

 dxi_pcb_struct is the "parser control block". The ParseContext constructor
 retrieves the necessary information from the pcb.
*/

struct dxi_pcb_struct;                 // Because of forward reference in constructor

// A file location plus the execution mode that was in effect there.
struct ParseContext : public FileLocation {
  Mode mode;                           // execution mode captured with the location

  // Default location, executing.
  ParseContext() : FileLocation(), mode(executeMode) {}

  // Location at the start of the given text, executing.
  ParseContext(const char *key)
    : FileLocation((const unsigned char *) key), mode(executeMode) {}

  // Capture the current location and mode of a parser control block.
  // Defined in the support code, once dxi_pcb_struct is complete.
  ParseContext(dxi_pcb_struct &);
};

// struct to describe a for loop
struct ForControl {
  Value lvalue;                        // the loop variable
  long begin;                          // initial value of the loop variable
  long increment;                      // step applied after each pass
  long end;                            // limit value

  // Defaults: begin 0, increment 1, end 0.
  ForControl() : lvalue(), begin(0), increment(1), end(0) {}

  // Build from evaluated header fields; the numeric fields are taken as longs.
  ForControl(const Value &lv, const Value &b, const Value &i, const Value &e)
    : lvalue(lv), begin(b.getLong()), increment(i.getLong()), end(e.getLong())
  {
    // Nothing to do
  }
};

/*
 The VALUE macro controls the execution of expressions in accordance with
 the current execution mode: the argument is evaluated only in executeMode,
 otherwise a default Value is produced.

 NOTE(review): the expansion is not parenthesized, so it is only safe where
 it forms a complete expression, as in the reduction procedures of this file.
*/
#define VALUE(x) PCB.mode == executeMode ? Value(x) : Value()

}

/*** CONFIGURATION SECTION **************************************************/
[
  // Grammar adjustments
  case sensitive = OFF                 // not case sensitive
  disregard white space                // Skip over white space (defined below)
  distinguish lexemes
  lexeme {integer, real, name, string element, character constant}
  lexeme {SCRIPT, WHILE LOOP, REPEAT LOOP, STATEMENT}
  distinguish keywords {'a-z' + 'A-Z'}
  wrapper {AgString, Value}            // Special handling on parser stack
  // NOTE(review): AgStack appears without template arguments throughout this
  // file; if it is a class template in agclib1, the arguments were probably
  // lost in the same way as the #include header names -- confirm.
  wrapper {AgStack}
  wrapper {ForControl}
  context type = ParseContext          // track file location
  parser name = dxi                    // Name parser function
  parser file name = "#.cpp"           // # will be replaced by name of syntax file
  test file mask = "*.scf"             // filter for File Trace
  no cr                                //omit carriage returns in output files, for *nix compatibility

  // Operating modes
  pointer input                        // Take input from array in memory
  pointer type = const unsigned char *
  reentrant parser                     // Make parser reentrant

  // Add the following to the parser control block for use during parsing
  extend pcb {
    Dataset &dataset;                  // data values
    Mode mode;                         // Skipping or executing
    FileLocation startLocation;        // starting location
    Value returnValue;                 //return value

    // Constructor for parser control block
    dxi_pcb_struct(Dataset &d);

    // Functions used during parsing
    void parseSyntax(const char *select, const FileLocation &start);
    void setStart();
    void simpleLoop(const char *selector);
    ForControl forControl(const Value &, const Value &, const Value &, const Value &);
    void forLoop(ForControl &);
    ParseContext location();                   // Capture current location of parse
    void setLocation(const FileLocation &l);   // Set location of parse
    void setReturnValue(const Value &v);

    // Mode changing functions
    int changeMode(Mode currentMode, Mode desiredMode);
    void restoreMode(Mode current);
    void restoreMode();

    void dump(const AgString &) const;         // dump variable name and value
    void print(const Value &) const;
    void reportError();                        // Record error message and location
    void reportError(const char *);            // Record error message and location
  }
]

/*** GRAMMAR ****************************************************************/

/*
 "syntax selection" is marked with a $ to indicate it is the "grammar
 token", that is, the whole of the input we're intending to parse.

 Each production belonging to syntax selection begins with a "selector"
 which determines how the specific input is to be parsed.
*/

(Value) syntax selection $
 -> SCRIPT, statement list, eof =Value();
 -> WHILE LOOP, loop condition, "do", statement =Value();
 -> REPEAT LOOP, statement list, UNTIL, until condition, ';' =Value();
 -> STATEMENT, statement =Value();

// Selector definitions
// Each selector keyword is supplied as the parser's initial input by
// parseSyntax(); once it has been recognized, setStart() moves the parse to
// the saved start location in the script.

SCRIPT
 -> "script" =PCB.setStart();

WHILE LOOP
 -> "whileLoop" =PCB.setStart();

REPEAT LOOP
 -> "repeatLoop" =PCB.setStart();

STATEMENT
 -> "statement" =PCB.setStart();

// Zero or more statements. Note that "statement list" is recursively defined.

statement list
 ->
 -> statement list, statement

/*
 A single statement can be "open" or "closed". An "open" statement is a
 statement that can legally be followed by an "else" keyword. A "closed"
 statement is one that cannot.

 "Open" and "closed" statements are a means for resolving a problem common
 to many programming languages known as the if-then-else ambiguity or
 "dangling else". See www.parsifalsoft.com/ifelse.htm for a discussion of
 the ambiguity and this technique for resolving it.
*/

statement
 -> open statement
 -> closed statement

open statement
 -> if condition, statement =PCB.restoreMode(elseMode);
 -> if condition, closed statement, ELSE, open statement = PCB.restoreMode(blockMode);

closed statement
 -> if condition, closed statement, ELSE, closed statement = PCB.restoreMode(blockMode);
 -> simple statement

/*
 A "simple statement" is one that does not end with another statement. All
 simple statements are closed statements and are therefore factored out for
 clarity.

 Although in the prototype grammar, pll.syn, while and for statements are
 carefully factored into open and closed statements, this is not necessary
 in this parser, since the statements that comprise the body of the loops
 are invisible to the script syntax.
*/

simple statement
 -> REPEAT =PCB.simpleLoop("repeatLoop");
 -> WHILE =PCB.simpleLoop("whileLoop");
 -> for initialization:fc, "do" =PCB.forLoop(fc);
 -> assignment statement, ';'
 -> ';'
 -> compound statement
 -> "return", optional expression:x, ';' =PCB.setReturnValue(x);
 -> dump statement, ';'
 -> print statement, ';'

REPEAT
 -> "repeat"

// On reaching "until", a pending continueMode reverts to executeMode so
// that the until condition is evaluated.
UNTIL
 -> "until" =PCB.changeMode(continueMode, executeMode);

WHILE
 -> "while"

FOR
 -> "for"

// Support for loop execution
// Both conditions signal "leave the loop" by setting breakMode; a parse
// that is already skipping also ends up in breakMode.

loop condition
 -> expression:x = {if (PCB.mode != executeMode || x.isFalse()) PCB.mode = breakMode;}

until condition
 -> expression:x = {if (PCB.mode != executeMode || x.isTrue()) PCB.mode = breakMode;}

// Loop header: for <lvalue> := <begin> [by <increment>] to <end>

(ForControl) for initialization
 -> FOR, lvalue:lv, ":=", expression:b, for increment:i, "to", expression:e =PCB.forControl(lv, b, i, e);

(Value) for increment
 -> =VALUE(1);
 -> "by", expression:x =x;

// if and if-else statements

// A false condition while executing switches to elseMode.
if condition
 -> "if", expression:x, "then" = {if (PCB.mode == executeMode && x.isFalse()) PCB.mode = elseMode;}

// At "else": an executing parse starts skipping (blockMode); a parse that
// was skipping to the else (elseMode) resumes executing. The first call
// leaves blockMode, which the second call does not match, so at most one of
// the two switches takes effect.
ELSE
 -> "else" = PCB.changeMode(executeMode, blockMode),PCB.changeMode(elseMode, executeMode);

assignment statement
 -> lvalue:lv, ":=", expression:x =VALUE(lv = x);

compound statement
 -> "begin", statement list, "end"

// The dump statement accepts a comma delimited list of variable names

dump statement
 -> "dump", name:n =PCB.dump(n);
 -> dump statement, ',', name:n =PCB.dump(n);

// The print statement accepts a comma delimited list of expressions

print statement
 -> "print", expression:x =PCB.print(x);
 -> print statement, ',', expression:x =PCB.print(x);

(Value) optional expression
 -> =Value();
 -> expression

// General expression. The relational operators have the lowest precedence.
(Value) expression
 -> simple expression
 -> simple expression:x, '<', simple expression:y =VALUE(x < y);
 -> simple expression:x, "<=", simple expression:y =VALUE(x <= y);
 -> simple expression:x, '>', simple expression:y =VALUE(x > y);
 -> simple expression:x, ">=", simple expression:y =VALUE(x >= y);
 -> simple expression:x, '=', simple expression:y =VALUE(x == y);
 -> simple expression:x, "<>", simple expression:y =VALUE(x != y);

(Value) simple expression
 -> term
 -> simple expression:x, '+', term:y =VALUE(x + y);
 -> simple expression:x, '-', term:y =VALUE(x - y);
 -> simple expression:x, "or", term:y =VALUE(x | y);
 -> simple expression:x, "xor", term:y =VALUE(x ^ y);

(Value) term
 -> unary expression
 -> term:x, '*', unary expression:y =VALUE(x * y);
 -> term:x, '/', unary expression:y =VALUE(x.rdiv(y));
 -> term:x, "div", unary expression:y =VALUE(x.idiv(y));
 -> term:x, "mod", unary expression:y =VALUE(x % y);
 -> term:x, "and", unary expression:y =VALUE(x & y);
 -> term:x, "shl", unary expression:y =VALUE(x << y);
 -> term:x, "shr", unary expression:y =VALUE(x >> y);

(Value) unary expression
 -> factor                             // next higher precedence level
 -> '+', unary expression:x =VALUE(x);
 -> '-', unary expression:x =VALUE(-x);
 -> "NOT", unary expression:x =VALUE(!x);

/*
 Syntactically, we can use ** for exponentiation because we don't have
 pointers. (In C, ** could be confused with pointer indirection.)
*/

(Value) factor
 -> primary                            // next higher precedence level
 -> primary:x, "**", unary expression:y =VALUE(pow(x, y));

/*
 Primary expression - bottom level of expression syntax. Variable
 references, constants, the builtin functions. Also, another expression in
 parentheses. (This is how you make parentheses work the way they're
 supposed to.)
*/

(Value) primary
 -> '(', expression:x, ')' =x;
 -> constant
 -> lvalue:lv =VALUE(lv.deref());
 -> function call
 -> '(', "long", ')', primary:x =VALUE(x.makeInteger());
 -> '(', "double", ')', primary:x =VALUE(x.makeReal());

(Value) lvalue
 -> name:n =VALUE(&PCB.dataset.value(n));

(Value) function call
 -> name:n, '(', optional arg list:args, ')' =VALUE(callFunction(n, args));

(AgStack) optional arg list
 -> =AgStack();
 -> arg list

(AgStack) arg list
 -> expression:x =AgStack().push(x);
 -> arg list:stack, ',', expression:x =stack.push(x);

(Value) constant
 -> integer:x =VALUE(x);
 -> real:x =VALUE(x);
 -> string
 -> character constant:x =VALUE(x);

/*** LEXICAL UNITS **********************************************************/

digit  = '0-9'
eof    = 0                             // string null terminator
letter = 'a-z' + 'A-Z' + '_'
space  = ' ' + '\t' + '\f' + '\v' + '\r' + '\n'     // blank, tab, etc.

(void) white space
 -> space
 -> "/*", ~eof?..., "*/"               // C style comment
 -> "//", ~(eof+'\n')?..., '\n'        // C++ style comment

/*
 Identifying variable names

 Characters in a name are accumulated in an AgString structure.
*/

(AgString) name
 -> letter:c =AgString().append(tolower(c));
 -> name:ns, letter+digit:c =ns.append(tolower(c));

// Parsing and evaluating numeric constants

(double) real
 -> simple real
 -> simple real:x, 'e'+'E', signed exponent:e =x*pow(10,e);
 -> integer part:x, 'e'+'E', signed exponent:e =x*pow(10,e);

(double) simple real
 -> integer part:i, '.', fraction part:f =i+f;
 -> integer part:i, '.' =i;
 -> '.', fraction part:f =f;

(double) integer part
 -> decimal integer:x =x;
 -> hybrid integer:x =x;
 -> octal integer:x =makeDecimal(x);

(long) signed exponent
 -> '+'?, exponent:x =(long)x;
 -> '-', exponent:x =-(long)x;

(double) fraction part
 -> digit:d =(d-'0')/10.0;
 -> digit:d, fraction part:f =(d-'0' + f)/10.0;

(long) integer
 -> decimal integer
 -> octal integer
 -> hex integer

(long) decimal integer
 -> '1-9':d =d-'0';
 -> decimal integer:x, digit:d =10*x + d-'0';

(long) exponent
 -> '0-9':d =d-'0';
 -> exponent:x, digit:d =10*x + d-'0';

(long) hybrid integer
 -> octal integer:x, '8-9':d =10*makeDecimal(x) + d - '0';
 -> hybrid integer:x, digit:d =10*x + d-'0';

(long) octal integer
 -> '0' =0;
 -> octal integer:n, '0-7':d =8*n + d - '0';

(long) hex integer
 -> "0x" =0;
 -> hex integer:n, hex digit:d =16*n + d;

(int) hex digit
 -> '0-9':x =x - '0';
 -> 'a-f' + 'A-F':d =(d & 7) + 9;      // low three bits of a-f/A-F are 1..6

// string constant

(Value) string
 -> string element
 -> string:s, string element:e =VALUE(s+=e);

(Value) string element
 -> '"', s char sequence:b, '"' =VALUE(b);

(AgString) s char sequence
 -> =AgString();
 -> s char sequence:s, s char:c =s.append(c);

(int) s char
 -> ~(eof + '"' + '\n' + '\\')
 -> escape sequence

(int) escape sequence
 -> simple escape sequence
 -> octal escape sequence
 -> hexadecimal escape sequence

(int) simple escape sequence
 -> "\\'" ='\'';
 -> "\\\"" ='"';
 -> "\\?" = '\?';
 -> "\\\\" ='\\';
 -> "\\a" ='\a';
 -> "\\b" ='\b';
 -> "\\f" ='\f';
 -> "\\n" ='\n';
 -> "\\r" ='\r';
 -> "\\t" ='\t';
 -> "\\v" ='\v';

(int) octal escape sequence
 -> one octal | two octal | three octal

(int) one octal
 -> '\\', '0-7':d =d-'0';

(int) two octal
 -> one octal:n, '0-7':d =8*n + d-'0';

(int) three octal
 -> two octal:n, '0-7':d =8*n + d-'0';

(int) hexadecimal escape sequence
 -> "\\x", hexadecimal digit:d =d;
 -> hexadecimal escape sequence:n, hexadecimal digit:d =16*n + d;

(int) hexadecimal digit
 -> '0-9':d =d-'0';
 -> 'A-F' + 'a-f':d =9 + (d & 7);

[
  sticky {one octal, two octal, hexadecimal escape sequence}
]

// character constant

(int) character constant
 -> '\'', c char:c, '\'' =c;

(int) c char
 -> ~(eof + '\'' + '\n' + '\\')
 -> escape sequence

/*** SUPPORT CODE ***********************************************************/
{ // Begin embedded C++

// Don't use default error handling.
#define SYNTAX_ERROR PCB.reportError()

// implement context tracking
#define GET_CONTEXT CONTEXT = PCB.location()

// Capture the input pointer, line, column and execution mode of a parser
// control block.
ParseContext::ParseContext(dxi_pcb_struct &pcb)
  : FileLocation(pcb.pointer, pcb.line, pcb.column),
    mode(pcb.mode)
{
  /* empty */
}

// constructor for parser control block
// Binds the control block to the dataset; the parse starts in executeMode
// with a default return value.
dxi_pcb_struct::dxi_pcb_struct(Dataset &d)
  : dataset(d),
    mode(executeMode),
    returnValue()
{
  /* empty */
}

// Run the parser on one syntactic unit.
//   selector - selector keyword ("script", "whileLoop", "repeatLoop" or
//              "statement"); it is given to the parser as its initial input
//   s        - location in the script where the selected unit begins; the
//              selector's reduction procedure jumps there via setStart()
// An ErrorMessage thrown during the parse is passed to reportError(const
// char *), which throws an ErrorDiagnostic.
void dxi_pcb_struct::parseSyntax
  (const char *selector, const FileLocation &s)
{
  pointer = (const unsigned char *) selector;
  startLocation = s;
  try {
    dxi(this);
  }
  // Catch by reference: avoids copying (and slicing) the exception object.
  catch (ErrorMessage &e) {
    reportError(e.message());
  }
}

// Move the parse to the start location recorded by parseSyntax().
void dxi_pcb_struct::setStart() {
  setLocation(startLocation);
}

// Execute (or skip) a while or repeat loop by re-parsing its text.
//   selector - "whileLoop" or "repeatLoop"
// A separate control block parses the loop from the current location once
// per pass. After each pass the parent's location is moved to where the
// nested parse stopped. The loop ends when the nested parse finishes in
// breakMode. If the parent is not executing, the nested parse starts in
// breakMode, so the loop text is skipped in a single pass.
void dxi_pcb_struct::simpleLoop(const char *selector) {
  ParseContext loopLocation(*this);
  dxi_pcb_struct loopPCB(dataset);
  loopPCB.mode = mode == executeMode ? executeMode : breakMode;  // Set mode
  for (;;) {
    loopPCB.parseSyntax(selector, loopLocation);
    setLocation(ParseContext(loopPCB));        //Transfer location to parent
    if (loopPCB.mode == breakMode) return;
    loopPCB.mode = executeMode;                // mode could have been set to continueMode
  }
}

// Build the control record for a for loop from its evaluated header fields.
// When not executing, the fields are ignored and a default ForControl is
// returned.
ForControl dxi_pcb_struct::forControl(const Value &lv, const Value &b,
                                      const Value &i, const Value &e)
{
  if (mode == executeMode) return ForControl(lv, b, i, e);
  return ForControl();
}

// Execute (or skip) the body of a for loop.
//   fc - loop variable, begin, increment and end, as built by forControl()
// The body starts at the current location, just after "do". It is parsed by
// a nested control block once per pass, and the parent's location is moved
// past the body after each pass. Reports an error (which throws) if the
// increment is zero.
void dxi_pcb_struct::forLoop(ForControl &fc) {
  // Restore the mode in effect at the start of the rule. The original note
  // here reads "mode was set to scanMode to pick up loop header info"; no
  // code visible in this file sets scanMode.
  restoreMode();
  ParseContext statementLocation(*this);       // Capture statement location
  dxi_pcb_struct loopPCB(dataset);
  if (fc.increment == 0) reportError("Loop increment is zero");

  // Calculate number of times through the loop
  int count = (fc.end + fc.increment - fc.begin)/fc.increment;
  int index = fc.begin;

  // Not executing, or nothing to execute: the body must still be parsed
  // exactly once in breakMode so that the parent's location moves past it.
  // This used to be left to "while (count--)", which does nothing when the
  // count is exactly zero; the parent then parsed the body as an ordinary
  // statement and executed it once.
  if (mode != executeMode || count <= 0) {
    loopPCB.mode = breakMode;
    loopPCB.parseSyntax("statement", statementLocation);
    setLocation(ParseContext(loopPCB));        // Set location in parent parser
    return;
  }

  // Executing: assign the loop variable, then run the body count times.
  loopPCB.mode = executeMode;
  fc.lvalue = index;
  while (count--) {
    loopPCB.parseSyntax("statement", statementLocation);
    setLocation(ParseContext(loopPCB));        // Set location in parent parser
    index += fc.increment;
    if (loopPCB.mode == executeMode) fc.lvalue = index;
    if (loopPCB.mode == breakMode) return;
  }
}

// Capture the current location and mode of the parse.
ParseContext dxi_pcb_struct::location() {
  return ParseContext(*this);
}

// Set parse location in source text
void dxi_pcb_struct::setLocation(const FileLocation &l) {
  pointer = l.pointer;
  line = l.line;
  column = l.column;
}

// Set return value
// Ignored unless executing; otherwise stores the value and stops the parser
// with a success code.
void dxi_pcb_struct::setReturnValue(const Value &v) {
  if (mode != executeMode) return;     // exit setReturnValue
  returnValue = v;
  exit_flag = AG_SUCCESS_CODE;         // Exit parser
}

// if mode == currentMode change mode to desiredMode and return true.
// Otherwise, leave mode unchanged and return false.
int dxi_pcb_struct::changeMode(Mode currentMode, Mode desiredMode) { if (mode != currentMode) return 0; mode = desiredMode; return 1; } // if mode == currentMode, restore the mode in effect at the beginning of the // rule. Otherwise, leave the mode unchanged. void dxi_pcb_struct::restoreMode(Mode currentMode) { if (mode == currentMode) mode = PCONTEXT(*this).mode; } // Unconditionally restore the mode in effect at the beginning of the rule. void dxi_pcb_struct::restoreMode() { mode = PCONTEXT(*this).mode; } // dump variable name and value void dxi_pcb_struct::dump(const AgString &n) const { if (mode != executeMode) return; cout << (const char *) n << " = " << (const char *) dataset.value(n).asLiteral() << "\n"; } // print value void dxi_pcb_struct::print(const Value &value) const { if (mode != executeMode) return; cout << (const char *) value.asString(); } // Record syntax error void dxi_pcb_struct::reportError() { ag_delete_wrappers(this); char buf[100]; sprintf(buf, "Error(%d,%d): %s", line, column, error_message); throw ErrorDiagnostic(buf); } void dxi_pcb_struct::reportError(const char *msg) { ag_delete_wrappers(this); char buf[100]; ParseContext &context = PCONTEXT(*this); sprintf(buf, "Error(%d,%d): %s", context.line, context.column, msg); throw ErrorDiagnostic(buf); } Value interpret(const char *text, Dataset &d) { dxi_pcb_struct pcb(d); pcb.parseSyntax("script", FileLocation((const unsigned char *)text)); return pcb.returnValue; } } // End of embedded C++ /*** End of syntax file *****************************************************/