{ /* dci.syn -- Direct Compilation of Bytecode Copyright (c) 2002 Parsifal Software. All Rights Reserved. This version of dxi.syn implements CLL, a C-like language. The grammar contained in this file describes the same language as the grammar in cll.syn. Reduction procedures have been added to enable it to compile bytecode as it parses. Changes in the syntax have been minimal. dci.syn is compiled with the AnaGram parser generator, which creates dci.h and dci.cpp. To build a demonstration program for this parser, compile and link as a single project: dci.cpp // This parser, compiles bytecode demo.cpp // Demo program bcidefs.cpp // Bytecode and compiler definitions comdefs.cpp // Common script definitions agclib1.lib // Supporting class library Include files describing the class library are in agclib1\include This grammar describes a language that is similar to C. The language consists of C statements (not including declarations or the switch statement) and C expressions (not including pointers or subscript operators). The language also implements Fortran-style exponentiation and simple dump and print statements. The syntax below uses "open" and "closed" statements to eliminate the traditional "dangling else" or "if-else" ambiguity problem. See http://www.parsifalsoft.com/ifelse.html or doc/ifelse.htm for a more detailed discussion. Statement types supported are: expression statements compound statements if/else statements while statements do/while statements for statements break and continue statements dump and print statements There are no declarations. Scalar values may be explicitly cast to (long) or (double). Both integer and floating point values are stored as doubles. Scripts may contain any number of statements. White space may be used freely, including both C and C++ style comments. For further information about this program or the AnaGram parser generator, please contact: Parsifal Software http://www.parsifalsoft.com info@parsifalsoft.com +1-800-879-2577, Voice/Fax +1-508-358-2564 P.O. Box 219 Wayland, Massachusetts 01778 USA */ #include "comdefs.h" // Contains definitions of support classes #include #include "bcidefs.h" // Bytecode interpreter definitions } /*** CONFIGURATION SECTION **************************************************/ [ // Grammar adjustments disregard white space // Skip over white space (defined below) distinguish lexemes lexeme {integer, real, name, string element, character constant} distinguish keywords {'a-z' + 'A-Z'} wrapper {AgString, Value} // Special handling on parser stack wrapper {CodeFragment, AgStack, Constant} context type = FileLocation // track file location parser name = dci // name parser function parser file name = "#.cpp" // # will be replaced by name of syntax file test file mask = "*.scf" // filter for File Trace no cr //omit carriage returns in output files, for *nix compatibility // Operating modes pointer input // Take input from array in memory pointer type = const unsigned char * reentrant parser // Make parser reentrant // Put the following into the parser control block for use during parsing extend pcb { // Maps symbol names to variables AgDictionary &dictionary; // symbol table // table of constants AgDictionary constants; int loopDepth; // Constructor for pcb dci_pcb_struct(AgDictionary &d, const char *text); // Functions used during parsing void reportError(); void reportError(const char *msg); void checkLoop(); int idName(const AgString &); CodeFragment code(Opcode, const AgString &); CodeFragment code(Opcode, const Constant &); CodeFragment codeCall(const AgString &, AgStack &); } ] /*** GRAMMAR ****************************************************************/ /* "script" is marked with a $ to indicate it is the "grammar token", that is, the whole of the input we're intending to parse. Append HLT (halt) to the bytecode we've got so far, so it stops when it's run. */ (CodeFragment) script $ -> statement list:x, eof =x.append(HLT); // Zero or more statements. Note that "statement list" is recursively defined. (CodeFragment) statement list -> =CodeFragment(); -> statement list:x, statement:y =x.concat(y); /* A single statement can be "open" or "closed". An "open" statement is a statement that can legally be followed by an "else" keyword. A "closed" statement is one that cannot. "Open" and "closed" statements are a means for resolving a problem common to many programming languages known as the if-then-else ambiguity or "dangling else". See www.parsifalsoft.com/ifelse.htm for a discussion of the ambiguity and this technique for resolving it. */ (CodeFragment) statement -> open statement -> closed statement (CodeFragment) open statement -> if condition:x, statement:s =CodeFragment::ifStatement(x, s); -> if condition:x, closed statement:s1, "else", open statement:s2 =CodeFragment::ifElse(x,s1,s2); -> WHILE, '(', expression:x, ')', open statement:s = PCB.loopDepth--, CodeFragment::whileLoop(x,s); -> FOR, '(', optional expression:init, ';', optional expression:cond, ';', optional expression:inc, ')', open statement:s = PCB.loopDepth--,CodeFragment::forLoop(init, cond, inc, s); (CodeFragment) closed statement -> if condition:x, closed statement:s1, "else", closed statement:s2 =CodeFragment::ifElse(x,s1,s2); -> WHILE, '(', expression:x, ')', closed statement:s = PCB.loopDepth--, CodeFragment::whileLoop(x,s); -> FOR, '(', optional expression:init, ';', optional expression:cond, ';', optional expression:inc, ')', closed statement:s = PCB.loopDepth--, CodeFragment::forLoop(init, cond, inc, s); -> simple statement:x =x; /* A "simple statement" is one that does not end with another statement. All simple statements are closed statements and are therefore factored out for clarity. The simple expression statement appends a pop instruction so that the resulting bytecode will discard the expression's value. (This is not an optimizing compiler.) */ (CodeFragment) simple statement -> DO, statement:s, "while", '(', expression:x, ')', ';' = PCB.loopDepth--, CodeFragment::doLoop(s,x); -> expression:x, ';' =x.append(POP); -> ';' =CodeFragment(); -> compound statement -> "break", ';' =PCB.checkLoop(), CodeFragment().appendBreak(BR); -> "continue", ';' =PCB.checkLoop(), CodeFragment().appendContinue(BR); -> "return", ';' =CodeFragment().append(HLT); -> "return", expression:x, ';' =x.append(RETURN); -> dump statement, ';' -> print statement, ';' DO -> "do" =PCB.loopDepth++; WHILE -> "while" =PCB.loopDepth++; FOR -> "for" =PCB.loopDepth++; (CodeFragment) if condition -> "if", '(', expression:x, ')' =x; (CodeFragment) compound statement -> '{', statement list:s, '}' =s; // The dump statement accepts a comma delimited list of variable names (CodeFragment) dump statement -> "dump", name:n =PCB.code(DUMP,n); -> dump statement:code, ',', name:n =code.append(DUMP, PCB.idName(n)); // The print statement accepts a comma delimited list of expressions (CodeFragment) print statement -> "print", assignment expression:code =code.append(PRINT); -> print statement:code, ',', assignment expression:x = code.concat(x).append(PRINT); (CodeFragment) optional expression -> =CodeFragment(); -> expression // General expression. As in C, comma expression has the lowest precedence. // Discard the value of the left operand, since it will never be used. (CodeFragment) expression -> assignment expression -> expression:x, ',', assignment expression:y = x.append(POP).concat(y); // Assignment expression (CodeFragment) assignment expression -> conditional expression // next higher precedence level -> lvalue:lv, assignment op:op, assignment expression:x = lv.concat(x).append(op); // Conditional expression (ternary operator) (CodeFragment) conditional expression -> logical or expression // next higher precedence level -> logical or expression:c, '?', expression:x, ':', conditional expression:y =CodeFragment::ifElse(c, x, y); /* Logical expressions with short-cut evaluation, as in C. The size() call determines how far to jump to skip over the part of the expression being skipped over. */ (CodeFragment) logical or expression -> logical and expression // next higher precedence level -> logical and expression:x, "||", logical or expression:y =x.append(LOR, y.size()).concat(y); (CodeFragment) logical and expression -> inclusive or expression -> inclusive or expression:x, "&&", logical and expression:y =x.append(LAND, y.size()).concat(y); // Arithmetic expressions, in operator precedence order. (CodeFragment) inclusive or expression -> exclusive or expression // next higher precedence level -> inclusive or expression:x, '|', exclusive or expression:y =x.concat(y).append(IOR); (CodeFragment) exclusive or expression -> and expression // next higher precedence level -> exclusive or expression:x, '^', and expression:y =x.concat(y).append(XOR); (CodeFragment) and expression -> equality expression // next higher precedence level -> and expression:x, '&', equality expression:y =x.concat(y).append(AND); (CodeFragment) equality expression -> relational expression // next higher precedence level -> equality expression:x, equality op:op, relational expression:y =x.concat(y).append(op); (CodeFragment) relational expression -> shift expression // next higher precedence level -> relational expression:x, relational op:op, shift expression:y =x.concat(y).append(op); (CodeFragment) shift expression -> additive expression // next higher precedence level -> shift expression:x, shift op:op, additive expression:y =x.concat(y).append(op); (CodeFragment) additive expression -> multiplicative expression // next higher precedence level -> additive expression:x, additive op:op, multiplicative expression:y =x.concat(y).append(op); (CodeFragment) multiplicative expression -> unary expression // next higher precedence level -> multiplicative expression:x, multiplicative op:op, unary expression:y =x.concat(y).append(op); (CodeFragment) unary expression -> factor // next higher precedence level -> '+', unary expression:x =x; -> unary op:op, unary expression:x =x.append(op); /* Syntactically, we can use ** for exponentiation because we don't have pointers. (In C, ** could be confused with pointer indirection.) */ (CodeFragment) factor -> primary // next higher precedence level -> primary:x, "**", unary expression:y =x.concat(y).append(POW); /* Primary expression - bottom level of expression syntax. Variable references, constants, the builtin functions. Also, another expression in parentheses. (This is how you make parentheses work the way they're supposed to.) */ (CodeFragment) primary -> '(', expression:x, ')' =x; -> constant:x =PCB.code(PUSHC, x); -> lvalue:x =x.append(FETCH); -> "++", lvalue:x =x.append(I_FETCH); -> "--", lvalue:x =x.append(D_FETCH); -> lvalue:x, "++" =x.append(FETCH_I); -> lvalue:x, "--" =x.append(FETCH_D); -> function call -> '(', "long", ')', primary:x =x.append(CAST_LONG); -> '(', "double", ')', primary:x =x.append(CAST_DOUBLE); (CodeFragment) lvalue -> name:n =PCB.code(LOCATE, n); (CodeFragment) function call -> name:n, '(', optional arg list:args, ')' =PCB.codeCall(n, args); (AgStack) optional arg list -> =AgStack(); -> arg list (AgStack) arg list -> assignment expression:code =AgStack().push(code); -> arg list:stack, ',', assignment expression:x =stack.push(x); (Constant) constant -> integer:x =Constant(x); -> real:x =Constant(x); -> string -> character constant:x =Constant(x); // Operator definitions (Opcode) assignment op -> '=' =STORE; -> "+=" =ADDM; -> "-=" =SUBM; -> "*=" =MULM; -> "/=" =DIVM; -> "%=" =MODM; -> "|=" =IORM; -> "&=" =ANDM; -> "^=" =XORM; -> "<<=" =LSM; -> ">>=" =RSM; -> "**=" =POWM; (Opcode) equality op -> "==" =EQ; -> "!=" =NE; (Opcode) relational op -> '<' =LT; -> "<=" =LE; -> '>' =GT; -> ">=" =GE; (Opcode) shift op -> "<<" =LS; -> ">>" =RS; (Opcode) additive op -> '+' =ADD; -> '-' =SUB; (Opcode) multiplicative op -> '*' =MUL; -> '/' =DIV; -> '%' =MOD; (Opcode) unary op -> '-' =NEG; -> '!' =NOT; -> '~' =COM; /*** LEXICAL UNITS **********************************************************/ digit = '0-9' eof = 0 // string null terminator letter = 'a-z' + 'A-Z' + '_' space = ' ' + '\t' + '\f' + '\v' + '\r' + '\n' // blank, tab, etc. (void) white space -> space -> "/*", ~eof?..., "*/" // C style comment -> "//", ~(eof+'\n')?..., '\n' // C++ style comment /* Identifying variable names Characters in a name are accumulated in an AgString structure. */ (AgString) name -> letter:c =AgString().append(c); -> name:ns, letter+digit:c =ns.append(c); // Parsing and evaluating numeric constants (double) real -> simple real -> simple real:x, 'e'+'E', signed exponent:e =x*pow(10,e); -> integer part:x, 'e'+'E', signed exponent:e =x*pow(10,e); (double) simple real -> integer part:i, '.', fraction part:f =i+f; -> integer part:i, '.' =i; -> '.', fraction part:f =f; (double) integer part -> decimal integer:x =x; -> hybrid integer:x =x; -> octal integer:x =makeDecimal(x); (long) signed exponent -> '+'?, exponent:x =(long)x; -> '-', exponent:x =-(long)x; (double) fraction part -> digit:d =(d-'0')/10.0; -> digit:d, fraction part:f =(d-'0' + f)/10.0; (long) integer -> decimal integer -> octal integer -> hex integer (long) decimal integer -> '1-9':d =d-'0'; -> decimal integer:x, digit:d =10*x + d-'0'; (long) exponent -> '0-9':d =d-'0'; -> exponent:x, digit:d =10*x + d-'0'; (long) hybrid integer -> octal integer:x, '8-9':d =10*makeDecimal(x) + d - '0'; -> hybrid integer:x, digit:d =10*x + d-'0'; (long) octal integer -> '0' =0; -> octal integer:n, '0-7':d =8*n + d - '0'; (long) hex integer -> {"0x" | "0X"} =0; -> hex integer:n, hex digit:d =16*n + d; (int) hex digit -> '0-9':x =x - '0'; -> 'a-f' + 'A-F':d =(d & 7) + 9; // string constant (Constant) string -> string element:s =Constant(s); -> string:s, string element:e =Constant(s+=e); (Value) string element -> '"', s char sequence:b, '"' =Value(b); (AgString) s char sequence -> =AgString(); -> s char sequence:s, s char:c =s.append(c); (int) s char -> ~(eof + '"' + '\n' + '\\') -> escape sequence (int) escape sequence -> simple escape sequence -> octal escape sequence -> hexadecimal escape sequence (int) simple escape sequence -> "\\'" ='\''; -> "\\\"" ='"'; -> "\\?" = '\?'; -> "\\\\" ='\\'; -> "\\a" ='\a'; -> "\\b" ='\b'; -> "\\f" ='\f'; -> "\\n" ='\n'; -> "\\r" ='\r'; -> "\\t" ='\t'; -> "\\v" ='\v'; (int) octal escape sequence -> one octal | two octal | three octal (int) one octal -> '\\', '0-7':d =d-'0'; (int) two octal -> one octal:n, '0-7':d =8*n + d-'0'; (int) three octal -> two octal:n, '0-7':d =8*n + d-'0'; (int) hexadecimal escape sequence -> "\\x", hexadecimal digit:d =d; -> hexadecimal escape sequence:n, hexadecimal digit:d =16*n + d; (int) hexadecimal digit -> '0-9':d =d-'0'; -> 'A-F' + 'a-f':d =9 + (d & 7); [ sticky {one octal, two octal, hexadecimal escape sequence} ] // character constant (int) character constant -> '\'', c char:c, '\'' =c; (int) c char -> ~(eof + '\'' + '\n' + '\\') -> escape sequence /*** SUPPORT CODE ***********************************************************/ { // Begin embedded C++ // Don't use default error handling. #define SYNTAX_ERROR PCB.reportError() // implement context tracking #define GET_CONTEXT CONTEXT = FileLocation(PCB.pointer, PCB.line, PCB.column) void dci_pcb_struct::reportError() { ag_delete_wrappers(this); char buf[100]; sprintf(buf, "Error(%d,%d): %s", line, column, error_message); throw ErrorDiagnostic(buf); } void dci_pcb_struct::reportError(const char *msg) { ag_delete_wrappers(this); char buf[100]; FileLocation &context = PCONTEXT(*this); sprintf(buf, "Error(%d,%d): %s", context.line, context.column, msg); throw ErrorDiagnostic(buf); } void dci_pcb_struct::checkLoop() { if (loopDepth) return; PCONTEXT(*this) = FileLocation(pointer, line, column); reportError("No loop active"); } // Constructor for parser control block // Initializes dictionary, input pointer, and loopDepth dci_pcb_struct::dci_pcb_struct(AgDictionary &d, const char *text) : pointer((unsigned char *) text), dictionary(d), loopDepth(0) { // Nothing else to do } int dci_pcb_struct::idName(const AgString &name) { return dictionary.intern(name); } CodeFragment dci_pcb_struct::code(Opcode op, const AgString &name) { return CodeFragment().append(op,dictionary.intern(name)); } CodeFragment dci_pcb_struct::code(Opcode op, const Constant &x) { int n = constants.intern(x); return CodeFragment().append(op,n); } CodeFragment dci_pcb_struct::codeCall( const AgString &name, AgStack &argStack) { int argCount = argStack.size(); CodeFragment argCode; for (int i = 0; i < argCount; i++) argCode.concat(argStack[i]); argCode.append(CALL, idFunction(name, argCount)); return argCode; } ScriptMethod::ScriptMethod(const char *text, AgDictionary &d) : dictionary(d), bytecode(), constantList() { // Create a parser using the dictionary specified by the dataset dci_pcb_type pcb(dictionary, text); // run the parser try { dci(&pcb); } catch(ErrorMessage e) { pcb.reportError(e.message()); } //Use dci_value() to retrieve the generated code bytecode = dci_value(&pcb).getBytecode(); constantList = pcb.constants.contents(); } // Apply script to a dataset Value interpret(const char *text, Dataset &d) { ScriptMethod method(text, d.dictionary); // List the generated code //method.list(cout); // Execute the generated code return method.apply(d); } } // End of embedded C++ /*** End of syntax file *****************************************************/