// Parser for pasgol, using a supplied parse table (pasgol.ptf) #include #include #include #include #include #include using namespace std; const unsigned char CHARLIT = 29; // Token type numbers const unsigned char INTLIT = 30; const unsigned char REALLIT = 31; const unsigned char DOLLAR = 41; const unsigned char MULOP = 26; const unsigned char ASTERISK = 40; const unsigned char SEMICOLON = 32; const unsigned char BEGIN = 2; const unsigned char END = 6; const unsigned char ID = 23; struct actionEntry // Parse table entry { char action; // Parsing action (S,R,D.C) short inputTerminal; // Token type short nonTerminal; // Nonterminal for reductions short length; // Production length short stateProduction; // Next state for shift }; struct token // Token form from scanner { unsigned char kind; // Token type unsigned char charPos; // Position on line unsigned short linePos; // Line number in file unsigned short value; // Distinguishes the lexeme }; struct jump // GoTo table entry { short current; // Current state short jumpTo; // State to change to }; struct errorMark // Error entry { short linePos; // Line number unsigned char charPos; // Position on the line unsigned char unit; // Module bool syntactic; // Indicates syntactic or semantic char event; // Parsing action used short messNo; // Message number for semantic errors short theState; // Parser state }; struct symbolEntry // Symbol table entry { unsigned char kind; // Type of symbol bool formal; // Indicator of formal argument bool defined; // Indicator of whether it is defined bool str; // Indicator for an array unsigned char length; // Storage units to use relative to type short lowerBound; // Lowest index for an array short upperBound; // Highest index for an array string theName; // Symbol's name }; struct literalEntry // Literal table entry { string item; // Literal string unsigned char kind; // Indicates type of literal unsigned char length; // Storage units to use relative to type }; struct stackItem // Token stack entry { unsigned char kind; // Token type unsigned char table; // Which table it is in short offset; // Where info is in a table short size; // Length in bytes short terminalNo; // Terminal number }; stack stateStack; // The stack for states stack tokenStack; // The stack for tokens errorMark errors[500]; // Can handle up to 500 errors symbolEntry symbolTable[500]; // Can handle up to 500 symbols literalEntry numTable[500]; // Can handle up to 500 numeric literals literalEntry charTable[500]; // Can handle up to 500 character literals stackItem production[20]; // Can handle productions with up to 20 terms list startRow[300]; // Row of Parse table kept as a list list goToCol[300]; // Column of Go To table kept as a list string terminal[100]; // Terminal names string nonTerminal[100]; // Non-Terminal names string seMessages[50]; // Text of semantic error messages short state; // Parsing state short errorCount = 0; // Number of errors short charLiteralCount; // Number of character literals short numLiteralCount; // Number of numeric literals short symbolCount; // Number of symbols short unitNo = 0; // Number of modules short terminalNo; // Current terminal number short save; // Stack size needed for restoration bool started; // Indicates if "begin" found for a module bool haveToken; // Indicates that a token was read bool recall; bool remember; bool init; // Indicates if initialization done bool parseProblem; // Indicates if there was a parsing error bool endSeen = false; // Indicates whether "end" was seen short numStates; // Number of states short numNonTerms; // Number of NonTerminals short numTerms; // Number of Terminals token t; // Current token const short TSIZE = sizeof(token); // Number of bytes in a token actionEntry act; // Current parse table entry fstream srcfile; // Stream for source file fstream tokfile; // Stream for token file fstream symfile; // Stream for symbol table file fstream tabfile; // Stream for parse table file void Initialize (); // Header for Initialize // Read in Symbols and Literals void LoadSymbols () { char stuff[256]; char kind; int j; symfile >> charLiteralCount; // Read character literals symfile.get(kind); for (j = 1; j <= charLiteralCount; j++) { symfile.getline(stuff, 255); charTable[j].item = stuff; charTable[j].kind = 3; charTable[j].length = charTable[j].item.size(); // Save # units } symfile >> numLiteralCount; // Read in numeric literals for (j = 1; j <= numLiteralCount; j++) { symfile >> kind >> numTable[j].item; if (kind == 'I') numTable[j].kind = 1; else numTable[j].kind = 2; numTable[j].length = 1; // Save # units } symfile >> symbolCount; // Read in symbols for (j = 1; j <= symbolCount; j++) { symfile >> symbolTable[j].theName; symbolTable[j].formal = symbolTable[j].defined = // Mark not defined, not symbolTable[j].str = false; // formal, not array symbolTable[j].kind = 0; } } // Read in parse table from a file named pasgol.ptf void LoadTable () { int j; int k; short junk; char tablefile[] = "pasgolg.ptf"; // Parse table file short records; // Number of non-blank entries in a state actionEntry pAction; // Parse table entry jump gAction; // Go To table entry char hold; tabfile.open (tablefile, ios::in); tabfile >> numNonTerms; for (j = 1; j <= numNonTerms; j++) // Read Non-Terminals and keep { // Six letters of each name tabfile >> hold; nonTerminal[j] = hold; for (k = 1; k < 6; k++) { tabfile.get(hold); nonTerminal[j] += hold; } tabfile.ignore(255, '\n'); } tabfile >> numTerms; // Read Terminals and keep for (j = 1; j <= numTerms; j++) // nine letters of each name { tabfile >> hold; terminal[j] = hold; for (k = 1; k < 9; k++) { tabfile.get(hold); terminal[j] += hold; } tabfile.ignore(255, '\n'); } tabfile >> numStates >> junk >> junk; // Get number of states, discard rest for (j = 0; j < numStates; j++) // Read Parse Table entries { tabfile >> junk >> records; // Get number of non-blank entries in for (k = 1; k <= records; k++) { tabfile >> pAction.action >> pAction.stateProduction >> pAction.inputTerminal; if (pAction.action == 'R' || pAction.action == 'C' || pAction.action == 'F') tabfile >> pAction.length >> pAction.nonTerminal; else pAction.length = pAction.nonTerminal = 0; tabfile.ignore(255,'\n'); // Throw away the comment startRow[j].push_back(pAction); } } for (j = 2; j <= numNonTerms; j++) // Read GoTo table entries { tabfile >> records; // Number of non-blank entries in column tabfile.ignore(255, '\n'); for (k = 1; k <= records; k++) { tabfile >> gAction.current >> gAction.jumpTo; // Get entry values goToCol[j].push_back(gAction); } tabfile.ignore(255, '\n'); // Throw away the comment } tabfile.close(); } // Reads one token and makes notes and adjustments if necessary void GetToken () { tokfile.read ((char *) &t, TSIZE); // Read a token if (t.kind != DOLLAR && endSeen) // If just saw an "end", but not eof { if (errorCount == 0) // Report parsing outcome cout << "Module " << symbolTable[1].theName << " is accepted." << endl; else cout << "Parsing is finished, but module " << symbolTable[1].theName << " has problems." << endl; unitNo++; // Count the module Initialize(); // Reinitialize - ELIMINATED PASS 2 endSeen = false; // Turn off "seen" indicator } terminalNo = t.kind; // Extract terminal number if ((t.value == 2) && (terminalNo == MULOP) && (!started)) terminalNo = ASTERISK; // An * before started is part of a // declaration, not a multiply if (remember) // If at sync point, set up save { // of stack (in main) recall = true; remember = false; } if ((terminalNo == SEMICOLON) || (terminalNo == BEGIN) || (terminalNo == END)) // Indicate found sync point remember = true; if (terminalNo == END) // If at "end", note it endSeen = true; } // Record a syntax error void NoteSyntax () { if ((errors[errorCount].linePos != t.linePos) || // Only one error per position (errors[errorCount].charPos != t.charPos) || // is kept track of (errors[errorCount].unit != unitNo)) { errorCount++; errors[errorCount].syntactic = true; // Mark it syntactic errors[errorCount].linePos = t.linePos; // Record position errors[errorCount].charPos = t.charPos; errors[errorCount].unit = unitNo; errors[errorCount].theState = state; // Record state and action errors[errorCount].event = act.action; } } // Search row of the parse table for appropriate action // If none is found, record an error void GetAction () { list::iterator p; p = startRow[state].begin(); // Find appropriate action for input while ((p != startRow[state].end()) && (p->inputTerminal != terminalNo)) p++; if (p == startRow[state].end()) p--; // Back up to error handling entry act = *p; if (p->inputTerminal != terminalNo) // Report error on no match NoteSyntax(); } // Change states after a reduction using the GoTo table void GetNextState () { list::iterator p; short currentState; p = goToCol[act.nonTerminal].begin(); currentState = stateStack.top(); // Find the appropriate state in the column while ((p != goToCol[act.nonTerminal].end()) && (p->current != currentState)) p++; if (p == goToCol[act.nonTerminal].end()) p--; // Back up to error handling entry stateStack.push(p->jumpTo); // Go to the new state if (p->current != currentState) // Error if no match found NoteSyntax(); state = p->jumpTo; } // Remove a production right side from the stack and adjust state stack void PopProduction (short pieces) { int j; for (j = pieces; j >= 1; j--) { production[j] = tokenStack.top(); // Save elements in an array tokenStack.pop(); // for directed action stateStack.pop(); } } // Put empty Non-Terminal token on the stack after a reduction // This should be done in conjunction with GetNextState which changes the state stack void PushBlank () { stackItem piece; piece.table = 7; // Should fill in token with useful things piece.offset = act.nonTerminal; tokenStack.push(piece); } // Initialize many markers - needs to be done for each module void Initialize () { haveToken = false; remember = false; recall = false; started = false; init = true; } // At end of parsing, declare the outcome for the last module void Accept () { if (errorCount == 0) cout << "Module " << symbolTable[1].theName << " is accepted." << endl; else cout << "Parsing is finished, but module " << symbolTable[1].theName << " has problems." << endl; } // Perform a shift action void Shift () { stackItem piece; stateStack.push(act.stateProduction); // Put new state on stack if (t.kind == ID) // Propagate id information { piece.kind = symbolTable[t.value].kind; piece.table = 1; piece.size = symbolTable[t.value].length; } else if (t.kind == CHARLIT) // Propagate literal information { piece.kind = charTable[t.value].kind; piece.table = 2; piece.size = charTable[t.value].length; } else if ((t.kind == INTLIT) || (t.kind == REALLIT)) { piece.kind = numTable[t.value].kind; piece.table = 2; piece.size = numTable[t.value].length; } else // Just use terminal for all else { piece.kind = t.kind; piece.table = 6; piece.size = 0; } piece.terminalNo = terminalNo; piece.offset = t.value; // Put in offset (value) and push tokenStack.push(piece); // onto the token stack state = act.stateProduction; // Set new state } // Used if the Parse table was tinkered with on error handling // Does a fake shift, inserting a token that is not present void PushFake () { stackItem piece; stateStack.push(act.stateProduction); state = act.stateProduction; // Change to specified state piece.offset = act.nonTerminal; // Make a fake shift piece.kind = piece.terminalNo = act.inputTerminal; piece.size = 0; if (act.length == 0) piece.table = 6; else { piece.table = act.length; piece.size = 1; } tokenStack.push(piece); // Put token on stack haveToken = true; if (parseProblem) // Note fake action cout << "Inserted token type " << terminal[act.inputTerminal] << " at line " << t.linePos << " column " << (short) t.charPos << endl; } // Handle reduce actions (as well as forced reductions - "crunches") // A complete pass 2 will do much more on a reduce than is shown here void Reduce () { // Note forced reductions if (parseProblem && act.action == 'C') cout << "Assumed token at line " << t.linePos << " column " << (short) t.charPos << " reduced to " << nonTerminal[act.nonTerminal] << endl; PopProduction (act.length); // Remove right side of production GetNextState(); // Determine the next state PushBlank(); // Push a token that will be filled in if (t.kind == BEGIN) // Watch out for BEGIN to know how started = true; // to interpret * } // Get rid of inappropriate tokens void Drop () { if (parseProblem) // Note what was done cout << "Dropped token type " << terminal[t.kind] << " found at line " << t.linePos << " column " << (short) t.charPos << endl; if ((terminalNo == SEMICOLON) || (terminalNo == BEGIN) || (terminalNo == END)) { // begin, end, and ; are synchronizing tokens while (stateStack.size() > save) // Remove entries from stacks back to { // Known good state stateStack.pop(); tokenStack.pop(); } state = stateStack.top(); } if (terminalNo == BEGIN) // In case it was a "begin", set indicator started = true; if (terminalNo == END) // In case it was an "end", set indicator haveToken = true; if (terminalNo == DOLLAR) // On last token, accept whatever you have Accept(); } // Display source program with error indicators detected during syntax // and semantic analysis void Listing (string &sourcefile) { char line[255]; // A source line int lineCount = 0; // Source line count list::iterator event; // Reference to parse table action list::iterator ahead; short errorNo = 1; // Error count bool done; // Controls loop for errors on a line short printPos; // Display position of a character short first; // First possible error for a line int j; srcfile.open(sourcefile.c_str(), ios::in); srcfile.getline(line,255); while (!srcfile.eof()) { lineCount++; cout << setw(4) << lineCount << ": " << line << endl; if ((errorCount > 0) && (errorNo <= errorCount)) // Error on this line { done = false; first = errorNo; printPos = 1; while (!done) { if (errors[errorNo].linePos == lineCount) // If error is on this line { if (printPos == 1) cout << " "; // Position to make err mark for (j = printPos; j < errors[errorNo].charPos; j++) cout << ' '; cout << '|'; printPos = errors[errorNo].charPos + 1; errorNo++; if (errorNo > errorCount) // If no more errors, quit done = true; } else done = true; } if (first != errorNo) cout << endl; for (j = first; j < errorNo; j++) { if (errors[j].syntactic) // Is error syntactic { cout << "Syntax " << j // Yes, show action << " expecting "; if (startRow[errors[j].theState].size() < 7) { ahead = event = startRow[errors[j].theState].begin(); ahead++; while (ahead != startRow[errors[j].theState].end()) // Show possible tokens { cout << terminal[event->inputTerminal] << " "; event++; ahead++; } } else cout << "one of many things. "; if (errors[j].event == 'F') // Show parser response cout << "TOKEN INSERTED" << endl; else if (errors[j].event == 'D') cout << "TOKEN DROPPED" << endl; else cout << "ASSUMED ONE PRESENT" << endl; } else // Note semantic errors cout << "Semantic error: " << seMessages[errors[j].messNo] << endl; } } srcfile.getline(line,255); } srcfile.close(); } int main () { string play; string sourcefile; // Strings holding the file names string tokenfile; string symbolfile; stackItem piece; int reductions = 0; int shifts = 0; char showit; cout << "Enter the Pasgol source file name: "; // Get file name cin >> play; sourcefile = play + ".pgl"; // Create source name tokenfile = play + ".tok"; // Create token name symbolfile = play + ".syt"; // Create symbol name symfile.open (symbolfile.c_str(), ios::in); // Open token and symbol files tokfile.open (tokenfile.c_str(), ios::in|ios::binary); tokfile.seekg(0, ios::beg); parseProblem = false; stateStack.push(0); // Always start in state 0 piece.table = 7; // With augmenting non-terminal on token stack piece.offset = 1; piece.size = 0; piece.terminalNo = 0; tokenStack.push(piece); state = 0; save = 1; Initialize(); errors[0].linePos = 0; // Put in a fake error cout << "Show parsing problems as they occur (Y/N)? "; // Determine if parsing cin >> showit; // problems to be shown if (showit == 'Y' || showit == 'y') parseProblem = true; LoadTable(); // Read in the parse table do { if (init) // For each module, read in { // symbols and literals LoadSymbols(); init = false; } if (!haveToken) // If need to get a token, get one GetToken(); haveToken = false; GetAction(); // Find parse table entry // Handle reductions repeatedly while ((act.action == 'R') || (act.action == 'C')) { reductions++; Reduce(); GetAction(); } if (recall) // If at synchronizing point, { // save where we are save = stateStack.size(); recall = false; } shifts++; switch (act.action) // Perform parsing action { case 'S': Shift(); break; case 'D': Drop(); break; case 'F': PushFake(); break; case 'A': Accept(); } } while (act.action != 'A'); // Report outcome cout << reductions << " reductions " << shifts << " shifts and other actions." << endl; cout << "Number of errors: " << errorCount << endl; cout << "Syntax/Semantic listing (Y/N)? "; cin >> showit; if (showit == 'Y' || showit == 'y') // Show listing if requested Listing (sourcefile); symfile.close(); tokfile.close(); return 0; }