Implementing Lexical and Syntax Analysis with Lex and Yacc

Classified in Computers

Written on in English with a size of 2.73 KB

Lexer Implementation (lexer.l)

%{ #include "y.tab.h" %} %%% "+" { return PLUS; } "-" { return MINUS; } "*" { return MULTIPLY; } "/" { return DIVIDE; } "%" { return MODULUS; } "(" { return LPAREN; } ")" { return RPAREN; } [0-9]+ { yylval.num = atoi(yytext); return NUMBER; } [a-zA-Z][a-zA-Z0-9]* { yylval.id = strdup(yytext); return IDENTIFIER; } [ \t\n] { /* ignore whitespace */ } . { return yytext[0]; } %%%

Parser Implementation (parser.y)

%{ #include <stdio.h> #include <stdlib.h> #include <string.h> void yyerror(const char *s); int yylex(); int temp_count = 0; int label_count = 0; void gen(char op, int arg1, int arg2, int result) { printf("%d: %c, %d, %d, %d\n", label_count++, op, arg1, arg2, result); } int new_temp() { return ++temp_count; } %} %token NUMBER IDENTIFIER PLUS MINUS MULTIPLY DIVIDE MODULUS LPAREN RPAREN %% stmt: expr { printf("\n"); } ; expr: expr PLUS expr { int t = new_temp(); gen('+', $1, $3, t); $$ = t; } | expr MINUS expr { int t = new_temp(); gen('-', $1, $3, t); $$ = t; } | expr MULTIPLY expr { int t = new_temp(); gen('*', $1, $3, t); $$ = t; } | expr DIVIDE expr { int t = new_temp(); gen('/', $1, $3, t); $$ = t; } | expr MODULUS expr { int t = new_temp(); gen('%', $1, $3, t); $$ = t; } | LPAREN expr RPAREN { $$ = $2; } | NUMBER { $$ = $1; } | IDENTIFIER { $$ = 0; } ; %% int main() { printf("Enter an expression: "); yyparse(); return 0; } void yyerror(const char *s) { fprintf(stderr, "Error: %s\n", s); }

Recursive Descent Parser Example

Below is a manual implementation of a recursive descent parser in C.

#include <stdio.h> #include <stdlib.h> #include <string.h> #include <stdbool.h> static int ptr = 0; static char input[20]; static bool E(); static bool T(); static bool S() { int fallback = ptr; if (input[ptr++] != '(') { ptr = fallback; } if (input[ptr++] != 'x') { ptr = fallback; return false; } printf("x in s parsed"); if (E() == false) { ptr = fallback; printf("error in s"); return false; } return true; } static bool E() { int fallback = ptr; if (input[ptr] == '') { ptr++; return true; } printf("+ in E parsed"); if (T() == false) { ptr = fallback; return false; } } static bool T() { int fallback = ptr; if (input[ptr] == 'x') printf("x in passed"); if (ptr < strlen(input) - 1) if (input[ptr++] == ')') return true; else E(); return true; } int main() { scanf("%s", input); bool isValid = S(); if (isValid) { printf("\nThe input string is valid."); } else { printf("\nThe input string is invalid."); } }

Output

(5+3)*2
The input string is valid

Input Grammar

  • E -> E+T
  • E -> T
  • E -> (E)
  • T -> x

Related entries: