LL(1) parsers are categorized as top-down parsers that specifically operate on the LL(1) grammar class. The initial "L" signifies the left-to-right scanning of input, the subsequent "L" denotes leftmost derivation, and the numeral "1" represents the count of lookaheads considered during decision-making.
It is essential to ascertain the LL(1) nature of the grammar before generating an LL(1) parsing table.
Building Rules for the LL(1) Parsing Table:
We can utilize the subsequent guidelines to comprehend the LL(1) parsing in C.
Step 1: Grammar Evaluation:
- Begin by examining a provided context-free grammar written in BNF (Backus-Naur Form) .
- Make that the grammar is LL(1) parseable and not left-recursive . Correct transformations can be used to get rid of left recursion .
- Put the grammar in a format that will be useful for analysis, such as a table of parsing rules or a collection of production rules .
Step 2: Analyzing the initial and subsequent groups:
- Identify the FIRST set for every non-terminal symbol within the grammar. The FIRST set of a non-terminal A encompasses all terminal symbols capable of initiating a derived string from A.
- Establish the FOLLOW set for each non-terminal symbol within the grammar. The FOLLOW set of a non-terminal A comprises all terminal symbols that can directly follow a derived string from A in any derivation.
Step 3: How to Build a Parsing Table:
- In each instance where A is a non-terminal and is a string of terminals and/or non-terminals:
- Add A-> to [A, t] in the table for each terminal symbol t in the FIRST(α).
- If ε (epsilon) is in FIRST(α) , for each terminal symbol t in FOLLOW(A) , add A -> α to the table entry [A, t].
- Add A-> to the table entry [A, $] if it is in FIRST and $ (end of input) is in FOLLOW(A) .
- The grammar is not LL(1) and cannot be parsed using LL(1) parsing if there are any parsing table conflicts (different entries in the same cell).
Step 4: How to Use the Parsing Table:
- First, create a parsing stack from scratch and place the grammar's start symbol on it.
- The current input token is read .
- Use the current input token as the column index and the top of the stack as the row index to consult the parsing table .
- If the table entry is blank or contains a mistake, the input contains a syntax error .
- Pop A from the stack and push in reverse order onto the stack if the table entry has a production A -> .
Program for LL(1) Parsing Table in C:
In this script, the system reads a file named "text.txt" to parse the grammar. The symbol "^" is used to denote epsilon. The script assumes the input grammar to be LL(1).
text.txt
E->TA
A->+TA|^
T->FB
B->*FB|^
F->t|(E)
Program:
#include<stdio.h>
#include<string.h>
#define TSIZE 128
// If the input is jth non-terminal, table[i][j] retains the index of production that must be applied to the ith variable.
int table[100][TSIZE];
// keeps a complete list of terminals
//When using the ASCII value to index terminals, terminal[i] = 1 indicates that the character has an ASCII value.
char terminal[TSIZE];
// only saves the list of terminals that begin with the upper case letters "A" through "Z."
//Non-terminal[i] denotes that the grammar is non-terminal and the alphabet is present.
char non-terminal[26];
// structure to hold each production
// str[] stores the production
// len is the length of production
struct product {
char str[100];
int len;
}pro[20];
// no of productions in form A->ß
int no_pro;
char first[26][TSIZE];
char follow[26][TSIZE];
// stores first of each production in form A->ß
char first_rhs[100][TSIZE];
// check if the symbol is non-terminal
int isNT(char c) {
return c >= 'A' && c <= 'Z';
}
// reading data from the file
void readFromFile() {
FILE* fptr;
fptr = fopen("text.txt", "r");
char buffer[255];
int i;
int j;
while (fgets(buffer, sizeof(buffer), fptr)) {
printf("%s", buffer);
j = 0;
non-terminal[buffer[0] - 'A'] = 1;
for (i = 0; i<strlen(buffer) - 1; ++i) {
if (buffer[i] == '|') {
++no_pro;
pro[no_pro - 1].str[j] = '\0';
pro[no_pro - 1].len = j;
pro[no_pro].str[0] = pro[no_pro - 1].str[0];
pro[no_pro].str[1] = pro[no_pro - 1].str[1];
pro[no_pro].str[2] = pro[no_pro - 1].str[2];
j = 3;
}
else {
pro[no_pro].str[j] = buffer[i];
++j;
if (!isNT(buffer[i]) && buffer[i] != '-' && buffer[i] != '>') {
terminal[buffer[i]] = 1;
}
}
}
pro[no_pro].len = j;
++no_pro;
}
}
void add_FIRST_A_to_FOLLOW_B(char A, char B) {
int i;
for (i = 0; i< TSIZE; ++i) {
if (i != '^')
follow[B - 'A'][i] = follow[B - 'A'][i] || first[A - 'A'][i];
}
}
void add_FOLLOW_A_to_FOLLOW_B(char A, char B) {
int i;
for (i = 0; i< TSIZE; ++i) {
if (i != '^')
follow[B - 'A'][i] = follow[B - 'A'][i] || follow[A - 'A'][i];
}
}
void FOLLOW() {
int t = 0;
int i, j, k, x;
while (t++ <no_pro) {
for (k = 0; k < 26; ++k) {
if (!non-terminal[k]) continue;
char nt = k + 'A';
for (i = 0; i<no_pro; ++i) {
for (j = 3; j < pro[i].len; ++j) {
if (nt == pro[i].str[j]) {
for (x = j + 1; x < pro[i].len; ++x) {
char sc = pro[i].str[x];
if (isNT(sc)) {
add_FIRST_A_to_FOLLOW_B(sc, nt);
if (first[sc - 'A']['^'])
continue;
}
else {
follow[nt - 'A'][sc] = 1;
}
break;
}
if (x == pro[i].len)
add_FOLLOW_A_to_FOLLOW_B(pro[i].str[0], nt);
}
}
}
}
}
}
void add_FIRST_A_to_FIRST_B(char A, char B) {
int i;
for (i = 0; i< TSIZE; ++i) {
if (i != '^') {
first[B - 'A'][i] = first[A - 'A'][i] || first[B - 'A'][i];
}
}
}
void FIRST() {
int i, j;
int t = 0;
while (t <no_pro) {
for (i = 0; i<no_pro; ++i) {
for (j = 3; j < pro[i].len; ++j) {
char sc = pro[i].str[j];
if (isNT(sc)) {
add_FIRST_A_to_FIRST_B(sc, pro[i].str[0]);
if (first[sc - 'A']['^'])
continue;
}
else {
first[pro[i].str[0] - 'A'][sc] = 1;
}
break;
}
if (j == pro[i].len)
first[pro[i].str[0] - 'A']['^'] = 1;
}
++t;
}
}
void add_FIRST_A_to_FIRST_RHS__B(char A, int B) {
int i;
for (i = 0; i< TSIZE; ++i) {
if (i != '^')
first_rhs[B][i] = first[A - 'A'][i] || first_rhs[B][i];
}
}
// Calculates FIRST(ß) for each A->ß
void FIRST_RHS() {
int i, j;
int t = 0;
while (t <no_pro) {
for (i = 0; i<no_pro; ++i) {
for (j = 3; j < pro[i].len; ++j) {
char sc = pro[i].str[j];
if (isNT(sc)) {
add_FIRST_A_to_FIRST_RHS__B(sc, i);
if (first[sc - 'A']['^'])
continue;
}
else {
first_rhs[i][sc] = 1;
}
break;
}
if (j == pro[i].len)
first_rhs[i]['^'] = 1;
}
++t;
}
}
int main() {
readFromFile();
follow[pro[0].str[0] - 'A']['$'] = 1;
FIRST();
FOLLOW();
FIRST_RHS();
int i, j, k;
// display first of each variable
printf("\n");
for (i = 0; i<no_pro; ++i) {
if (i == 0 || (pro[i - 1].str[0] != pro[i].str[0])) {
char c = pro[i].str[0];
printf("FIRST OF %c: ", c);
for (j = 0; j < TSIZE; ++j) {
if (first[c - 'A'][j]) {
printf("%c ", j);
}
}
printf("\n");
}
}
// display follow of each variable
printf("\n");
for (i = 0; i<no_pro; ++i) {
if (i == 0 || (pro[i - 1].str[0] != pro[i].str[0])) {
char c = pro[i].str[0];
printf("FOLLOW OF %c: ", c);
for (j = 0; j < TSIZE; ++j) {
if (follow[c - 'A'][j]) {
printf("%c ", j);
}
}
printf("\n");
}
}
// display first of each variable ß
// in form A->ß
printf("\n");
for (i = 0; i<no_pro; ++i) {
printf("FIRST OF %s: ", pro[i].str);
for (j = 0; j < TSIZE; ++j) {
if (first_rhs[i][j]) {
printf("%c ", j);
}
}
printf("\n");
}
// the parse table contains '$'
// set terminal['$'] = 1
// to include '$' in the parse table
terminal['$'] = 1;
// the parse table do not read '^'
// as input
// so we set terminal['^'] = 0
// to remove '^' from terminals
terminal['^'] = 0;
// printing parse table
printf("\n");
printf("\n\t**************** LL(1) PARSING TABLE *******************\n");
printf("\t--------------------------------------------------------\n");
printf("%-10s", "");
for (i = 0; i< TSIZE; ++i) {
if (terminal[i]) printf("%-10c", i);
}
printf("\n");
int p = 0;
for (i = 0; i<no_pro; ++i) {
if (i != 0 && (pro[i].str[0] != pro[i - 1].str[0]))
p = p + 1;
for (j = 0; j < TSIZE; ++j) {
if (first_rhs[i][j] &&j != '^') {
table[p][j] = i + 1;
}
else if (first_rhs[i]['^']) {
for (k = 0; k < TSIZE; ++k) {
if (follow[pro[i].str[0] - 'A'][k]) {
table[p][k] = i + 1;
}
}
}
}
}
k = 0;
for (i = 0; i<no_pro; ++i) {
if (i == 0 || (pro[i - 1].str[0] != pro[i].str[0])) {
printf("%-10c", pro[i].str[0]);
for (j = 0; j < TSIZE; ++j) {
if (table[k][j]) {
printf("%-10s", pro[table[k][j] - 1].str);
}
else if (terminal[j]) {
printf("%-10s", "");
}
}
++k;
printf("\n");
}
}
}
Output:
FIRST OF E: ( t
FIRST OF A: + ^
FIRST OF T: ( t
FIRST OF B: * ^
FIRST OF F: ( t
FOLLOW OF E: $ )
FOLLOW OF A: $ )
FOLLOW OF T: + $ )
FOLLOW OF B: + $ )
FOLLOW OF F: * + ) $
FIRST OF TA: ( t
FIRST OF +TA: + ^
FIRST OF FB: ( t
FIRST OF *FB: * ^
FIRST OF t: t
FIRST OF (E): (
( ) * + t $
E TA TA
A ^ ^ ^ +TA ^
T FB FB
B ^ ^ *FB ^ ^
F (E) t