Chapter 0: Administrivia

Compilers: Principles, Techniques, and Tools · Required

x := 2; y := x + 3;
  [preprocessor] --> [compiler] --> [assembler] --> [linker] --> [loader]

    // Returns the bitwise XOR of all the characters of the NUL-terminated
    // string s; the result is 0 for the empty string.  The loop walks s by
    // index and stops at the first 0 byte, which is not included in the XOR.
    int xor (char s[])		// native C speakers say char *s
    {
	int ans = 0;		// running XOR of the characters seen so far
	int i = 0;		// index of the next character to examine

	while (s[i] != 0) {	// stop at the terminating NUL
	    ans = ans ^ s[i];	// s[i] is promoted to int before the XOR
	    i = i + 1;
	}

	return ans;
    }

	.file	"xor.c"
	.text
.globl xor
	.type	xor, @function
# xor: 32-bit x86 code (AT&T syntax: source operand first, destination last).
# No frame pointer is used; everything is addressed relative to %esp.
# After the subl below the code uses these stack slots:
#     (%esp)  i    -- loop index
#    4(%esp)  ans  -- running XOR
#   12(%esp)  s    -- pointer to the first character of the string
# The result is left in %eax.
xor:
	subl	$8, %esp		# make room for the two 4-byte locals
	movl	$0, 4(%esp)		# ans = 0
	movl	$0, (%esp)		# i = 0
.L2:					# loop test: s[i] != 0 ?
	movl	(%esp), %eax		# eax = i
	addl	12(%esp), %eax		# eax = s + i, the address of s[i]
	cmpb	$0, (%eax)		# compare the byte s[i] with 0
	je	.L3			# equal: end of string, leave the loop
	movl	(%esp), %eax		# loop body: the address of s[i] is computed again
	addl	12(%esp), %eax
	movsbl	(%eax),%edx		# edx = s[i], sign-extended from 8 to 32 bits
	leal	4(%esp), %eax		# eax = address of ans
	xorl	%edx, (%eax)		# ans = ans ^ s[i]
	movl	%esp, %eax		# eax = address of i
	incl	(%eax)			# i = i + 1
	jmp	.L2			# back to the loop test
.L3:
	movl	4(%esp), %eax		# return value: eax = ans
	addl	$8, %esp		# release the locals
	ret
	.size	xor, .-xor
	.section	.note.GNU-stack,"",@progbits
	.ident	"GCC: (GNU) 3.4.6 (Gentoo 3.4.6-r1, ssp-3.4.5-1.0, pie-8.7.9)"

# Same routine with the bookkeeping directives removed.
# Stack slots after the subl:  (%esp) = i,  4(%esp) = ans,  12(%esp) = s.
# The result is left in %eax.
.globl xor
xor:
	subl	$8, %esp		# make room for the two 4-byte locals
	movl	$0, 4(%esp)		# ans = 0
	movl	$0, (%esp)		# i = 0
.L2:					# loop test: s[i] != 0 ?
	movl	(%esp), %eax		# eax = i
	addl	12(%esp), %eax		# eax = s + i, the address of s[i]
	cmpb	$0, (%eax)		# compare the byte s[i] with 0
	je	.L3			# equal: end of string, leave the loop
	movl	(%esp), %eax		# loop body: the address of s[i] is computed again
	addl	12(%esp), %eax
	movsbl	(%eax),%edx		# edx = s[i], sign-extended from 8 to 32 bits
	leal	4(%esp), %eax		# eax = address of ans
	xorl	%edx, (%eax)		# ans = ans ^ s[i]
	movl	%esp, %eax		# eax = address of i
	incl	(%eax)			# i = i + 1
	jmp	.L2			# back to the loop test
.L3:
	movl	4(%esp), %eax		# return value: eax = ans
	addl	$8, %esp		# release the locals
	ret

  x3 := y + 3;
  x3  :=   y   +   3   ;
  x3   :=y+ 3  ;

  x 3 := y + 3;

  x3 := y + 3;

  asst-stmt --> id := expr ;
  expr --> number
        |  id
        |  expr + expr

  [scanner]→[parser]→[sem anal]→[inter code gen]→[opt1]→[code gen]→[opt2]

  x3 := y + 3;

  id₁ := id₂ + 3 ;

temp1 := inttoreal(3)
temp2 := id2 + temp1
temp3 := realtoint(temp2)
id1 := temp3

inttoreal temp1 3     --
add       temp2 id2   temp1
realtoint temp3 temp2 --
assign    id1   temp3 --

  operation  target source1 source2

  add       temp2 id2  3.0
  
  realtoint id1   temp2
  
MOVE id2,  R1
ADD  #3.0, R1
RTOI R1,   R2
MOVE R2,   id1

7+4-5
74+5-
  Terminals: 0 1 2 3 4 5 6 7 8 9 + -
  Nonterminals: list digit
  Productions: list → list + digit
               list → list - digit
               list → digit
               digit → 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9
  Start symbol: list

  list → list - digit
       → list - 5
       → list + digit - 5
       → list + 4 - 5
       → digit + 4 - 5
       → 7 + 4 - 5

list → ε
1+2+3
list → digit + list
list → digit - list
expr   → expr + term | expr - term | term
term   → term * factor | term / factor | factor
factor → digit | ( expr )
digit  → 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9

  A → B | C

  A → B
  A → C

stmt → id := expr
     | if expr then stmt
     | if expr then stmt else stmt
     | while expr do stmt
     | begin opt-stmts end
opt-stmts → stmt-list | ε
stmt-list → stmt-list ; stmt | stmt

  expr   → expr + term | expr - term | term
  term   → term * factor | term / factor | factor
  factor → digit | ( expr )
  digit  → 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9

expr → expr + term    { print('+') }
expr → expr - term    { print('-') }
term → term / factor  { print('/') }
term → factor         { null }
digit → 3             { print('3') }

  rest → + term rest | - term rest | term
  term → 1 | 2 | 3

    type →   simple
    type →   ↑ id
    type →   array [ simple ] of type
    simple → integer
    simple → char
    simple → num dotdot num
  
    expr → term + term - 9
    term → factor / factor
    factor → digit
    digit → 7
  
7/7+7/7-9
    expr → term + term
    term → factor / factor
    factor → ( expr )
  
type → simple | ↑ id | array [ simple ] of type
  rest → + term rest | - term rest | term
  term → 1 | 2 | 3

    stmt → expr ;
         | if ( expr ) stmt
         | for ( optexpr ; optexpr ; optexpr ) stmt
         | other
 optexpr → expr | ε
  
expr → expr + term
expr → term
expr → term rest
rest → + term rest
rest → ε
term + term + ... + term
term + term + term
A → A α | β
A → β R
R → α R | ε
  expr → expr + term { print('+') }
  expr → expr - term { print('-') }
  expr → term
  term → 0           { print('0') }
  . . .
  term → 9           { print('9') }

A → A α | A β | γ
A → γ R
R → α R | β R | ε
α
+ term { print('+') }
  expr → term rest
  rest → + term { print('+') } rest
       | - term { print('-') } rest
       | ε
  term → 0           { print('0') }
       . . .
       | 9           { print('9') }

else ;
rest()
else
then
else if
x<y
x<=y
sum = sum + x;
id = id + id ;
x<y
x<=y
then
thenewvalue
  expr   → expr + term    { print('+') }
  expr   → expr - term    { print('-') }
  expr   → term
  term   → num            { print(num.value) }

  expr   → expr + term    { print('+') }
  expr   → expr - term    { print('-') }
  expr   → term
  term   → factor
  factor → ( expr ) | num { print(num.value) }

insert(s,t)
   lookup(s)
insert("div",div)
  struct symtableType {
    char lexeme[BIGNUMBER];
    int  token;
  } symtable[ANOTHERBIGNUMBER];

Q := Z;
A[f(x)+B*D] := g(B+C*h(x,y));
12
s+t
y := 7 * xx + 6 * (z + w)
  lvalue y
  push 7
  rvalue xx
  *
  push 6
  rvalue z
  rvalue w
  +
  *
  +
  :=

id
  stmt → id := expr
      { stmt.t := 'lvalue' || id.lexeme || expr.t || ':=' }

  stmt → if expr then stmt₁ { out := newlabel();
                           stmt.t := expr.t || 'gofalse' out || stmt₁.t || 'label' out }

  stmt → if
	 expr      { out := newlabel; emit('gofalse', out); }
	 then
         stmt₁     { emit('label', out) }
  
out:=newlabel
  procedure stmt
    integer test, out;
    if lookahead = id then       // first set is {id} for assignment
      emit('lvalue', tokenval);  // pushes lvalue of lhs
      match(id);                 // move past the lhs
      match(':=');               // move past the :=
      expr;                      // pushes rvalue of rhs on tos
      emit(':=');                // do the assignment (Omitted in book)
    else if lookahead = 'if' then
      match('if');               // move past the if
      expr;                      // pushes boolean on tos
      out := newlabel();
      emit('gofalse', out);      // out is integer, emit makes a legal label
      match('then');             // move past the then
      stmt;                      // recursive call
      emit('label', out);        // emit again makes out legal
    else if ...                  // while, repeat/do, etc
    else error();
  end stmt;

   start → list eof
    list → expr ; list
    list →  ε                   // would normally use | as below
    expr → expr + term      { print('+') }
         | expr - term      { print('-'); }
         | term
    term → term * factor    { print('*') }
         | term / factor    { print('/') }
         | term div factor  { print('DIV') }
         | term mod factor  { print('MOD') }
         | factor
  factor → ( expr )
         | id               { print(id.lexeme) }
         | num              { print(num.value) }

        start → list eof
	 list → expr ; list
	      | ε
	 expr → term moreterms
    moreterms → + term { print('+') } moreterms
	      | - term { print('-') } moreterms
	      | ε
         term → factor morefactors
  morefactors → * factor { print('*') } morefactors
	      | / factor { print('/') } morefactors
	      | div factor { print('DIV') } morefactors
	      | mod factor { print('MOD') } morefactors
	      | ε
       factor → ( expr )
              | id               { print(id.lexeme) }
              | num              { print(num.value) }

Lexer.c
Parser.c
  // Parses   term → factor morefactors   and emits each multiplicative
  // operator after both of its operands (postfix order).
  // Reads the global lookahead token; consumes input through match().
  term() {
    int t;
    factor();
    // now we should call morefactors(), but instead code it inline
    while(true)              // morefactors nonterminal is right recursive
       switch (lookahead) {  // lookahead set by match()
       case '*': case '/': case DIV: case MOD: // all the same
          t = lookahead;     // needed for emit() below
          match(lookahead);  // skip over the operator
          factor();          // see grammar for morefactors
          emit(t,NONE);
          continue;          // C semantics for case
       default:              // the epsilon production
          return;
       }
  }

Emitter.c
Symbol.c
init.c
Error.c
  d₁ → r₁
  d₂ → r₂
      ...
  d_n → r_n

  letter_ → A | B | ... | Z | a | b | ... | z | _
    digit → 0 | 1 | ... | 9
      CId → letter_ ( letter_ | digit)*

  letter_ → [A-Za-z_]
    digit → [0-9]
      CId → letter_ ( letter_ | digit )*
  
  digit → [0-9]
 digits → digit⁺
 number → digits (. digits)?(E[+-]? digits)?

  stmt → if expr then stmt
       | if expr then stmt else stmt
       | ε
  expr → term relop term      // relop is relational operator =, >, etc
       | term
  term →  id
       | number

  digit → [0-9]
 digits → digit⁺
 number → digits (. digits)? (E[+-]? digits)?
 letter → [A-Za-z]
     id → letter ( letter | digit )^*
     if → if
   then → then
   else → else
  relop → < | > | <= | >= | = | <>
  
  ws → ( blank | tab | newline ) +

  TOKEN getRelop()                        // TOKEN has two components
    TOKEN retToken = new(RELOP);          // First component set here
    while (true)
       switch(state)
         case 0: c = nextChar();
                 if (c == '<')      state = 1;
                 else if (c == '=') state = 5;
                 else if (c == '>') state = 6;
                 else fail();
                 break;
         case 1: ...
         ...
         case 8: retract();  // an accepting state with a star
                 retToken.attribute = GT;  // second component
                 return(retToken);
  
Lex
Lex
Lex
  declarations
  %%
  translation rules
  %%
  auxiliary functions

  %{
      /* definitions of manifest constants
         LT, LE, EQ, NE, GT, GE,
         IF, THEN, ELSE, ID, NUMBER, RELOP */
  %}

  /* regular definitions */
  /* ws uses + (one or more delimiters) so that it never matches the
     empty string; id is a letter followed by any mix of letters and digits */
  delim     [ \t\n]
  ws        {delim}+
  letter    [A-Za-z]
  digit     [0-9]
  id        {letter}({letter}|{digit})*
  number    {digit}+(\.{digit}+)?(E[+-]?{digit}+)?

  %%

  {ws}      {/* no action and no return */}
  if        {return(IF);}
  then      {return(THEN);}
  else      {return(ELSE);}
  {id}      {yylval = (int) installID(); return(ID);}
  {number}  {yylval = (int) installNum(); return(NUMBER);}
  "<"       {yylval = LT; return(RELOP);}
  "<="      {yylval = LE; return(RELOP);}
  "="       {yylval = EQ; return(RELOP);}
  "<>"      {yylval = NE; return(RELOP);}
  ">"       {yylval = GT; return(RELOP);}
  ">="      {yylval = GE; return(RELOP);}

  %%

  int installID() {/* function to install the lexeme, whose first character
                      is pointed to by yytext, and whose length is yyleng,
                      into the symbol table and return a pointer thereto    */
  }

  int installNum() {/* similar to installID, but puts numerical constants
                       into a separate table                              */
  }

  #define LT 12
  #define LE 13

Lex
Lex
IF(X)=3
IF(X.LT.Y)X=Y
  IF / \(.*\){letter}

  s = s₀;   // start state.  NOTE = is assignment
  c = nextChar();      // a priming read
  while (c != eof) {
    s = move(s,c);
    c = nextChar();
  }
  if (s is in F, the set of accepting states) return yes
  else return no

  S = ε-closure(s₀);
  c = nextChar();
  while ( c != eof ) {
    S = ε-closure(move(S,c));
    c = nextChar();
  }
  if ( S ∩ F != φ ) return yes;   // F is accepting states
  else return no;

    E → E + T | T
    T → T * F | F
    F → ( E ) | id
  
    E  → T E'
    E' → + T E' | ε
    T  → F T'
    T' → * F T' | ε
    F  → ( E ) | id
  
    E → E + E | E * E | ( E ) | id
  
S ⇒* x
    E → E + E | E * E | ( E ) | id
  
    E ⇒ E + E ⇒ id + E ⇒ id + id
    E ⇒ E + E ⇒ E + id ⇒ id + id
  
    A ⇒ x₁ ⇒ x₂ ... ⇒ x_n
  
    E → E + E | E * E | ( E ) | id
  
id + id * id
    E ⇒ E + E          E ⇒ E * E
      ⇒ id + E           ⇒ E + E * E
      ⇒ id + E * E       ⇒ id + E * E
      ⇒ id + id * E      ⇒ id + id * E
      ⇒ id + id * id     ⇒ id + id * id
  
    A0 → A1 | A7
    A1 → A2 | A4
    A2 → a A3
    A3 → A6
    A4 → b A5
    A5 → A6
    A6 → A1 | A7
    A7 → a A8
    A8 → b A9
    A9 → b A10
    A10 → ε
  
    A → a A b | ε
  
    stmt → if expr then stmt
         | if expr then stmt else stmt
         | other
  
if E1 then S1 else if E2 then S2 else S3
        stmt → matched-stmt | open-stmt
matched-stmt → if expr then matched-stmt else matched-stmt
	     | other
   open-stmt → if expr then stmt
	     | if expr then matched-stmt else open-stmt
  
    A → A x1 | A x2 | ... | A xn | y1 | y2 | ... | ym
  
    A  → y1 A' | ... | ym A'
    A' → x1 A' | ... | xn A' | ε
  
A ⇒ A + ⇒ , +
A ⇒ , A' ⇒ , + A' ⇒ , +
A → x y1 | x y2
    A → x A'
   A' → y1 | y2
  
for i = 1 to n
  if Xi is a nonterminal
    process Xi  // recursive
  else if Xi (a terminal) matches current input symbol
    advance input to next symbol
  else // trouble Xi doesn't match and never will
      
    E  → T E'
    E' → + T E' | ε
    T  → F T'
    T' → * F T' | ε
    F  → ( E ) | id
  
    S → A b    // b is in FOLLOW(A)
    A → b      // α=b so α derives a string beginning with b
    A → ε      // β=ε so β derives ε
  
A → α
A→α
A → α
    E  → T E'
    E' → + T E' | ε
    T  → F T'
    T' → * F T' | ε
    F  → ( E ) | id
  
    E → E + T | T
    T → T * F | F
    F → ( E ) | id
  
id*id
id * id,  F * id,  T * F,  T,  E
E ⇒ T ⇒ T * F ⇒
    T * id ⇒ F * id ⇒ id * id
... id ( id
) ...
if (nextToken == X) then error(expected Y here)
    E → E + T | T
    T → T * F | F
    F → ( E ) | id
  
    E → E + T
    E → T
    T → id
  
   E' → E
    E → E + T
    E → T
    T → id
  
   E' → E
    E → E + T | T
    T → T * F | F
    F → ( E ) | id
  
  T → T * F
  T → F
  F → num

    INT a,b,c
  
    array [3] of array [4] of int    and     int[3][4]
  
op a,b,c
    lshift a,b,4   // left shift b by 4 and place result in a
    add    a,b,c   // a = b + c
    a = b + c      // alternate (more natural) representation of above
  
    t1 = B + A
    t2 = Y - t1
    t3 = t1 * t2
  
    declare
       type MyInteger is new Integer;
       MyX : MyInteger;
       x   : Integer := 0;
    begin
       MyX := x;
    end
  
    D → T id ; D | ε
    T → B C | RECORD { D }
    B → INT | FLOAT
    C → [ NUM ] C | ε
  
    declarations         → declaration declarations | ε
    declaration          → object-declaration | type-declaration
    object-declaration   → defining-identifier : object-definition ;
    object-definition    → type-name | type-name [ NUMBER ]
    type-declaration     → TYPE defining-identifier IS ARRAY OF type-name ;
    defining-identifier  → IDENTIFIER
    type-name            → IDENTIFIER | INT | REAL
  
    ds   → d ds | ε
    d    → od | td
    od   → di : odef ;
    odef → tn | tn [ NUM ]
    td   → TYPE di IS ARRAY OF tn ;
    di   → ID
    tn   → ID | INT | REAL
  
  Procedure P1 is
    y : integer;
    type t is array of real ;
    x : t[10];

    P →                { offset = 0; }
        D
    D → T ID ;         { top.put(id.lexeme, T.type, offset);
                              offset = offset + T.width; }
        D₁
    D → ε
  
    procedure test () is
        y : integer;
        type t is array of real;
        x : t[10];
    begin
        y = 5;        // we haven't yet done statements
        x[2] = y;     // type error?
    end;
  
    T → record {         { Env.push(top);  top = new Env();
                           Stack.push(offset); offset = 0; }
    D }                  { T.type = record(top); T.width = offset;
                           top = Env.pop(); offset = Stack.pop(); }
  
   float x;
   record { float x; float y; } rec;
   float y;
 
a[i]
i
i
a
    a = &b
    a = *b
    *a = b
  
    t$1 = 3*4    // t$n are the temporary names from new TEMP()
    t$2 = &a
    t$3 = t$2 + t$1
    *t$3 = 5
  
    procedure test () is
        y : integer;
        type t is array of real;
        x : t[10];
    begin
        y = 5;        // we haven't yet done statements
        x[2] = y;     // type error?
    end;
  
    widen (a:addr, t:type, w:type, newcode:string, newaddr:addr)
      if t=w
        newcode = ""
        newaddr = a
      else if t=integer and w=real
        newaddr = new Temp()
        newcode = gen(newaddr = (real) a)
      else signal error
  
     A=0 OR  3/A < 1.2
  
  S → if ( B ) S₁
  S → if ( B ) S₁ else S₂
  S → while ( B ) S₁

    if ( x < 5 || x > 10 && x == y ) x = 3 ;
  
        if x < 5 goto L₂
        goto L₃
    L₃: if x > 10 goto L₄
	goto L₁
    L₄: if x == y goto L₂
	goto L₁
    L₂: x = 3
  
    LHS = true
    LHS = false
  
    while (boolean-expression) statement-list end
    if (boolean-expression) statement-list else statement-list end
  
    if
    else if
    else if
    ...
    end if
  
    System starts main
        enter f(5)
            enter f(4)
                enter f(3)
		    enter f(2)
		    exit f(2)
		    enter f(1)
		    exit f(1)
                exit f(3)
                enter f(2)	       int a[10];
                exit f(2)	       int main(){
            exit f(4)		           int i;
            enter f(3)		           for (i=0; i<10; i++){
	        enter f(2)	               a[i] = f(i);
		exit f(2)	           }
		enter f(1)	       }
		exit f(1)	       int f (int n) {
            exit f(3)		           if (n<3)  return 1;
        exit f(5)		           return f(n-1)+f(n-2);
    main ends			       }
  
#include <stdio.h>

// Example of nested procedure declarations: g and f are defined inside the
// body of main.  Function definitions nested inside another function are not
// part of standard C; GCC accepts them as an extension.
int main (int argc, char *argv[])
{
    int x = 10;

    // g is nested in main, so the x it reads is main's local variable.
    void g(int y)
    {
        int z = x;	// nonlocal reference to main's x
	return;
    }

    // f is also nested in main; it calls its sibling g and returns y+1.
    int f (int y)
    {
	g(y);
	return y+1;
    }

    printf("The answer is %d\n", f(x));	// x is 10, so f(x) is 11
    return 0;
}

      P() {
        D() {...}
        P1() {
          P2() {
           ...
                Pk() {
                  R(){... D(); ...}
                }
           ...
          }
        }
      }
    
	loop
	   allocate X
	   use X
	   forget to deallocate X
      
	allocate X
	use X
	deallocate X
	100,000 lines of code not using X
	use X
      
    LD  R0, y
    ADD R0, R0, z
    ST  x, R0
  
    LD  R0, b
    ADD R0, R0, c
    ST  a, R0
    LD  R0, a
    ADD R0, R0, e
    ST  d, R0
  
    LD  R0, i
    MUL R0, R0, #4
    LD  R0, A(R0)
    ST  x, R0
  
    LD  R0, x
    LD  R1, i
    MUL R1, R1, #4
    ST  A(R1), R0
  
    LD  R0, p
    LD  R0, 0(R0)
    ST  x, R0
  
    LD  R0, x
    LD  R1, p
    ST  0(R1), R0
  
    LD   R0, x
    LD   R1, y
    SUB  R0, R0, R1
    BNEG R0, L
  
    ST  callee.staticArea, #here+20
    BR  callee.codeArea
  
    BR  *callee.staticArea
  
  // Quadruples of Main
  other₁
  call P
  other₂
  halt
  // Quadruples of P
  other₃
  return

  // Code for Main
  1000: Other₁
  1100: ST 4000, #1120    // P.staticArea, #here+20
  1112: BR 2000           // Two constants in previous instruction take 8 bytes
  1120: other₂
  1220: HALT
        ...
  // Code for P
  2000: other₃
  2100: BR *4000
        ...
  // AR for Main
  3000:                   // Return address stored here (not used)
  3004:                   // Local data for Main starts here
        ...
  // AR for P
  4000:                   // Return address stored here
  4004:                   // Local data for P starts here

  ADD SP, SP, #caller.ARSize
  ST  *SP, #here+16              // save return address
  BR  callee.codeArea

  // Quadruples of Main
  other[1]
  call P
  other[2]
  halt
  // Quadruples of P
  other[3]
  return

  // Code for Main
  1000: LD  SP, #9000
  1008: Other[1]
  1108: ADD SP, SP, #400
  1116: ST  *SP, #1132
  1124: BR  2000
  1132: SUB SP, SP, #400
  1140: other[2]
  1240: HALT
        ...
  // Code for P
  2000: other[3]
  2100: BR *0(SP)
        ...
  // AR for Main
  9000:                   // Return address stored here (not used)
  9004:                   // Local data for Main starts here
  9396:                   // Last word of the AR is bytes 9396-9399
        ...
  // AR for P
  9400:                   // Return address stored here
  9404:                   // Local data for P starts here

  for i from 1 to 10 do
    for j from 1 to 10 do
      a[i,j] = 0
    end
  end
  for i from 1 to 10 do
    a[i,i] = 1
  end

   1)  i = 1
   2)  j = 1
   3)  t1 = 10 * i
   4)  t2 = t1 + j            // element [i,j]
   5)  t3 = 8 * t2            // offset for a[i,j] (8 byte numbers)
   6)  t4 = t3 - 88           // we start at [1,1] not [0,0]
   7)  a[t4] = 0.0
   8)  j = j + 1
   9)  if j <= 10 goto (3)
  10)  i = i + 1
  11)  if i <= 10 goto (2)
  12)  i = 1
  13)  t5 = i - 1
  14)  t6 = 88 * t5
  15)  a[t6] = 1.0
  16)  i = i + 1
  17)  if i <= 10 goto (13)

   1)  i = 1
  
   2)  j = 1
  
   3)  t1 = 10 * i
   4)  t2 = t1 + j            // element [i,j]
   5)  t3 = 8 * t2            // offset for a[i,j] (8 byte numbers)
   6)  t4 = t3 - 88           // we start at [1,1] not [0,0]
   7)  a[t4] = 0.0
   8)  j = j + 1
   9)  if j <= 10 goto (3)
  
  10)  i = i + 1
  11)  if i <= 10 goto (2)
  
  12)  i = 1
  
  13)  t5 = i - 1
  14)  t6 = 88 * t5
  15)  a[t6] = 1.0
  16)  i = i + 1
  17)  if i <= 10 goto (13)
  

  Initialize all variables in B as being live
  Examine the quads of the block in reverse order.
    Let the quad q compute x and read y and z
    Mark x as dead; mark y and z as live and used at q

  for (i=0; i<10; i++)
      for (j=0; j<10; j++)
          c[i][j] = 0;
  for (i=0; i<10; i++)
      for (j=0; j<10; j++)
          for (k=0; k<10; k++)
              c[i][j] = c[i][j] + a[i][k] * b[k][j];

    a = b + c
    c = a + x
    d = b + c
    b = a + x
  
    a = b + c
    x = y + c + b + r
  
    x = a[i]
    a[j] = 3
    z = a[i]
  
    x = a[i]
    a[j] = 3
    z = a[i]
  
    b = a + 8    // b[i] is 8 bytes past a[i]
    x = b[i]
    b[j] = y
  
    p = &x
    *p = y
  
    x = *p
    *q = y
  
    a = b + c
    c = a + x
    d = b + c
    b = a + x
  
    a = b + c
    c = a + x
    d = b + c
    b = c
  
    LD  reg, mem
    ST  mem, reg
    OP  reg, reg, reg
  
  a = b
      LD  R1, b
      ST  a, R1

  a = *b
      LD  R1, b
      LD  R1, 0(R1)
      ST  a, R1

  *a = b
      LD  R1, b
      LD  R2, a
      ST  0(R2), R1

  *a = *b
      LD  R1, b
      LD  R1, 0(R1)
      LD  R2, a
      ST  0(R2), R1


       t = a - b
           LD  R1, a
           LD  R2, b
           SUB R2, R1, R2


       u = a - c
           LD  R3, c
           SUB R1, R1, R3

       v = t + u
           ADD R3, R2, R1


       a = d
           LD  R2, d


       d = v + u
           ADD R1, R3, R1


       exit
           ST  a, R2
           ST  d, R1

                            R1  R2  R3    a    b    c    d    e
			                  a    b    c    d    e

  a = b + c
      LD  R1, b
      LD  R2, c
      ADD R3, R1, R2
                            R1  R2  R3    a    b    c    d    e
			    b   c   a     R3  b,R1 c,R2  d    e

  d = a + e
      LD  R1, e
      ADD R2, R3, R1
                            R1  R2  R3    a    b    c    d    e
                     2e →   e   d   a     R3  b,R1  c    R2  e,R1
                     me →   e   d   a     R3   b    c    R2  e,R1

  a = e + d
      ADD R3, R1, R2
                            Descriptors unchanged

  e = a + b
      ADD R1, R3, R1   ← possible wrong answer from 2e
                            R1  R2  R3    a    b    c    d    e
                            e   d    a    R3  b,R1  c    R2   R1

      LD  R1, b
      ADD R1, R3, R1
                            R1  R2  R3    a    b    c    d    e
                            e   d    a    R3   b    c    R2   R1

  exit
      ST  a, R3
      ST  d, R2
      ST  e, R1

Production	Semantic Rule
expr → expr1 + term	expr.t := expr1.t \|\| term.t \|\| '+'
expr → expr1 - term	expr.t := expr1.t \|\| term.t \|\| '-'
expr → term	expr.t := term.t
term → term1 * factor	term.t := term1.t \|\| factor.t \|\| '*'
term → term1 / factor	term.t := term1.t \|\| factor.t \|\| '/'
term → factor	term.t := factor.t
factor → digit	factor.t := digit.t
factor → ( expr )	factor.t := expr.t
digit → 0	digit.t := '0'
digit → 1	digit.t := '1'
digit → 2	digit.t := '2'
digit → 3	digit.t := '3'
digit → 4	digit.t := '4'
digit → 5	digit.t := '5'
digit → 6	digit.t := '6'
digit → 7	digit.t := '7'
digit → 8	digit.t := '8'
digit → 9	digit.t := '9'

Production with Semantic Action	Semantic Rule
rest → { print('(') } + term { print('+') } rest { print(')') }	rest.t := '(' \|\| term.t \|\| '+' \|\| rest.t \|\| ')'
rest → { print('(') } - term { print('-') } rest { print(')') }	rest.t := '(' \|\| term.t \|\| '-' \|\| rest.t \|\| ')'
rest → term	rest.t := term.t
term → 1 { print('1') }	term.t := '1'
term → 2 { print('2') }	term.t := '2'
term → 3 { print('3') }	term.t := '3'

push v	push v (onto stack)
rvalue l	push contents of (location) l
lvalue l	push address of l
pop	pop
:=	r-value on tos put into the location specified by l-value 2nd on the stack; both are popped
copy	duplicate the top of stack

goto l
label l	target of jump
gofalse l	pop stack; jump if value is false
gotrue l	pop stack; jump if value is true
halt

lexeme	token	attribute value

white space
sequence of digits	NUM	numeric value
div	DIV
mod	MOD
other seq of a letter then letters and digits	ID	index into symbol table
eof char	DONE
other char	that char	NONE

Production	FIRST
type → simple	{ integer, char, num }
type → ↑ id	{ ↑ }
type → array [ simple ] of type	{ array }
simple → integer	{ integer }
simple → char	{ char }
simple → num dotdot num	{ num }

Lexeme	Token	Attribute
Whitespace	ws	—
if	if	—
then	then	—
else	else	—
An identifier	id	Pointer to table entry
A number	number	Pointer to table entry
<	relop	LT
<=	relop	LE
=	relop	EQ
<>	relop	NE
>	relop	GT
>=	relop	GE

NFA states	DFA state	a	b
{0,1,2,4,7}	D₀	D₁	D₂
{1,2,3,4,6,7,8}	D₁	D₁	D₃
{1,2,4,5,6,7}	D₂	D₁	D₂
{1,2,4,5,6,7,9}	D₃	D₁	D₄
{1,2,4,5,6,7,10}	D₄	D₁	D₂

Right Sentential Form	Handle	Reducing Production
id1 * id2	id1	F → id
F * id2	F	T → F
T * id2	id2	F → id
T * F	T * F	T → T * F

Stack	Input	Action
$	id1*id2$	shift
$id1	*id2$	reduce F→id
$F	*id2$	reduce T→F
$T	*id2$	shift
$T*	id2$	shift
$T*id2	$	reduce F→id
$T*F	$	reduce T→T*F
$T	$	reduce E→T
$E	$	accept

Stack	Symbols	Input	Action
0		id+id$	Shift to 3
03	id	+id$	Reduce by T→id
02	T	+id$	Reduce by E→T
01	E	+id$	Shift to 4
014	E+	id$	Shift to 3
0143	E+id	$	Reduce by T→id
0145	E+T	$	Reduce by E→E+T
01	E	$	Accept

State	ACTION						GOTO
State	id	+	*	(	)	$	E	T	F
0	s5			s4			1	2	3
1		s6				acc
2		r2	s7		r2	r2
3		r4	r4		r4	r4
4	s5			s4			8	2	3
5		r6	r6		r6	r6
6	s5			s4				9	3
7	s5			s4					10
8		s6			s11
9		r1	s7		r1	r1
10		r3	r3		r3	r3
11		r5	r5		r5	r5

Stack	Symbols	Input	Action
0		id*id+id$	shift
05	id	*id+id$	reduce by F→id
03	F	*id+id$	reduce by T→F
02	T	*id+id$	shift
027	T*	id+id$	shift
0275	T*id	+id$	reduce by F→id
027 10	T*F	+id$	reduce by T→T*F
02	T	+id$	reduce by E→T
01	E	+id$	shift
016	E+	id$	shift
0165	E+id	$	reduce by F→id
0163	E+F	$	reduce by T→F
0169	E+T	$	reduce by E→E+T
01	E	$	accept

Production	Semantic Rules

L → E $	L.val = E.val
E → E₁ + T	E.val = E₁.val + T.val
E → E₁ - T	E.val = E₁.val - T.val
E → T	E.val = T.val
T → T₁ * F	T.val = T₁.val * F.val
T → T₁ / F	T.val = T₁.val / F.val
T → F	T.val = F.val
F → ( E )	F.val = E.val
F → num	F.val = num.lexval

Production	Semantic Rules	Type

T → F T'	T'.lval = F.val	Inherited
T → F T'	T.val = T'.tval	Synthesized

T' → * F T₁'	T'₁.lval = T'.lval * F.val	Inherited
T' → * F T₁'	T'.tval = T'₁.tval	Synthesized

T' → ε	T'.tval = T'.lval	Synthesized

F → num	F.val = num.lexval	Synthesized

State	a	b	ε
0	{0,1}	{0}	φ
1	φ	{2}	φ
2	φ	{3}	φ

Pattern	Action to perform
a	Action1
abb	Action2
a^*b⁺	Action3

	FIRST	FOLLOW
E	( id	$ )
E'	ε +	$ )
T	( id	+ $ )
T'	ε *	+ $ )
F	( id	* + $ )

Nonterminal	Input Symbol
Nonterminal	+	*	(	)	id	$
E
E'
T
T'
F

Production	Semantic Rule	Type

D → T L	L.type = T.type	inherited
T → INT	T.type = integer	synthesized

L → L₁ , ID	L₁.type = L.type	inherited
L → L₁ , ID	addType(ID.entry,L.type)	synthesized, side effect

L → ID	addType(ID.entry,L.type)	synthesized, side effect

Production	Semantic Rules

E → E₁ + T	E.node = new Node('+',E₁.node,T.node)
E → E₁ - T	E.node = new Node('-',E₁.node,T.node)
E → T	E.node = T.node
T → ( E )	T.node = E.node
T → ID	T.node = new Leaf(ID,ID.entry)
T → NUM	T.node = new Leaf(NUM,NUM.val)

Production	Semantic Rules	Type

E → T E'	E.node=E'.syn	Synthesized
E → T E'	E'.node=T.node	Inherited

E' → + T E'₁	E'₁.node=new Node('+',E'.node,T.node)	Inherited
E' → + T E'₁	E'.syn=E'₁.syn	Synthesized

E' → - T E'₁	E'₁.node=new Node('-',E'.node,T.node)	Inherited
E' → - T E'₁	E'.syn=E'₁.syn	Synthesized

E' → ε	E'.syn=E'.node	Synthesized
T → ( E )	T.node=E.node	Synthesized
T → ID	T.node=new Leaf(ID,ID.entry)	Synthesized
T → NUM	T.node=new Leaf(NUM,NUM.val)	Synthesized

Production	Semantic Rules	Type

T → B C	T.t=C.t	Synthesized
T → B C	C.b=B.t	Inherited

B → INT	B.t=integer	Synthesized
B → FLOAT	B.t=float	Synthesized

C → [ NUM ] C₁	C.t=array(NUM.val,C₁.t)	Synthesized
C → [ NUM ] C₁	C₁.b=C.b	Inherited

C → ε	C.t=C.b	Synthesized

Production	Semantic Rule

A → ARRAY [ NUM ] OF A₁	A.t=array(NUM.val,A₁.t)
A → INT	A.t=integer
A → FLOAT	A.t=float

Production	Actions	Semantic Rules	Kind

T → B	{ t = B.type; w = B.width; }	C.bt = B.bt	Inherited
C	{ T.type = C.type; T.width = C.width; }	C.bw = B.bw	Inherited

B → INT	{ B.type = integer; B.width = 4; }	B.bt = integer B.bw = 4	Synthesized Synthesized

B → FLOAT	{ B.type = float; B.width = 8; }	B.bt = float B.bw = 8	Synthesized Synthesized

C → [ NUM ] C₁		C.type = array(NUM.value, C₁.type)	Synthesized
		C.width = NUM.value * C₁.width;	Synthesized
	{ C.type = array(NUM.value, C₁.type);	C₁.bt = C.bt	Inherited
	C.width = NUM.value * C₁.width; }	C₁.bw = C.bw	Inherited

C → ε	C.type = t; C.width=w	C.type = C.bt C.width = C.bw	Synthesized Synthesized

Production	Semantic Rules

d → od	d.width = od.width
d → td	d.width = 0

od → di : odef ;	addType(di.entry, odef.type)
od → di : odef ;	od.width = odef.width

di → ID	di.entry = ID.entry

odef → tn	odef.type = tn.type
	odef.width = tn.width
	tn.type must be integer or real

tn → INT	tn.type = integer
tn → INT	tn.width = 4

tn → REAL	tn.type = real
tn → REAL	tn.width = 8

Production	Semantic Rules	Kind

fd → FUNC di ( ps ) RET tn IS ds BEG s ss END ;	ds.offset = 0	Inherited

pd → PROC di ( ps ) IS ds BEG s ss END ;	ds.offset = 0	Inherited
	s.next = newlabel()	Inherited
	ss.next = newlabel()	Inherited
	pd.code = s.code \|\| label(s.next) \|\| ss.code \|\| label(ss.next)	Synthesized

ds → d ds₁	d.offset = ds.offset	Inherited
	ds₁.offset = d.newoffset	Inherited
	ds.totalSize = ds₁.totalSize	Synthesized

ds → ε	ds.totalSize = ds.offset	Synthesized

d → od	od.offset = d.offset	Inherited
d → od	d.newoffset = d.offset + od.width	Synthesized

d → td	d.newoffset = d.offset	Synthesized

od → di : odef ;	addType(di.entry, odef.type)	Synthesized
	od.width = odef.width	Synthesized
	addOffset(di.entry, od.offset)	Synthesized

di → ID	di.entry = ID.entry	Synthesized

odef → tn	odef.type = tn.type	Synthesized
	odef.width = tn.width	Synthesized
	tn.type must be integer or real

odef → tn [ NUM ]	odef.type = array(NUM.value, getBaseType(tn.entry.type))	Synthesized
	odef.width = sizeof(odef.type)	Synthesized
	tn must be ID

td → TYPE di IS ARRAY OF tn ;	addType(di.entry, array(*, tn.type))	Synthesized
td → TYPE di IS ARRAY OF tn ;	tn.type must be integer or real

tn → ID	tn.entry = ID.entry	Synthesized
tn → ID	ID.entry.type must be array()

tn → INT	tn.type = integer	Synthesized
tn → INT	tn.width = 4	Synthesized

tn → REAL	tn.type = real	Synthesized
tn → REAL	tn.width = 8	Synthesized

Production	Semantic Rule

as → lv = e	as.code = e.code \|\| gen(lv.lexeme = e.addr)

lv → ID	lv.lexeme = get(ID.lexeme)

e → t	e.addr = t.addr
e → t	e.code = t.code

e → e₁ + t	e.addr = new Temp()
e → e₁ + t	e.code = e₁.code \|\| t.code \|\| gen(e.addr = e₁.addr + t.addr)

e → e₁ - t	e.addr = new Temp()
e → e₁ - t	e.code = e₁.code \|\| t.code \|\| gen(e.addr = e₁.addr - t.addr)

t → f	t.addr = f.addr
t → f	t.code = f.code

t → t₁ * f	t.addr = new Temp()
t → t₁ * f	t.code = t₁.code \|\| f.code \|\| gen(t.addr = t₁.addr * f.addr)

t → t₁ / f	t.addr = new Temp()
t → t₁ / f	t.code = t₁.code \|\| f.code \|\| gen(t.addr = t₁.addr / f.addr)

f → ( e )	f.addr = e.addr
f → ( e )	f.code = e.code

f → ID	f.addr = get(ID.lexeme)
f → ID	f.code = ""

f → NUM	f.addr = get(NUM.lexeme)
f → NUM	f.code = ""

Production	Semantic Rules

as → lv = e ;	as.code = e.code \|\| lv.code \|\| gen(*lv.addr = e.addr)

lv → ID	lv.addr = new Temp() lv.code = gen(lv.addr = &get(ID.lexeme))

lv → let ae	lv.addr = ae.addr lv.code = ae.code

ae → ID [ e ]	ae.t1 = new Temp() ae.t2 = new Temp() ae.addr = new Temp() ae.code = e.code \|\| gen(ae.t1 = e.addr * getBaseWidth(ID.entry)) \|\| gen(ae.t2 = &get(ID.lexeme)) \|\| gen(ae.addr = ae.t2 + ae.t1)

Chapter 0: Administrivia

0.1: Contact Information

0.2: Course Web Page

0.3: Textbook

0.4: Computer Accounts and Mailman Mailing List

0.5: Grades

0.6: The Upper Left Board

0.7: Homeworks and Labs

0.7.1: Homework Numbering

0.7.2: Doing Labs on non-NYU Systems

0.7.3: Obtaining Help with the Labs

0.7.4: Computer Language Used for Labs

0.8: A Grade of “Incomplete”

0.9: An Introductory Compiler Course with a Programming Prerequisite

0.9.1: This is an introductory course ...

... with a Programming Prerequisite

0.10: Academic Integrity Policy

Roadmap of the Course

Chapter 1: Introduction to Compiling

1.1: Compilers

Analysis, synthesis, front and back ends

Syntax Trees

Other analyzers and synthesizers

The compilation tool chain

Preprocessors

Assemblers

Two pass assembly

A Trivial Assembler Program

Linkers

Relocating relative addresses

Resolving external references

Loaders

1.2: Analysis of the source program

Lexical analysis or scanning

Syntax analysis or parsing

Semantic analysis

Analysis in text formatters

1.3: The phases of a compiler

Symbol-table management

Error detection and reporting

The analysis phases

Intermediate code generation

Code optimization

Code generation

1.4: Cousins of the compiler

1.5: The grouping of phases

Front and back ends

Passes

Reducing the number of passes

1.6: Compiler-construction tools

Chapter 2: A Simple One-Pass Compiler

2.1: Overview

2.2: Syntax definition

Parse trees

Ambiguity

Associativity of operators

Precedence of operators

Statements

2.3: Syntax-Directed Translation

Postfix notation

Syntax-directed definitions

Synthesized Attributes

Depth-first traversals

Translation schemes

Emitting a translation

Prefix to infix translation

Simple syntax-directed definitions

2.4: Parsing

Top-down parsing

Predictive parsing

An example of predictive parsing

ε-productions

Designing a Predictive Parser

Left Recursion

2.5: Translator for simple expressions

2.5.1: Abstract and concrete syntax

2.5.2: Adapting the Translation Scheme

2.5.3: Procedures for the nonterminals expr, term, and rest

2.5.4: Simplifying the translator

The complete program

`Lexer.c`

`Parser.c`

`Emitter.c`

`Symbol.c` and `init.c`

`Error.c`

3.5: The Lexical Analyzer Generator `Lex`

3.5.1: Use of `Lex`

3.5.2: Structure of `Lex` Programs

3.5.3: Conflict Resolution in `Lex`

3.5.3a: Anger Management in `Lex`

Production	Semantic Rule

as → lv = e	widen(e.addr, e.type, lv.type, as.code1, as.addr1) as.code = lv.code \|\| e.code \|\| as.code1 \|\| gen(*lv.addr = as.addr1)

lv → ID	lv.addr = new Temp()
	lv.type = get(ID.type)
	lv.code = gen(lv.addr = &get(ID.lexeme))

lv → let ae	lv.addr = ae.addr
	lv.type = ae.type
	lv.code = ae.code

ae → ID [ e ]	ae.type = getBaseType(ID.entry.type) ae.t1 = new Temp() ae.t2 = new Temp() ae.addr = new Temp() ae.code = e.code \|\| gen(ae.t1 = e.addr * getBaseWidth(ID.entry)) \|\| gen(ae.t2 = &get(ID.lexeme)) \|\| gen(ae.addr = ae.t2 + ae.t1)

e → t	e.addr = t.addr
	e.type = t.type
	e.code = t.code

e → e₁ + t	e.addr = new Temp()
	e.type = LUB(e₁.type, t.type)
	widen(e₁.addr, e₁.type, e.type, e.code1, e.addr1) widen(t.addr, t.type, e.type, e.code2, e.addr2) e.code = e₁.code \|\| t.code \|\| e.code1 \|\| e.code2 \|\| gen(e.addr = e.addr1 + e.addr2)

e → e₁ - t	e.addr = new Temp()
	e.type = LUB(e₁.type, t.type)
	widen(e₁.addr, e₁.type, e.type, e.code1, e.addr1) widen(t.addr, t.type, e.type, e.code2, e.addr2) e.code = e₁.code \|\| t.code \|\| e.code1 \|\| e.code2 \|\| gen(e.addr = e.addr1 - e.addr2)

t → f	t.addr = f.addr
	t.type = f.type
	t.code = f.code

t → t₁ * f	t.addr = new Temp()
	t.type = LUB(t₁.type, f.type)
	widen(t₁.addr, t₁.type, t.type, t.code1, t.addr1) widen(f.addr, f.type, t.type, t.code2, t.addr2) t.code = t₁.code \|\| f.code \|\| t.code1 \|\| t.code2 \|\| gen(t.addr = t.addr1 * t.addr2)

t → t₁ / f	t.addr = new Temp()
	t.type = LUB(t₁.type, f.type)
	widen(t₁.addr, t₁.type, t.type, t.code1, t.addr1) widen(f.addr, f.type, t.type, t.code2, t.addr2) t.code = t₁.code \|\| f.code \|\| t.code1 \|\| t.code2 \|\| gen(t.addr = t.addr1 / t.addr2)

f → ( e )	f.addr = e.addr
	f.type = e.type
	f.code = e.code

f → ID	f.addr = get(ID.lexeme)
	f.type = get(ID.type)
	f.code = ""

f → NUM	f.addr = get(NUM.lexeme)
	f.type = get(NUM.type)
	f.code = ""

Production	Semantic Rules	Kind

P → S	S.next = newlabel()	Inherited
P → S	P.code = S.code \|\| label(S.next)	Synthesized

S → if ( B ) S₁	B.true = newlabel()	Inherited
	B.false = S.next	Inherited
	S₁.next = S.next	Inherited
	S.code = B.code \|\| label(B.true) \|\| S₁.code	Synthesized

S → if ( B ) S₁ else S₂	B.true = newlabel()	Inherited
	B.false = newlabel()	Inherited
	S₁.next = S.next	Inherited
	S₂.next = S.next	Inherited
	S.code = B.code \|\| label(B.true) \|\| S₁.code \|\| gen(goto S.next) \|\| label(B.false) \|\| S₂.code	Synthesized

S → while ( B ) S₁	begin = newlabel()	Synthesized
	B.true = newlabel()	Inherited
	B.false = S.next	Inherited
	S₁.next = begin	Inherited
	S.code = label(begin) \|\| B.code \|\| label(B.true) \|\| S₁.code \|\| gen(goto begin)	Synthesized

S → S₁ S₂	S₁.next = newlabel()	Inherited
	S₂.next = S.next	Inherited
	S.code = S₁.code \|\| label(S₁.next) \|\| S₂.code	Synthesized

Production	Semantic Rules	Kind

B → B₁ \|\| B₂	B₁.true = B.true	Inherited
	B₁.false = newlabel()	Inherited
	B₂.true = B.true	Inherited
	B₂.false = B.false	Inherited
	B.code = B₁.code \|\| label(B₁.false) \|\| B₂.code	Synthesized

B → B₁ && B₂	B₁.true = newlabel()	Inherited
	B₁.false = B.false	Inherited
	B₂.true = B.true	Inherited
	B₂.false = B.false	Inherited
	B.code = B₁.code \|\| label(B₁.true) \|\| B₂.code	Synthesized

B → ! B₁	B₁.true = B.false	Inherited
	B₁.false = B.true	Inherited
	B.code = B₁.code	Synthesized

B → E₁ relop E₂	B.code = E₁.code \|\| E₂.code \|\| gen(if E₁.addr relop.lexeme E₂.addr goto B.true) \|\| gen(goto B.false)	Synthesized

B → true	B.code = gen(goto B.true)	Synthesized

B → false	B.code = gen(goto B.false)	Synthesized

B → ID	B.code = gen(if get(ID.lexeme) goto B.true) \|\| gen(goto B.false)	Synthesized