#include "cc.h" #define alloc(ptr) (ptr) = mem·arenaalloc(C.heap, 1, sizeof *(ptr)) #define movearray(dst, arr, n) (dst) = mem·arenaalloc(C.heap, (n), sizeof *(arr)), memcpy((dst), (arr), n * sizeof *(arr)), free(arr) #define attop(prs) ((uintptr)prs->sp == (uintptr)prs->spstk) #define peek(p, i) (p->tok[i]) #define iskw(t, k) (((t).kind == Akeywd) && (t).val.i == (k)) #define advance(p, l) (p->tok[0] = p->tok[1], p->tok[1] = lex(l), p->tok[0]) #define Bit(i) (1 << (i)) // ----------------------------------------------------------------------- // helper functions string nameof(Name *n) { if (n->kind & Nident) return n->ident; if (n->kind & Nparen) return nameof(&n->paren->name); panicf("ill-formed declarator name"); } static void openscope(Parser *p) { if (++p->sp == p->spstk + arrlen(p->spstk)) panicf("too many nested scopes: crashing"); } /* * TODO: save the symbol table with the ast node * write a "copy(move)scope" */ static void closescope(Parser *p) { if (p->sp == p->spstk) panicf("attempting to close the top level scope: crashing"); forgetall(&p->sp->objs); forgetall(&p->sp->tags); p->sp--; } static void declareobj(Parser *p, Decl *d) { Sym *sym; string ident; struct Decls *link; switch (d->kind) { case Dfunc: ident = nameof(&d->func.name); break; case Dvar: ident = nameof(&d->var.name); break; case Dtype: ident = nameof(&d->type.name); break; case Dvars: while (link = d->var.link, link != nil) { ident = nameof(&link->name); sym = lookup(&p->sp->objs, ident); if (sym) { errorat(peek(p, 0).pos, "redeclaration of name '%s' in object space", ident); return; } define(&p->sp->objs, ident, Sobj); } return; default: panicf("unrecognized node kind %d inside declaraction", d->kind); } sym = lookup(&p->sp->objs, ident); if (sym) { errorat(peek(p, 0).pos, "redeclaration of name '%s' in object space", ident); return; } define(&p->sp->objs, ident, Sobj); } static Sym * declaretag(Parser *p, Type *t) { Sym *sym; sym = lookup(&p->sp->tags, t->ident); if (sym) { errorat(peek(p, 0).pos, "redeclaration of name '%s' in tag space", t->ident); return sym; } return define(&p->sp->tags, t->ident, Stype); } static Sym * lookupobj(Parser *p, string ident) { Sym *sym; Scope *it; it = p->sp; do { sym = lookup(&it->objs, ident); } while (sym == nil && --it >= p->spstk); return sym; } static Sym * lookuptag(Parser *p, string ident) { Sym *sym; Scope *it; it = p->sp; do { sym = lookup(&it->tags, ident); } while (sym == nil && --it >= p->spstk); return sym; } static int nomatch(Token t, vlong kind) { if (t.kind == kind) return 0; errorat(t.pos, "expected token '%s', instead found '%s'", tokens[t.kind], tokens[kind]); return 1; } // ----------------------------------------------------------------------- // forward declarations static Decl *decl(Parser *, Lexer *); static Expr *expr(Parser *, Lexer *); static error blkstmt(Parser *, Lexer *, Stmt **); #define MAKEX(x, state) alloc((x)), (x)->kind = X##state // ----------------------------------------------------------------------- // expressions static Expr* primary(Parser *p, Lexer *lx) { } static Expr* unary(Parser *p, Lexer *lx) { Expr *x; Token t; t = peek(p, 0); switch (t.kind) { case Aneg: MAKEX(x, neg); goto Prefix; case Aand: MAKEX(x, ref); goto Prefix; case Anot: MAKEX(x, not); goto Prefix; case Astar: MAKEX(x, star); goto Prefix; case Aadd: MAKEX(x, plus); goto Prefix; case Asub: MAKEX(x, minus); goto Prefix; case Ainc: MAKEX(x, preinc); goto Prefix; case Adec: MAKEX(x, predec); goto Prefix; Prefix: x->pos.beg = t.pos; x->unary.pre = unary(p, lx); x->pos.end = x->unary.pre->pos.end; return x; case Alparen: panicf("cast not implemented"); case Akeywd: panicf("sizeof/align of not implemented"); default: return primary(p, lx); } } /* static data for binary operators */ #define OPERATORS \ OPERATOR(Astar, 10, Xmul) \ OPERATOR(Adiv, 10, Xdiv) \ OPERATOR(Amod, 10, Xmod) \ OPERATOR(Aadd, 9, Xadd) \ OPERATOR(Asub, 9, Xsub) \ OPERATOR(Alsft, 8, Xlsft) \ OPERATOR(Arsft, 8, Xrsft) \ OPERATOR(Agteq, 7, Xgteq) \ OPERATOR(Alteq, 7, Xlteq) \ OPERATOR(Alt, 7, Xlt) \ OPERATOR(Agt, 7, Xgt) \ OPERATOR(Aeq, 6, Xeql) \ OPERATOR(Aneq, 6, Xneq) \ OPERATOR(Aand, 5, Xand) \ OPERATOR(Axor, 4, Xxor) \ OPERATOR(Aor, 3, Xor) \ OPERATOR(Aandand, 2, Xandand) \ OPERATOR(Aoror, 1, Xoror) static int prectab[] = { #define OPERATOR(a, b, c) [a] = b, OPERATORS #undef OPERATOR }; static int optab[] = { #define OPERATOR(a, b, c) [a] = c, OPERATORS #undef OPERATOR }; #undef OPERATORS static Expr* binary(Parser *p, Lexer *lx, int prec) { Token t; int k, np; Expr *l, *r, *x; l = unary(p, lx); for (;;) { t = peek(p, 0); k = t.kind; np = prectab[t.kind]; if (np < prec) return x; alloc(x); x->pos.beg = l->pos.beg; x->kind = optab[k]; x->binary.l = l; x->binary.r = r = binary(p, lx, np + 1); x->pos.end = r->pos.end; l = x; } return l; Bad: errorat(t.pos, "failed to parse expression"); return nil; } static Expr* ternary(Parser *p, Lexer *lx) { Pos b; Token t; Expr *x, *y; x = binary(p, lx, 1); t = peek(p, 0); b = t.pos; switch (t.kind) { case Aqmark: t = advance(p, lx); y = x; MAKEX(x, ternary); x->pos.beg = b; x->kind = Xternary; x->cond.c = y; x->cond.t = expr(p, lx); if (nomatch(t, Acolon)) { errorat(t.pos, "ternary expression missing ':'"); goto Bad; } t = advance(p, lx); x->cond.e = expr(p, lx); x->pos.end = lx->pos; break; case Aasn: MAKEX(y, asn); goto Assign; case Aorasn: MAKEX(y, orasn); goto Assign; case Axorasn: MAKEX(y, xorasn); goto Assign; case Aandasn: MAKEX(y, andasn); goto Assign; case Asubasn: MAKEX(y, subasn); goto Assign; case Amulasn: MAKEX(y, mulasn); goto Assign; case Adivasn: MAKEX(y, divasn); goto Assign; case Amodasn: MAKEX(y, modasn); goto Assign; case Alsftasn: MAKEX(y, lsftasn); goto Assign; case Arsftasn: MAKEX(y, rsftasn); goto Assign; Assign: y->asn.l = x; y->asn.r = ternary(p, lx); x = y; x->pos.beg = b; x->pos.end = lx->pos; break; default: ; } return x; Bad: errorat(lx->pos, "failing expression parse"); return nil; } static Expr* expr(Parser *p, Lexer *lx) { Pos b; Token t; Expr *x, *y; x = ternary(p, lx); while (t = peek(p, 0), t.kind == Acomma) { advance(p, lx); y = x; MAKEX(x, comma); x->pos.beg = y->pos.beg; x->comma.x[0] = y; x->comma.x[1] = ternary(p, lx); x->pos.end = lx->pos; y = nil; } return x; } // ----------------------------------------------------------------------- // statements static struct Node* stmt(Parser *p, Lexer *lx) { int k; Stmt *s; Sym *sym; Token t; t = peek(p, 0); k = t.kind; /* intercept decl before allocating a statement */ if (k == Aident) { if (peek(p, 1).kind == Acolon) goto Tlabel; sym = lookupobj(p, t.val.s); if (!sym) { errorat(lx->pos, "unrecognized type identifier '%s'", t.val.s); goto Bad; } if (sym->kind == Stype) goto Tdecl; if (sym->kind == Sobj) goto Texpr; errorat(lx->pos, "bad symbol type used as type identifier"); goto Bad; } if (k == Akeywd) { if ((Kauto <= k && k <= Katomic) || (Ksigned <= k && k <= Klong)) { Tdecl: return (Node *)decl(p, lx); } } alloc(s); s->pos.beg = lx->pos; switch (k) { case Akeywd: switch (k = t.val.i) { case Kif: t = advance(p, lx); s->kind = Sif; if (nomatch(t, Alparen)) { errorat(lx->pos, "missing opening paren before if conditional"); goto Bad; } s->br.cond = expr(p, lx); if (nomatch(t, Arparen)) { errorat(lx->pos, "missing closing paren after if conditional"); goto Bad; } s->br.body = stmt(p, lx); t = peek(p, 0); if (iskw(t, Kelse)) s->br.orelse = stmt(p, lx); else s->br.orelse = nil; break; case Kswitch: t = advance(p, lx); s->kind = Sswitch; if (nomatch(t, Alparen)) { errorat(lx->pos, "missing opening paren before switch conditional"); goto Bad; } s->br.cond = expr(p, lx); if (nomatch(t, Arparen)) { errorat(lx->pos, "missing closing paren after switch conditional"); goto Bad; } s->br.body = stmt(p, lx); s->br.orelse = nil; break; case Kfor: t = advance(p, lx); s->kind = Sfor; if (nomatch(t, Alparen)) { errorat(lx->pos, "missing opening paren before for loop preamble"); goto Bad; } if (t.kind == Asemi) s->loop.init = nil; else { // TODO: test for declaration s->loop.init = (Node *)expr(p, lx); } if (nomatch(t, Asemi)) { errorat(lx->pos, "missing semicolon"); goto Bad; } if (t.kind == Asemi) s->loop.cond = nil; else s->loop.cond = expr(p, lx); if (nomatch(t, Asemi)) { errorat(lx->pos, "missing semicolon"); goto Bad; } if (t.kind == Asemi) s->loop.step = nil; else s->loop.step = expr(p, lx); if (nomatch(t, Alparen)) { errorat(lx->pos, "missing closing paren after for loop preamble"); goto Bad; } s->loop.body = stmt(p, lx); break; case Kwhile: t = advance(p, lx); s->kind = Swhile; if (nomatch(t, Alparen)) { errorat(lx->pos, "missing opening paren before while loop conditional"); goto Bad; } s->loop.cond = expr(p, lx); if (nomatch(t, Arparen)) { errorat(t.pos, "missing closing paren after while loop conditional"); goto Bad; } s->loop.init = nil; s->loop.step = nil; s->loop.body = stmt(p, lx); break; case Kdo: t = advance(p, lx); s->kind = Sdo; s->loop.body = stmt(p, lx); if (!iskw(t, Kwhile)) { errorat(t.pos, "missing while statement conditional after do body"); goto Bad; } t = advance(p, lx); if (nomatch(t, Alparen)) { errorat(t.pos, "missing open paren after while conditional"); goto Bad; } s->loop.init = nil; s->loop.step = nil; s->loop.cond = expr(p, lx); break; case Kgoto: t = advance(p, lx); s->kind = Sgoto; if (t.kind != Aident) { errorat(t.pos, "invalid argument to goto"); goto Bad; } s->jmp.lbl = t.val.s; t = advance(p, lx); if (nomatch(t, Asemi)) { errorat(t.pos, "missing semicolon after goto"); goto Bad; } advance(p, lx); break; case Kcontinue: t = advance(p, lx); s->kind = Scontin; s->jmp.lbl = nil; s->jmp.x = nil; if (nomatch(t, Asemi)) { errorat(t.pos, "missing semicolon after continue"); goto Bad; } advance(p, lx); break; case Kbreak: t = advance(p, lx); s->kind = Sbreak; s->jmp.lbl = nil; s->jmp.x = nil; if (nomatch(t, Asemi)) { errorat(t.pos, "missing semicolon after break"); goto Bad; } advance(p, lx); break; case Kreturn: t = advance(p, lx); s->kind = Sreturn; s->jmp.lbl = nil; s->jmp.x = (t.kind == Asemi) ? nil : expr(p, lx); t = peek(p, 0); if (nomatch(t, Asemi)) { errorat(t.pos, "missing semicolon after return statement"); goto Bad; } break; case Kcase: t = advance(p, lx); s->kind = Scase; s->lbl.x = expr(p, lx); if (nomatch(t, Acolon)) { errorat(t.pos, "missing colon after default label"); goto Bad; } t = advance(p, lx); s->lbl.stmt = stmt(p, lx); break; case Kdefault: t = advance(p, lx); s->kind = Scase; s->lbl.x = nil; if (nomatch(t, Acolon)) { errorat(t.pos, "missing colon after default label"); goto Bad; } t = advance(p, lx); s->lbl.stmt = stmt(p, lx); break; default: panicf("unexpected statement keyword %s", keywords[k]); } break; case Albrace: s->kind = Sblock; openscope(p); if (blkstmt(p, lx, &s)) { errorat(lx->pos, "failed to parse block statement"); goto Bad; } closescope(p); break; case Asemi: t = advance(p, lx); s->kind = Sempty; break; case Aident: Tlabel: t = advance(p, lx); s->kind = Slabel; if (nomatch(t, Acolon)) { errorat(t.pos, "missing colon after labelled block"); goto Bad; } t = advance(p, lx); s->lbl.stmt = stmt(p, lx); break; default: Texpr: t = advance(p, lx); s->kind = Sexpr; s->x = expr(p, lx); } s->pos.end = lx->pos; return (Node *)s; Bad: errorat(lx->pos, "failed to parse statement"); return nil; } static error blkstmt(Parser *p, Lexer *lx, Stmt **s) { Token t; int len; int cap; Node **ns; alloc(*s); (*s)->kind = Sblock; (*s)->pos.beg = lx->pos; t = peek(p, 0); if (nomatch(t, Albrakt)) goto Bad; advance(p, lx); len = 0, cap = 20; ns = malloc(cap*sizeof(*ns)); for (;;) { if (cap == len) { cap += 20; ns = realloc(ns, cap*sizeof(*ns)); } ns[len++] = stmt(p, lx); } if (nomatch(t, Arbrakt)) goto Bad; (*s)->pos.end = lx->pos; (*s)->blk.n = len; movearray((*s)->blk.item, ns, len); return 0; Bad: errorat(lx->pos, "failed to parse block statement"); free(ns); return 1; } // ----------------------------------------------------------------------- // declarations /* see https://en.wikipedia.org/wiki/C_data_types */ static uint64 validtypes[] = { Tvoid, Tbool, Tchar, Tsign | Tchar, Tunsign | Tchar, Tshort, Tshort | Tint, Tsign | Tshort, Tsign | Tshort | Tint, Tunsign | Tshort, Tunsign | Tshort | Tint, 0, Tint, Tsign, Tsign | Tint, Tunsign, Tunsign | Tint, Tlong, Tlong | Tint, Tsign | Tlong, Tsign | Tlong | Tint, Tunsign | Tlong, Tunsign | Tlong | Tint, Tvlong | Tlong, Tvlong | Tlong | Tint, Tsign | Tvlong | Tlong, Tsign | Tvlong | Tlong | Tint, Tunsign | Tvlong | Tlong, Tunsign | Tvlong | Tlong | Tint, Tfloat, Tdouble, Tlong | Tfloat, Timag, Tcmplx, Tstruct, Tunion, Tenum, Tname, }; static error spec(Parser *p, Lexer *lx, uint64 *spec) { Token t; int n; uint64 s, sm; s = 0; while (t = peek(p, 0), t.kind == Akeywd) { switch (n = t.val.i) { case Kauto: case Kregister: case Kstatic: case Kextern: case Ktypedef: if (s & MaskMem) { errorat(lx->pos, "multiple storage class specifiers: second was %s", keywords[n]); goto Bad; } break; case Kconst: case Kvolatile: if (s & Bit(n)) warnat(lx->pos, "duplicate %s specifier found in declaration", keywords[n]); break; case Ksigned: case Kunsigned: if (s & MaskSgn) { if (s & Bit(n)) { warnat(lx->pos, "duplicated storage class specifier: second was %s", keywords[n]); break; } errorat(lx->pos, "multiple storage class specifiers"); goto Bad; } break; case Kshort: if (s & Tshort) { warnat(lx->pos, "duplicated short specifier"); break; } break; case Klong: if ((s >> Klong) & 2) { errorat(lx->pos, "cannot chain three or more long specifiers"); goto Bad; } s += Bit(n); continue; case Kvoid: case Kchar: case Kint: case Kfloat: case Kdouble: if (s & MaskTyp) { errorat(lx->pos, "more than one base type specified"); goto Bad; } break; case Kstruct: case Kunion: case Kenum: panicf("need to implement"); default: errorat(t.pos, "invalid keyword '%s' found in declaration specifier", keywords[n]); } s |= Bit(n); advance(p, lx); } sm = (((s<<32)>>32) & ~(MaskQul | MaskMem)); for (n = 0; n < arrlen(validtypes); n++) { if (sm == validtypes[n]) { *spec = s; return 0; } } /* TODO: serialize bitflags to string for nice error message */ errorat(lx->pos, "invalid type specifier: ''"); Bad: errorat(lx->pos, "ignoring specifier"); *spec = Sbad; return 1; } /* predeclaration */ static error dtor(Parser *p, Lexer *lx, Dtor *d); static error name(Parser *p, Lexer *lx, Name *nm) { int n, k; Token t; t = peek(p, 0); switch (k = t.kind) { case Aident: nm->kind = Nident; nm->ident = t.val.s; break; case Alparen: nm->kind = Nparen; alloc(nm->paren); if (dtor(p, lx, nm->paren)) { /* we are using an arena allocator so can't clean up */ errorat(lx->pos, "invalid declarator in parenthesis"); nm->paren = nil; goto Bad; } t = peek(p, 0); if (t.kind != Arparen) { errorat(lx->pos, "missing closing paren in declarator"); goto Bad; } break; default: errorat(lx->pos, "invalid token '%s' in name declaration", tokens[k]); goto Bad; } t = advance(p, lx); switch (k = t.kind) { case Albrakt: nm->kind |= Nindex; panicf("need to implement"); break; case Alparen: nm->kind |= Ncall; panicf("need to implement"); break; default: ; } return 0; Bad: return 1; } /* pointer kind is partitioned into 8x6 regions */ static error dtor(Parser *p, Lexer *lx, Dtor *d) { int n, k; error err; Token t; Dtor *link; Ptr *ptr, *x; err = 1; ptr = &d->ptr; ptr->kind = 0; ptr->link = nil; t = peek(p, 0); if (t.kind != Astar) if (t.kind == Aident || t.kind == Arparen) goto Name; goto Bad; Ptr: ptr->kind |= Bit(n); advance(p, lx); Key: t = peek(p, 0); switch (k = t.kind) { case Akeywd: if (Kconst <= k && k <= Katomic) ptr->kind |= Bit(n + (t.val.i - Kconst + 1)); else { errorat(lx->pos, "invalid keyword '%s' modifies pointer", keywords[t.val.i]); goto Bad; } advance(p, lx); goto Key; case Astar: if (++n >= 8) { alloc(x); x->kind = 0; x->link = nil; ptr->link = x; ptr = x; n = 0; } goto Ptr; case Aident: case Alparen: goto Name; default: errorat(lx->pos, "invalid token '%s' modifies pointer specification", tokens[t.kind]); goto Bad; } Name: return name(p, lx, &d->name); Bad: return err; } static Decl * decl(Parser *p, Lexer *lx) { Token t; Decl *d; Dtor dt, *dtp; struct Decls **curr; alloc(d); d->kind = 0; d->pos.beg = lx->pos; if (spec(p, lx, &d->spec)) { errorat(lx->pos, "invalid declaration specifier"); goto Bad; } dtp = &dt; curr = &d->var.link; Dtor: if (dtor(p, lx, dtp)) { errorat(lx->pos, "invalid declarator"); goto Bad; } t = peek(p, 0); if (t.kind == Aeq) { t = advance(p, lx); d->kind = (d->kind != Dvars) ? Dvar : Dvars; d->var.init = expr(p, lx); } if (t.kind == Acomma) { d->kind = Dvars; alloc(*curr); dtp = &(*curr)->dtor; curr = &(*curr)->link; advance(p, lx); goto Dtor; } t = peek(p, 0); if (t.kind == Albrace) { if (!attop(p)) { errorat(lx->pos, "nested function declarations"); goto Bad; } if (d->kind != 0) { errorat(lx->pos, "attempting to define a function for a variable declaration"); goto Bad; } d->kind = Dfunc; alloc(d->func.body); openscope(p); if (blkstmt(p, lx, &d->func.body)) { errorat(lx->pos, "failed to parse function body"); goto Bad; } closescope(p); } d->pos.end = lx->pos; declareobj(p, d); return d; Bad: errorat(lx->pos, "failed to parse top level declaration"); return nil; } // ----------------------------------------------------------------------- // top level api void parse(Parser *p, Lexer *lx) { Token tok; p->sp = p->spstk; while ((tok = peek(p, 0)), tok.kind > Aeof) { if (p->ast.len >= p->ast.cap) { p->ast.cap += 20; p->ast.decls = realloc(p->ast.decls, p->ast.cap*sizeof(*p->ast.decls)); } p->ast.decls[p->ast.len++] = decl(p, lx); } }