#include "cc.h" // ----------------------------------------------------------------------- // helper macros #define alloc(ptr) (ptr) = mem·arenaalloc(C.heap, 1, sizeof *(ptr)) #define copyarray(dst, arr, n) (dst) = mem·arenaalloc(C.heap, (n), sizeof *(arr)), memcpy((dst), (arr), n * sizeof *(arr)) #define movearray(dst, arr, n) copyarray(dst,arr,n), free(arr) #define attop(prs) ((uintptr)prs->sp == (uintptr)prs->spstk) #define peek(p, i) (p->tok[i]) #define iskw(t, k) (((t).kind == Akeywd) && (t).val.i == (k)) #define advance(p, l) (p->tok[0] = p->tok[1], p->tok[1] = lex(l), p->tok[0]) #define Bit(i) (1 << (i)) // ----------------------------------------------------------------------- // helper functions static string nameof(Name *n) { switch (n->kind) { /* 0 corresponds to no state - i.e. an abstract name */ case Nnil: return nil; case Nident: return n->ident; case Nparen: return nameof(n->paren->name); case Nindex: case Ncall: return nameof(n->sfx.name); } panicf("unreachable"); return nil; } static void openscope(Parser *p) { if (++p->sp >= arrend(p->spstk)) panicf("scope stack overflow"); } /* * TODO: save the symbol table with the ast node * write a "copy(move)scope" */ static void closescope(Parser *p) { if (p->sp <= p->spstk) panicf("scope stack underflow"); forgetall(&p->sp->objs); forgetall(&p->sp->tags); p->sp--; } /* temporary stack helpers */ static Name* getname(Parser *p) { if (p->nm >= arrend(p->nmstk)) panicf("name stack overflow"); return p->nm++; } static void putdtor(Parser *p, Dtor *dt); static void putname(Parser *p, Name *n) { if (p->nm <= p->nmstk) panicf("name stack underflow"); switch (n->kind) { case Nnil: case Nident: break; case Nparen: putdtor(p, n->paren); break; case Nindex: case Ncall: putname(p, n->sfx.name); break; default: panicf("unrecognized name kind"); } *p->nm-- = (Name){0}; } static Ptr* getptr(Parser *p) { if (p->pt >= arrend(p->ptstk)) panicf("pointer stack overflow"); return p->pt++; } static void putptr(Parser *p, Ptr *ptr) { if (p->pt <= p->ptstk) panicf("pointer stack underflow"); while ((ptr = ptr->link)) putptr(p, ptr); *p->pt-- = (Ptr){0}; } static Dtor* getdtor(Parser *p) { if (p->dt >= arrend(p->dtstk)) panicf("dtor stack overflow"); p->dt->name = getname(p); return p->dt++; } static void putdtor(Parser *p, Dtor *dt) { if (p->dt <= p->dtstk) panicf("dtor stack underflow"); /* release the pointer overflow if we had to use it */ if (p->dt->ptr.link) putptr(p, p->dt->ptr.link); /* the dtor could encompass multiple names hierarchically */ putname(p, dt->name); *p->dt-- = (Dtor){0}; } /* TODO: This will fail for forward declarations */ static void declareobj(Parser *p, Decl *d) { Sym *sym; string ident; uint32 kind; struct Decls *link; switch (d->kind) { case Dfunc: case Dvar: kind = Svar; goto one; case Dtype: kind = Stype; one: ident = d->name; break; case Dvars: kind = Svar; goto many; case Dtypes: kind = Stype; many: while (link = &d->list, link != nil) { ident = link->name; sym = lookup(&p->sp->objs, ident); if (sym) { errorat(peek(p, 0).pos, "redeclaration of name '%s' in object space", ident); return; } sym = define(&p->sp->objs, ident, kind); if (kind == Svar) sym->obj = d; else sym->type = d->type; } break; default: panicf("unrecognized node kind %d. expected declaration", d->kind); } sym = lookup(&p->sp->objs, ident); if (sym) { errorat(peek(p, 0).pos, "redeclaration of name '%s' in object space", ident); return; } sym = define(&p->sp->objs, ident, kind); if (kind == Svar) sym->obj = d; else sym->type = d->type; } /* enters the object identifier space */ static void declareenum(Parser *p, int n, string *elts, Expr *vals) { int i; Sym *s; for (i = 0; i < n; i++) { s = lookup(&p->sp->objs, elts[i]); if (s) { errorat(peek(p, 0).pos, "redeclaration of name %s in object space", elts[i]); continue; } s = define(&p->sp->objs, elts[i], Senum); s->val = vals + i; } } static void declaretag(Parser *p, uint32 t, string name) { Sym *sym; sym = lookup(&p->sp->tags, name); if (sym) { errorat(peek(p, 0).pos, "redeclaration of name '%s' in tag space", name); return; } sym = define(&p->sp->tags, name, Stype); sym->type = t; } static Sym * lookupobj(Parser *p, string ident) { Sym *sym; Scope *it; it = p->sp; do { sym = lookup(&it->objs, ident); } while (sym == nil && --it >= p->spstk); return sym; } static Sym * lookuptag(Parser *p, string ident) { Sym *sym; Scope *it; it = p->sp; do { sym = lookup(&it->tags, ident); } while (sym == nil && --it >= p->spstk); return sym; } static int nomatch(Token t, vlong kind) { if (t.kind == kind) return 0; if (t.kind == Akeywd) errorat(t.pos, "expected token '%s', instead found keyword '%s'", tokens[kind], keywords[t.val.i]); else errorat(t.pos, "expected token '%s', instead found '%s'", tokens[kind], tokens[t.kind]); return 1; } // ----------------------------------------------------------------------- // needed forward declarations static error spec(Parser *, Lexer *, uint64 *); static uint32 basetype(Parser *, Lexer *, uint64 *s); static string namedecl(Parser *, Lexer *, uint32 *, int); static uint32 typename(Parser *, Lexer *, uint32 *); static error dtor(Parser *p, Lexer *lx, Dtor *d, int ab); static uint32 typeofdtor(Dtor *, uint32); static Decl *decl(Parser *, Lexer *); static Expr *ternary(Parser *, Lexer *); static Expr *expr(Parser *, Lexer *); static error blkstmt(Parser *, Lexer *, Stmt **); // ----------------------------------------------------------------------- // expressions #define MAKEX(x, state) alloc((x)), (x)->kind = X##state static Expr* primary(Parser *p, Lexer *lx) { int k; Expr *x; Token t; Pos b; t = peek(p, 0); b = t.pos; switch (k = (t.kind & Vmask)) { case Aident: MAKEX(x, ident); x->pos.beg = b; x->pos.end = lx->pos; x->name = t.val.s; break; case Alit: MAKEX(x, lit); x->pos.beg = b; x->pos.end = lx->pos; x->val.kind = t.kind & ~Vmask; x->val.v = t.val; break; case Alparen: advance(p, lx); x = expr(p, lx); t = peek(p, 0); if (nomatch(t, Arparen)) { errorat(lx->pos, "unterminated paren expression"); goto Bad; } break; default: panicf("unreachable"); } advance(p, lx); return x; Bad: errorat(lx->pos, "unable to parse operand expression"); return nil; } static int istypename(Parser *p, Token t) { Sym *sym; if (t.kind == Akeywd && (Kconst <= t.val.i && t.val.i <= Kenum)) return 1; if (t.kind == Aident) { sym = lookupobj(p, t.val.s); return (sym != nil) && sym->kind == Stype; } return 0; } static Expr* initx(Parser *p, Lexer *lx); static Expr* initlist(Parser *p, Lexer *lx) { Token t; int c, n; Expr *x, **a; struct Key *k; MAKEX(x, initlist); x->pos.beg = lx->pos; x->init.n = 0; if (t.kind == Arbrace) { x->init.k = nil; x->init.v = nil; return x; } c = 0; n = 0; a = nil; k = nil; Key0: if (n >= c) { c += 20; k = realloc(k, c * sizeof(*k)); a = realloc(a, c * sizeof(*a)); } Key1: switch (t.kind) { case Adot: t = advance(p, lx); if (t.kind != Aident) { errorat(t.pos, "dot designator must be followed by identifier"); goto Bad; } k[n++] = (struct Key) { .kind = (uint32)x->init.n, .s = t.val.s, }; t = advance(p, lx); goto Key0; case Albrakt: t = advance(p, lx); k[n++] = (struct Key) { .kind = (uint32)x->init.n | (1ULL << 32), .x = expr(p, lx), }; t = peek(p, 0); goto Key0; case Aeq: t = advance(p, lx); /* fallthrough */ default: a[x->init.n++] = initx(p, lx); t = peek(p, 0); switch (t.kind) { case Arbrace: break; case Acomma: advance(p, lx); /* fallthrough */ default: goto Key0; } break; case Acomma: t = advance(p, lx); break; } movearray(x->init.k, k, n); movearray(x->init.v, a, x->init.n); return x; Bad: errorat(t.pos, "could not parse initializer list"); return nil; } static Expr* initx(Parser *p, Lexer *lx) { Expr *x; Token t; t = peek(p, 0); if (t.kind != Albrace) return ternary(p, lx); advance(p, lx); x = initlist(p, lx); t = peek(p, 0); if (nomatch(t, Arbrace)) { errorat(t.pos, "unmatched brace in initializer list, found %s instead", tokens[t.kind]); advance(p, lx); } return x; } static Expr* postfix(Parser *p, Lexer *lx) { Pos b; Token t; int c, n; uint32 type, qual; Expr *x, *y, **a; t = peek(p, 0); if (t.kind == Alparen) if (istypename(p, peek(p, 1))) { t = advance(p, lx); type = typename(p, lx, &qual); t = peek(p, 0); if (nomatch(t, Arparen)) { errorat(lx->pos, "unmatched paren: found %s instead", tokens[t.kind]); goto Bad; } t = advance(p, lx); if (nomatch(t, Albrace)) { errorat(lx->pos, "bad initializer list: found %s", tokens[t.kind]); goto Bad; } x = initlist(p, lx); t = peek(p, 0); if (nomatch(t, Arbrace)) { errorat(lx->pos, "unmatched brace: found %s instead", tokens[t.kind]); goto Bad; } x->type = type; x->qual = qual; return x; } x = primary(p, lx); t = peek(p, 0); for (;;) { b = x->pos.beg; switch (t.kind) { case Ainc: MAKEX(y, postinc); goto Postfix; case Adec: MAKEX(y, postdec); Postfix: y->pos.beg = b; y->pos.end = lx->pos; y->unary.post = x; x = y, y = nil; break; case Adot: MAKEX(y, self); goto Select; case Aarrow: MAKEX(y, selp); Select: t = advance(p, lx); if (t.kind != Aident) { errorat(t.pos, "invalid operand of selector expression"); goto Bad; } y->pos.beg = b; y->pos.end = lx->pos; y->idx.f = t.val.s; y->idx.x = x; x = y, y = nil; break; case Albrakt: t = advance(p, lx); if (t.kind == Arbrakt) { errorat(t.pos, "empty index expression"); goto Bad; } MAKEX(y, index); y->idx.x = x; y->idx.i = expr(p, lx); t = peek(p, 0); if (t.kind != Albrakt) { errorat(t.pos, "malformed index expression"); goto Bad; } x = y, y = nil; break; case Alparen: t = advance(p, lx); MAKEX(y, call); y->call.fn = x; y->pos.beg = b; y->call.n = 0; if (t.kind == Arparen) { y->call.arg = nil; goto Endfunc; } c = 0; a = nil; Arg: if (y->call.n >= c) { c += 20; a = realloc(a, c * sizeof(*a)); } a[y->call.n++] = expr(p, lx); t = peek(p, 0); if (t.kind == Acomma) { advance(p, lx); goto Arg; } if (t.kind != Arparen) { errorat(t.pos, "invalid token '%s' found in call argument"); goto Bad; } movearray(y->call.arg, a, y->call.n); Endfunc: y->pos.end = lx->pos; x = y, y = nil; break; default: return x; } t = advance(p, lx); } return x; Bad: errorat(lx->pos, "failed to parse primary expression"); return nil; } static uint32 typename(Parser *p, Lexer *lx, uint32 *spec) { uint32 base; uint64 s; base = basetype(p, lx, &s); if (!base) { errorat(lx->pos, "failed to parse type name specifiers"); return 0; } *spec = (uint32)s; namedecl(p, lx, &base, 1); return base; } static Expr* cast(Parser *p, Lexer *lx); static Expr* unary(Parser *p, Lexer *lx) { Expr *x; Token t; t = peek(p, 0); switch (t.kind) { case Ainc: MAKEX(x, preinc); goto Prefix; case Adec: MAKEX(x, predec); /* fallthrough */ Prefix: advance(p, lx); x->pos.beg = t.pos; x->unary.pre = unary(p, lx); x->pos.end = x->unary.pre->pos.end; return x; case Aneg: MAKEX(x, neg); goto Unary; case Aand: MAKEX(x, ref); goto Unary; case Anot: MAKEX(x, not); goto Unary; case Astar: MAKEX(x, star); goto Unary; case Aadd: MAKEX(x, plus); goto Unary; case Asub: MAKEX(x, minus); /* fallthrough */ Unary: advance(p, lx); x->pos.beg = t.pos; x->unary.pre = cast(p, lx); x->pos.end = x->unary.pre->pos.end; return x; case Akeywd: switch (t.val.i) { case Ksizeof: MAKEX(x, sizeof); goto Key; case Kalignof: MAKEX(x, alignof); /* fallthrough */ Key: t = advance(p, lx); if (t.kind == Alparen) if (istypename(p, peek(p, 1))) { t = advance(p, lx); x->info.type = 0; x->info.of.type = typename(p, lx, &x->info.of.qual); t = peek(p, 0); if (nomatch(t, Arparen)) { errorat(t.pos, "missing paren for size/alignof statement"); goto Bad; } advance(p, lx); return x; } x->info.type = 1; x->info.x = unary(p, lx); return x; default: ; } /* fallthrough */ default: return postfix(p, lx); } Bad: return nil; } static Expr* cast(Parser *p, Lexer *lx) { Expr *x; Token t; t = peek(p, 0); if (t.kind == Alparen && istypename(p, peek(p,1))) { t = advance(p, lx); MAKEX(x, cast); x->pos.beg = t.pos; x->cast.to.type = typename(p, lx, &x->cast.to.qual); if (!x->cast.to.type) { errorat(lx->pos, "invalid type operand of cast"); goto Bad; } t = peek(p, 0); if (nomatch(t, Arparen)) { errorat(lx->pos, "missing closing paren after cast expression"); goto Bad; } advance(p, lx); x->cast.x = cast(p, lx); x->pos.beg = lx->pos; return x; } return unary(p, lx); Bad: errorat(lx->pos, "failed to parse cast expression"); return nil; } /* static data for binary operators */ #define OPERATORS \ OPERATOR(Astar, 10, Xmul) \ OPERATOR(Adiv, 10, Xdiv) \ OPERATOR(Amod, 10, Xmod) \ OPERATOR(Aadd, 9, Xadd) \ OPERATOR(Asub, 9, Xsub) \ OPERATOR(Alsft, 8, Xlsft) \ OPERATOR(Arsft, 8, Xrsft) \ OPERATOR(Agteq, 7, Xgteq) \ OPERATOR(Alteq, 7, Xlteq) \ OPERATOR(Alt, 7, Xlt) \ OPERATOR(Agt, 7, Xgt) \ OPERATOR(Aeq, 6, Xeql) \ OPERATOR(Aneq, 6, Xneq) \ OPERATOR(Aand, 5, Xand) \ OPERATOR(Axor, 4, Xxor) \ OPERATOR(Aor, 3, Xor) \ OPERATOR(Aandand, 2, Xandand) \ OPERATOR(Aoror, 1, Xoror) static int prectab[NUM_TOKENS] = { #define OPERATOR(a, b, c) [a] = b, OPERATORS #undef OPERATOR }; static int optab[NUM_TOKENS] = { #define OPERATOR(a, b, c) [a] = c, OPERATORS #undef OPERATOR }; #undef OPERATORS static Expr* binary(Parser *p, Lexer *lx, int prec) { Token t; int k, np; Expr *l, *x; l = cast(p, lx); for (;;) { t = peek(p, 0); k = t.kind; np = prectab[k]; if (np < prec) return l; alloc(x); t = advance(p, lx); x->pos.beg = l->pos.beg; x->kind = optab[k]; x->binary.l = l; x->binary.r = binary(p, lx, np + 1); x->pos.end = x->binary.r->pos.end; l = x; } return l; Bad: errorat(t.pos, "failed to parse expression"); return nil; } static Expr* ternary(Parser *p, Lexer *lx) { Pos b; Token t; Expr *x, *y; x = binary(p, lx, 1); t = peek(p, 0); b = t.pos; switch (t.kind) { case Aqmark: t = advance(p, lx); y = x; MAKEX(x, ternary); x->pos.beg = b; x->kind = Xternary; x->cond.c = y; x->cond.t = expr(p, lx); t = peek(p, 0); if (nomatch(t, Acolon)) { errorat(t.pos, "ternary expression missing ':'"); goto Bad; } t = advance(p, lx); x->cond.e = expr(p, lx); x->pos.end = lx->pos; break; case Aasn: MAKEX(y, asn); goto Assign; case Aorasn: MAKEX(y, orasn); goto Assign; case Axorasn: MAKEX(y, xorasn); goto Assign; case Aandasn: MAKEX(y, andasn); goto Assign; case Asubasn: MAKEX(y, subasn); goto Assign; case Amulasn: MAKEX(y, mulasn); goto Assign; case Adivasn: MAKEX(y, divasn); goto Assign; case Amodasn: MAKEX(y, modasn); goto Assign; case Alsftasn: MAKEX(y, lsftasn); goto Assign; case Arsftasn: MAKEX(y, rsftasn); goto Assign; Assign: advance(p, lx); y->asn.l = x; y->asn.r = ternary(p, lx); x = y; x->pos.beg = b; x->pos.end = lx->pos; break; default: ; } return x; Bad: errorat(lx->pos, "failing expression parse"); return nil; } static Expr* expr(Parser *p, Lexer *lx) { Pos b; Token t; Expr *x, *y; x = ternary(p, lx); while (t = peek(p, 0), t.kind == Acomma) { advance(p, lx); y = x; MAKEX(x, comma); x->pos.beg = y->pos.beg; x->comma.x[0] = y; x->comma.x[1] = ternary(p, lx); x->pos.end = lx->pos; y = nil; } return x; } // ----------------------------------------------------------------------- // statements static struct Node* stmt(Parser *p, Lexer *lx) { int k; Stmt *s; Sym *sym; Token t; t = peek(p, 0); k = t.kind; /* intercept decl before allocating a statement */ if (k == Aident) { if (peek(p, 1).kind == Acolon) goto Tlabel; sym = lookupobj(p, t.val.s); if (!sym) { errorat(lx->pos, "unrecognized type identifier '%s'", t.val.s); goto Bad; } if (sym->kind == Stype) goto Tdecl; if (sym->kind == Svar) { alloc(s); s->pos.beg = lx->pos; goto Texpr; } errorat(lx->pos, "bad symbol type used as type identifier"); goto Bad; } if (k == Akeywd) { if ((Kauto <= t.val.i && t.val.i <= Ktypedef) || (Kconst <= t.val.i && t.val.i <= Kenum)) { Tdecl: return (Node *)decl(p, lx); } } alloc(s); s->pos.beg = lx->pos; switch (k) { case Akeywd: switch (k = t.val.i) { case Kif: t = advance(p, lx); s->kind = Sif; if (nomatch(t, Alparen)) { errorat(lx->pos, "missing opening paren before if conditional"); goto Bad; } s->br.cond = expr(p, lx); if (nomatch(t, Arparen)) { errorat(lx->pos, "missing closing paren after if conditional"); goto Bad; } s->br.body = stmt(p, lx); t = peek(p, 0); if (iskw(t, Kelse)) s->br.orelse = stmt(p, lx); else s->br.orelse = nil; break; case Kswitch: t = advance(p, lx); s->kind = Sswitch; if (nomatch(t, Alparen)) { errorat(lx->pos, "missing opening paren before switch conditional"); goto Bad; } s->br.cond = expr(p, lx); if (nomatch(t, Arparen)) { errorat(lx->pos, "missing closing paren after switch conditional"); goto Bad; } s->br.body = stmt(p, lx); s->br.orelse = nil; break; case Kfor: t = advance(p, lx); s->kind = Sfor; if (nomatch(t, Alparen)) { errorat(lx->pos, "missing opening paren before for loop preamble"); goto Bad; } if (t.kind == Asemi) s->loop.init = nil; else { // TODO: test for declaration s->loop.init = (Node *)expr(p, lx); } if (nomatch(t, Asemi)) { errorat(lx->pos, "missing semicolon"); goto Bad; } if (t.kind == Asemi) s->loop.cond = nil; else s->loop.cond = expr(p, lx); if (nomatch(t, Asemi)) { errorat(lx->pos, "missing semicolon"); goto Bad; } if (t.kind == Asemi) s->loop.step = nil; else s->loop.step = expr(p, lx); if (nomatch(t, Alparen)) { errorat(lx->pos, "missing closing paren after for loop preamble"); goto Bad; } s->loop.body = stmt(p, lx); break; case Kwhile: t = advance(p, lx); s->kind = Swhile; if (nomatch(t, Alparen)) { errorat(lx->pos, "missing opening paren before while loop conditional"); goto Bad; } s->loop.cond = expr(p, lx); if (nomatch(t, Arparen)) { errorat(t.pos, "missing closing paren after while loop conditional"); goto Bad; } s->loop.init = nil; s->loop.step = nil; s->loop.body = stmt(p, lx); break; case Kdo: t = advance(p, lx); s->kind = Sdo; s->loop.body = stmt(p, lx); if (!iskw(t, Kwhile)) { errorat(t.pos, "missing while statement conditional after do body"); goto Bad; } t = advance(p, lx); if (nomatch(t, Alparen)) { errorat(t.pos, "missing open paren after while conditional"); goto Bad; } s->loop.init = nil; s->loop.step = nil; s->loop.cond = expr(p, lx); break; case Kgoto: t = advance(p, lx); s->kind = Sgoto; if (t.kind != Aident) { errorat(t.pos, "invalid argument to goto"); goto Bad; } s->jmp.lbl = t.val.s; t = advance(p, lx); if (nomatch(t, Asemi)) { errorat(t.pos, "missing semicolon after goto"); goto Bad; } advance(p, lx); break; case Kcontinue: t = advance(p, lx); s->kind = Scontin; s->jmp.lbl = nil; s->jmp.x = nil; if (nomatch(t, Asemi)) { errorat(t.pos, "missing semicolon after continue"); goto Bad; } advance(p, lx); break; case Kbreak: t = advance(p, lx); s->kind = Sbreak; s->jmp.lbl = nil; s->jmp.x = nil; if (nomatch(t, Asemi)) { errorat(t.pos, "missing semicolon after break"); goto Bad; } advance(p, lx); break; case Kreturn: t = advance(p, lx); s->kind = Sreturn; s->jmp.lbl = nil; s->jmp.x = (t.kind == Asemi) ? nil : expr(p, lx); t = peek(p, 0); if (nomatch(t, Asemi)) { errorat(t.pos, "missing semicolon after return statement"); goto Bad; } advance(p, lx); break; case Kcase: t = advance(p, lx); s->kind = Scase; s->lbl.x = expr(p, lx); if (nomatch(t, Acolon)) { errorat(t.pos, "missing colon after default label"); goto Bad; } t = advance(p, lx); s->lbl.stmt = stmt(p, lx); break; case Kdefault: t = advance(p, lx); s->kind = Scase; s->lbl.x = nil; if (nomatch(t, Acolon)) { errorat(t.pos, "missing colon after default label"); goto Bad; } t = advance(p, lx); s->lbl.stmt = stmt(p, lx); break; default: panicf("unexpected statement keyword %s", keywords[k]); } break; case Albrace: s->kind = Sblock; openscope(p); if (blkstmt(p, lx, &s)) { errorat(lx->pos, "failed to parse block statement"); goto Bad; } closescope(p); break; case Asemi: t = advance(p, lx); s->kind = Sempty; break; case Aident: Tlabel: t = advance(p, lx); s->kind = Slabel; if (nomatch(t, Acolon)) { errorat(t.pos, "missing colon after labelled block"); goto Bad; } t = advance(p, lx); s->lbl.stmt = stmt(p, lx); break; default: Texpr: s->kind = Sexpr; s->x = expr(p, lx); t = peek(p, 0); if (nomatch(t, Asemi)) { errorat(t.pos, "missing semicolon after statement expression"); goto Bad; } advance(p, lx); } s->pos.end = lx->pos; return (Node *)s; Bad: errorat(lx->pos, "failed to parse statement"); return nil; } static error blkstmt(Parser *p, Lexer *lx, Stmt **s) { Token t; int len; int cap; Node **ns; alloc(*s); (*s)->kind = Sblock; (*s)->pos.beg = lx->pos; t = peek(p, 0); if (nomatch(t, Albrace)) goto Bad; t = advance(p, lx); len = 0, cap = 20; ns = malloc(cap*sizeof(*ns)); while (t.kind != Arbrace) { if (cap == len) { cap += 20; ns = realloc(ns, cap*sizeof(*ns)); } ns[len++] = stmt(p, lx); t = peek(p, 0); } advance(p, lx); (*s)->pos.end = lx->pos; (*s)->blk.n = len; movearray((*s)->blk.item, ns, len); return 0; Bad: errorat(lx->pos, "failed to parse block statement"); free(ns); return 1; } // ----------------------------------------------------------------------- // types uint32 ptrtype(uint32 base, uint32 qual) { uint32 i; Type *t; i = type(); t = C.type.info + i; t->kind = Tptr; t->ptr.base = base; t->ptr.qual = qual; t->size = pointer.size; t->align = pointer.align; t->sign = pointer.sign; return i; } uint32 arraytype(uint32 base, uint32 qual, Expr *ix) { int i, n; Type *t; /* TODO: evaluate the length */ n = 10; i = type(); t = C.type.info + i; t->kind = Tarray; t->ptr.base = base; t->size = n * C.type.info[base].size; t->align = C.type.info[base].align; t->sign = 0; return i; } uint32 functype(uint32 ret, int n, Field *args, int dots) { uint32 i; Type *t; i = type(); t = C.type.info + i; t->kind = Tfunc; t->size = pointer.size; t->align = pointer.align; t->sign = pointer.sign; t->func.ret = ret; t->func.n = n; t->func.arg = args; t->func.dots = dots; return i; } #define ALIGN_DOWN(n, a) ((n) & ~((a)-1)) #define ALIGN_UP(n, a) ALIGN_DOWN((n) + (a)-1, (a)) uint32 structtype(int n, Field *field, Expr *bits) { uint32 i; Type *t; Field *f, *e; i = type(); t = C.type.info + i; t->kind = Tstruct; t->size = 0; t->align = 0; for (f = field, e = field+n; f != e; ++f) { t->size += C.type.info[f->type].size + ALIGN_UP(t->size, C.type.info[f->type].align); t->align = MAX(t->align, C.type.info[f->type].align); } t->aggr.len = n; t->aggr.f = field; t->aggr.x = bits; return i; } uint32 uniontype(int n, Field *field, Expr *bits) { uint32 i; Type *t; Field *f, *e; i = type(); t = C.type.info + i; t->kind = Tstruct; t->size = 0; t->align = 0; for (f = field, e = field+n; f != e; ++f) { t->size = MAX(t->size, C.type.info[f->type].size); t->align = MAX(t->align, C.type.info[f->type].align); } t->aggr.len = n; t->aggr.f = field; t->aggr.x = bits; return i; } uint32 enumtype(int n, string *elts, Expr *vals) { uint32 i; Type *t; Field *f, *e; i = type(); t = C.type.info + i; t->kind = Tenum; /* TODO: dont hardcode int32 */ t->size = 4; t->align = 4; t->enm.len = n; t->enm.elt = elts; t->enm.val = vals; return i; } #undef ALIGN_UP #undef ALIGN_DOWN /* unpacking C declarations into sensible types */ static uint32 typeofname(Name *name, uint32 base) { switch (name->kind) { /* Nnil corresponds to an abstract declarator (i.e. no identifier) */ case Nnil: case Nident: return base; case Nparen: return typeofdtor(name->paren, base); case Nindex: return typeofname(name->sfx.name, arraytype(base, name->sfx.idx.q, name->sfx.idx.x)); case Ncall: return typeofname(name->sfx.name, functype(base, name->sfx.call.n, name->sfx.call.arg, name->sfx.call.dots)); default: panicf("unreachable"); } return 0; } static uint32 typeofdtor(Dtor *decl, uint32 base) { int n; Ptr *p; uint64 b, tmp; n = 0; p = &decl->ptr; b = p->kind; while (b & 1) { base = ptrtype(base, b >> 1); if (++n >= 8) { p = p->link; b = p->kind; } else { b >>= 6; } } return typeofname(decl->name, base); } static uint32 basetype(Parser *p, Lexer *lx, uint64 *s) { int n; uint64 m; if (spec(p, lx, s)) { errorat(lx->pos, "failed to parse type specifier"); return 0; } m = (((*s<<32)>>32) & ~(MaskQul|MaskMem|MaskFcn)); for (n = 0; n < arrlen(validtypespec); n++) { if (validtypespec[n] == m) { if (indextypespec[n] < 0) { m = *s >> 32; if (!m) { errorat(lx->pos, "not a valid type identifier"); return 0; } return m; } return indextypespec[n]; } } errorat(lx->pos, "invalid type specifier"); return 0; } static string namedecl(Parser *p, Lexer *lx, uint32 *base, int noname) { Dtor *dt; string name; Type *t; dt = getdtor(p); name = nil; if (dtor(p, lx, dt, noname)) { errorat(lx->pos, "invalid declarator"); goto End; } if (!noname || noname == 2 && dt->name->kind) name = nameof(dt->name); *base = typeofdtor(dt, *base); putdtor(p, dt); return name; End: putdtor(p, dt); return nil; } // ----------------------------------------------------------------------- // declarations static uint32 enumerate(Parser *p, Lexer *lx, string name, int kind) { int i, n; uint64 s; uint32 t; Token tk; /* TODO: think of a better soln */ string nm[1024], *elts; Expr *cx[1024], *vals; for (n = 0; tk.kind != Arbrace && n < arrlen(nm); n++) { if (tk.kind != Aident) { errorat(tk.pos, "invalid token %s in enum declaration", tokens[tk.kind]); goto Bad; } nm[n] = tk.val.s; cx[n] = nil; tk = advance(p, lx); switch(tk.kind) { case Aeq: advance(p, lx); cx[n] = expr(p, lx); tk = peek(p, 0); if (tk.kind != Acomma) continue; /* fallthrough */ case Acomma: tk = advance(p, lx); } } copyarray(elts, nm, n); copyarray(vals, cx, n); t = enumtype(n, elts, vals); declareenum(p, n, elts, vals); return t; Bad: errorat(tk.pos, "failed to parse enum declaration"); return 0; } static uint32 aggregate(Parser *p, Lexer *lx, string name, int kind) { int n; uint64 s; Token tk; /* TODO: think of a better soln */ static Field fs[1024]; Field *f; static Expr *cx[1024]; Expr *x; for (n = 0, tk = peek(p, 0); tk.kind != Arbrace && n < arrlen(fs); n++) { fs[n].type = basetype(p, lx, &s); fs[n].qual = (uint32)(s & ~(MaskTyp|MaskInt|MaskFlt)); Field: fs[n].name = namedecl(p, lx, &fs[n].type, 0); tk = peek(p, 0); switch (tk.kind) { case Acolon: advance(p, lx); cx[n] = expr(p, lx); tk = peek(p, 0); if (tk.kind == Asemi) { tk = advance(p, lx); continue; } if (tk.kind != Acomma) { errorat(tk.pos, "unrecognized token %s in struct field declaration", tokens[tk.kind]); goto Bad; } /* fallthrough */ case Acomma: advance(p, lx); n++; goto Field; case Asemi: tk = advance(p, lx); continue; default: errorat(tk.pos, "unrecognized token %s in struct field declaration", tokens[tk.kind]); goto Bad; } } copyarray(f, fs, n); copyarray(x, cx, n); return (kind == Tstruct) ? structtype(n, f, x) : uniontype(n, f, x); Bad: errorat(tk.pos, "failed to parse aggregate declaration"); return 0; } static error spec(Parser *p, Lexer *lx, uint64 *spec) { Token t; int n, i; Sym *typ; string name; uint32 tag; uint64 s, sm; static uint32 (*aggrfunc[2])(Parser *, Lexer *, string , int) = {aggregate, enumerate}; s = 0; while (t = peek(p, 0), t.kind >= Aident) { /* typename */ if (t.kind == Aident) { typ = lookupobj(p, t.val.s); if (!typ || (typ && typ->kind != Stype)) break; sm = typ->type; s |= (sm << 32 | Tname); advance(p, lx); continue; } /* keyword */ switch (n = t.val.i) { case Kauto: case Kregister: case Kstatic: case Kextern: case Ktypedef: case Ktls: if (s & MaskMem) { errorat(lx->pos, "multiple storage class specifiers: second was %s", keywords[n]); goto Bad; } break; case Kinline: case Knoret: if (s & Bit(n)) warnat(lx->pos, "duplicate %s function specifier", keywords[n]); break; case Kconst: case Kvolatile: if (s & Bit(n)) warnat(lx->pos, "duplicate %s specifier found in declaration", keywords[n]); break; case Ksigned: case Kunsigned: if (s & MaskSgn) { if (s & Bit(n)) { warnat(lx->pos, "duplicated storage class specifier: second was %s", keywords[n]); break; } errorat(lx->pos, "multiple storage class specifiers"); goto Bad; } break; case Kshort: if (s & Tshort) { warnat(lx->pos, "duplicated short specifier"); break; } break; case Klong: if ((s >> Klong) & 2) { errorat(lx->pos, "cannot chain three or more long specifiers"); goto Bad; } s += Bit(n); t = advance(p, lx); continue; case Kvoid: case Kchar: case Kint: case Kfloat: case Kdouble: if (s & MaskTyp) { errorat(lx->pos, "more than one base type specified"); goto Bad; } break; case Kstruct: case Kunion: i = 0; goto Aggr; case Kenum: i = 1; Aggr: if (s & (Tstruct | Tunion | Tenum)) { errorat(lx->pos, "more than one aggregate/enum type specified"); goto Bad; } t = advance(p, lx); if (t.kind != Aident && t.kind != Albrace) { errorat(t.pos, "enum specifier missing valid declaration"); goto Bad; } /* NOTE: This offset is needed to correctly obtain Tstruct */ n++; name = nil; tag = 0; if (t.kind == Aident) { name = t.val.s; t = advance(p, lx); } if (t.kind == Albrace) { /* TODO: we need check if the name exists. */ t = advance(p, lx); /* NOTE: This depends on the enum order. KEEP IN SYNC */ tag = aggrfunc[i](p, lx, name, Bit(n)); if (t = peek(p, 0), nomatch(t, Arbrace)) { errorat(t.pos, "invalid token %s in aggregate/enum declaration", tokens[t.kind]); goto Bad; } /* high bits encode the type index */ s |= (uint64)tag << 32; } /* TODO: if name does not exist, enter in an incomplete type! */ if (name) declaretag(p, tag, name); break; default: errorat(t.pos, "invalid keyword '%s' found in declaration specifier", keywords[n]); } s |= Bit(n); advance(p, lx); } *spec = s; return 0; Bad: /* TODO: serialize bitflags to string for nice error message */ errorat(lx->pos, "ignoring specifier"); *spec = Sbad; return 1; } /* * name declaration * see dtor for valid values of ab */ static error name(Parser *p, Lexer *lx, Name **nmp, int ab) { Token t; int n, k; uint64 s; Sym *sym; Name *nm, *tmp; /* max args = 100 */ struct Field args[100]; nm = *nmp; t = peek(p, 0); switch (k = t.kind) { case Aident: if (ab == 1) { errorat(t.pos, "identifier not allowed in abstract declarator"); goto Bad; } nm->kind = Nident; nm->ident = t.val.s; break; case Alparen: advance(p, lx); nm->kind = Nparen; nm->paren = getdtor(p); if (dtor(p, lx, nm->paren, ab)) { putdtor(p, nm->paren); nm->paren = nil; errorat(lx->pos, "invalid declarator in parenthesis"); goto Bad; } t = peek(p, 0); if (nomatch(t, Arparen)) { putdtor(p, nm->paren); nm->paren = nil; errorat(lx->pos, "missing closing paren in declarator"); goto Bad; } break; case Albrakt: if (ab) goto Sfx; errorat(lx->pos, "missing identifier in non-abstract declarator"); /* fallthrough */ default: if (ab) goto Sfx; errorat(lx->pos, "invalid token '%s' in name declaration", tokens[k]); goto Bad; } t = advance(p, lx); Sfx: for (;;) { switch (k = t.kind) { case Albrakt: tmp = getname(p); tmp->kind = Nindex; tmp->sfx.name = nm; nm = tmp, tmp = nil; t = advance(p, lx); if (t.kind == Arbrakt) { nm->sfx.idx.q = 0; Iend: nm->sfx.idx.x = nil; t = advance(p, lx); break; } if (t.kind == Astar) { nm->sfx.idx.q = -1; IStar: nm->sfx.idx.x = nil; t = advance(p, lx); if (t.kind != Arbrakt) { errorat(t.pos, "invalid '*' syntax in index expression"); goto Bad; } t = advance(p, lx); break; } if (spec(p, lx, &s)) { errorat(lx->pos, "invalid type qualifier list in index expression"); goto Bad; } nm->sfx.idx.q = (uint32)s; t = peek(p, 0); if (t.kind == Astar) goto IStar; if (t.kind == Arbrakt) goto Iend; nm->sfx.idx.x = expr(p, lx); t = peek(p, 0); if (nomatch(t, Arbrakt)) { errorat(t.pos, "unterminated index expression"); goto Bad; } t = advance(p, lx); continue; case Alparen: tmp = getname(p); tmp->kind = Ncall; tmp->sfx.name = nm; nm = tmp, tmp = nil; t = advance(p, lx); nm->sfx.call.n = 0; switch (t.kind) { case Arparen: nm->sfx.call.arg = nil; break; case Aident: sym = lookupobj(p, t.val.s); if (!sym || (sym && sym->kind != Stype)) { while (t.kind == Aident) { if (nm->sfx.call.n >= arrlen(args)) panicf("ident stack overflow"); args[nm->sfx.call.n++] = (struct Field) { .qual = 0, .type = 0, .name = t.val.s, }; t = advance(p, lx); } if (nomatch(t, Arparen)) { errorat(t.pos, "token '%s' found in function parameter identifier list"); goto Bad; } copyarray(nm->sfx.call.arg, args, nm->sfx.call.n); break; } goto ParamLoop; case Akeywd: if (t.val.i < Kconst || t.val.i > Kenum) { errorat(t.pos, "invalid keyword %s inside function signature"); goto Bad; } ParamLoop: if (nm->sfx.call.n >= arrlen(args)-1) panicf("out of argument buffer"); args[nm->sfx.call.n].type = basetype(p, lx, &s); if (!args[nm->sfx.call.n].type) { errorat(lx->pos, "could not parse base type in function call"); goto Bad; } args[nm->sfx.call.n].qual = (uint32)s & ~(MaskTyp|MaskInt|MaskFlt); args[nm->sfx.call.n].name = namedecl(p, lx, &args[nm->sfx.call.n].type, 2); nm->sfx.call.n++; if ((t = peek(p, 0)).kind == Acomma) { advance(p, lx); goto ParamLoop; } if (t.kind == Aellip) { nm->sfx.call.dots = 1; t = advance(p, lx); } if (nomatch(t, Arparen)) { errorat(t.pos, "token '%s' found in function parameter list"); goto Bad; } copyarray(nm->sfx.call.arg, args, nm->sfx.call.n); break; default: errorat(t.pos, "invalid token %s inside function call signature", tokens[t.kind]); goto Bad; } t = advance(p, lx); continue; default: break; } break; } *nmp = nm; return 0; Bad: return 1; } /* pointer kind is partitioned into 8x6 regions * ab => abstract * @ 0: must have identifier * @ 1: must not have identifier * @ 2: don't care * else: undefined */ static error dtor(Parser *p, Lexer *lx, Dtor *d, int ab) { int n, k; error err; Token t; Dtor *link; Ptr *ptr, *x; err = 1; ptr = &d->ptr; ptr->kind = 0; ptr->link = nil; t = peek(p, 0); if (t.kind != Astar) { if (ab || t.kind == Aident || t.kind == Arparen) goto Name; goto Bad; } n = 0; Ptr: ptr->kind |= Bit(n); advance(p, lx); Key: t = peek(p, 0); switch (k = t.kind) { case Akeywd: if (Kconst <= t.val.i && t.val.i <= Katomic) ptr->kind |= Bit(6*n + (t.val.i - Kconst + 1)); else { errorat(lx->pos, "invalid keyword '%s' modifies pointer", keywords[t.val.i]); goto Bad; } advance(p, lx); goto Key; case Astar: if (++n >= 8) { x = getptr(p); x->kind = 0; x->link = nil; ptr->link = x; ptr = x; n = 0; } goto Ptr; case Aident: case Alparen: goto Name; default: if (ab) goto Name; errorat(lx->pos, "invalid token '%s' modifies pointer specification", tokens[t.kind]); goto Bad; } Name: return name(p, lx, &d->name, ab); Bad: return err; } static Decl * decl(Parser *p, Lexer *lx) { uint64 s; Token t; Decl *d; Expr *x; string name; struct Decls *ds; uint32 base, type; alloc(d); d->kind = 0; d->pos.beg = lx->pos; base = basetype(p, lx, &s); if (!base) { errorat(lx->pos, "could not parse type declaration"); goto Bad; } x = nil; d->spec = (uint32)s & ~(MaskInt|MaskFlt|MaskTyp); d->type = base; d->name = namedecl(p, lx, &d->type, 0); /* TODO: think about functions (both decls and defs) */ d->kind = (s & Mtype) ? Dtype : Dvar; switch (t = peek(p, 0), t.kind) { case Aeq: if (s & Mtype) { errorat(d->pos.beg, "initialization of type not allowed"); goto Bad; } t = advance(p, lx); x = initx(p, lx); d->kind = Dvar; if (t.kind != Acomma) { d->init = x; goto Semi; } /* fallthrough */ case Acomma: d->kind |= Dlist; d->list.init = x; /* move singleton data over */ name = d->name; type = d->type; d->list.name = name; d->list.type = type; ds = &d->list; /* iterate until we hit end of list */ while (t.kind == Acomma) { t = advance(p, lx); alloc(ds->link); ds = ds->link; ds->type = base; ds->name = namedecl(p, lx, &ds->type, 0); t = peek(p, 0); if (t.kind == Aeq) { t = advance(p, lx); ds->init = initx(p, lx); } else ds->init = nil; } goto Semi; case Albrace: d->kind = Dfunc; alloc(d->body); if (!attop(p)) { errorat(lx->pos, "nested function declarations are illegal"); goto Bad; } if (C.type.info[d->type].kind != Tfunc) { errorat(lx->pos, "attempted to define function body for non function type"); goto Bad; } openscope(p); if (blkstmt(p, lx, &d->body)) { errorat(lx->pos, "failed to parse function body"); goto Bad; } closescope(p); break; default: Semi: if (nomatch(t, Asemi)) { errorat(t.pos, "no semicolon after declaration"); goto Bad; } t = advance(p, lx); } d->pos.end = lx->pos; declareobj(p, d); return d; Bad: errorat(lx->pos, "failed to parse top level declaration"); return nil; } // ----------------------------------------------------------------------- // top level api void setup(Parser *p, Lexer *lx) { advance(p,lx); advance(p,lx); /* define all builtin typedefs */ declareobj(p, &C.builtin.vargs); } error parse(Parser *p, Lexer *lx) { Token tok; setup(p, lx); while ((tok = peek(p, 0)), tok.kind > Aeof) { if (p->ast.len >= p->ast.cap) { p->ast.cap += 20; p->ast.decls = realloc(p->ast.decls, p->ast.cap*sizeof(*p->ast.decls)); } p->ast.decls[p->ast.len++] = decl(p, lx); } return 0; }