aboutsummaryrefslogtreecommitdiff
path: root/sys/cmd/cc/ast.c
diff options
context:
space:
mode:
Diffstat (limited to 'sys/cmd/cc/ast.c')
-rw-r--r--sys/cmd/cc/ast.c822
1 files changed, 822 insertions, 0 deletions
diff --git a/sys/cmd/cc/ast.c b/sys/cmd/cc/ast.c
new file mode 100644
index 0000000..f5c0ba2
--- /dev/null
+++ b/sys/cmd/cc/ast.c
@@ -0,0 +1,822 @@
+#include "cc.h"
+#define alloc(ptr) (ptr) = mem·arenaalloc(C.heap, 1, sizeof *(ptr))
+#define movearray(dst, arr, n) (dst) = mem·arenaalloc(C.heap, (n), sizeof *(arr)), memcpy((dst), (arr), n * sizeof *(arr)), free(arr)
+
+#define attop(prs) ((uintptr)prs->sp == (uintptr)prs->spstk)
+#define peek(p, i) (p->tok[i])
+#define iskw(t, k) (((t).kind == Akeywd) && (t).val.i == (k))
+#define advance(p, l) (p->tok[0] = p->tok[1], p->tok[1] = lex(l), p->tok[0])
+
+#define Bit(i) (1 << (i))
+
+// -----------------------------------------------------------------------
+// helper functions
+
+string
+nameof(Name *n)
+{
+ if (n->kind & Nident)
+ return n->ident;
+ if (n->kind & Nparen)
+ return nameof(&n->paren->name);
+
+ panicf("ill-formed declarator name");
+}
+
+static
+void
+openscope(Parser *p)
+{
+ if (++p->sp == p->spstk + arrlen(p->spstk))
+ panicf("too many nested scopes: crashing");
+}
+
+/*
+ * TODO: save the symbol table with the ast node
+ * write a "copy(move)scope"
+ */
+
+static
+void
+closescope(Parser *p)
+{
+ if (p->sp == p->spstk)
+ panicf("attempting to close the top level scope: crashing");
+
+ forgetall(&p->sp->objs);
+ forgetall(&p->sp->tags);
+ p->sp--;
+}
+
+static
+void
+declareobj(Parser *p, Decl *d)
+{
+ Sym *sym;
+ string ident;
+ struct Decls *link;
+
+ switch (d->kind) {
+ case Dfunc:
+ ident = nameof(&d->func.name);
+ break;
+
+ case Dvar:
+ ident = nameof(&d->var.name);
+ break;
+
+ case Dtype:
+ ident = nameof(&d->type.name);
+ break;
+
+ case Dvars:
+ while (link = d->var.link, link != nil) {
+ ident = nameof(&link->name);
+ sym = lookup(&p->sp->objs, ident);
+ if (sym) {
+ errorat(peek(p, 0).pos, "redeclaration of name '%s' in object space", ident);
+ return;
+ }
+ define(&p->sp->objs, ident, Sobj);
+ }
+ return;
+ default:
+ panicf("unrecognized node kind %d inside declaraction", d->kind);
+
+ }
+
+ sym = lookup(&p->sp->objs, ident);
+ if (sym) {
+ errorat(peek(p, 0).pos, "redeclaration of name '%s' in object space", ident);
+ return;
+ }
+
+ define(&p->sp->objs, ident, Sobj);
+}
+
+static
+Sym *
+declaretag(Parser *p, Type *t)
+{
+ Sym *sym;
+ sym = lookup(&p->sp->tags, t->ident);
+ if (sym) {
+ errorat(peek(p, 0).pos, "redeclaration of name '%s' in tag space", t->ident);
+ return sym;
+ }
+
+ return define(&p->sp->tags, t->ident, Stype);
+}
+
+static
+Sym *
+lookupobj(Parser *p, string ident)
+{
+ Sym *sym;
+ Scope *it;
+
+ it = p->sp;
+ do {
+ sym = lookup(&it->objs, ident);
+ } while (sym == nil && --it >= p->spstk);
+
+ return sym;
+}
+
+static
+Sym *
+lookuptag(Parser *p, string ident)
+{
+ Sym *sym;
+ Scope *it;
+
+ it = p->sp;
+ do {
+ sym = lookup(&it->tags, ident);
+ } while (sym == nil && --it >= p->spstk);
+
+ return sym;
+}
+
+static
+int
+nomatch(Token t, vlong kind)
+{
+ if (t.kind == kind)
+ return 0;
+
+ errorat(t.pos, "expected token '%s', instead found '%s'", tokens[t.kind], tokens[kind]);
+ return 1;
+}
+
+// -----------------------------------------------------------------------
+// forward declarations
+
+static Decl *decl(Parser *p, Lexer *lx);
+static Expr *expr(Parser *p, Lexer *lx);
+static error blkstmt(Parser *p, Lexer *lx, Stmt **s);
+
+// -----------------------------------------------------------------------
+// expressions
+
+// -----------------------------------------------------------------------
+// statements
+
+static
+struct Node*
+stmt(Parser *p, Lexer *lx)
+{
+ int k;
+ Stmt *s;
+ Sym *sym;
+ Token t;
+
+ t = peek(p, 0);
+ k = t.kind;
+
+ /* intercept decl before allocating a statement */
+ if (k == Aident) {
+ if (peek(p, 1).kind == Acolon)
+ goto Tlabel;
+ sym = lookupobj(p, t.val.s);
+ if (!sym) {
+ errorat(lx->pos, "unrecognized type identifier '%s'", t.val.s);
+ goto Bad;
+ }
+
+ if (sym->kind == Stype)
+ goto Tdecl;
+ if (sym->kind == Sobj)
+ goto Texpr;
+
+ errorat(lx->pos, "bad symbol type used as type identifier");
+ goto Bad;
+ }
+
+ if (k == Akeywd) {
+ if ((Kauto <= k && k <= Katomic) || (Ksigned <= k && k <= Klong)) {
+ Tdecl:
+ return (Node *)decl(p, lx);
+ }
+ }
+
+ alloc(s);
+ s->pos.beg = lx->pos;
+
+ switch (k) {
+ case Akeywd:
+ switch (k = t.val.i) {
+ case Kif:
+ t = advance(p, lx);
+ s->kind = Sif;
+
+ if (nomatch(t, Alparen)) {
+ errorat(lx->pos, "missing opening paren before if conditional");
+ goto Bad;
+ }
+ s->br.cond = expr(p, lx);
+ if (nomatch(t, Arparen)) {
+ errorat(lx->pos, "missing closing paren after if conditional");
+ goto Bad;
+ }
+ s->br.body = stmt(p, lx);
+
+ t = peek(p, 0);
+ if (iskw(t, Kelse))
+ s->br.orelse = stmt(p, lx);
+ else
+ s->br.orelse = nil;
+
+ break;
+
+ case Kswitch:
+ t = advance(p, lx);
+ s->kind = Sswitch;
+
+ if (nomatch(t, Alparen)) {
+ errorat(lx->pos, "missing opening paren before switch conditional");
+ goto Bad;
+ }
+ s->br.cond = expr(p, lx);
+ if (nomatch(t, Arparen)) {
+ errorat(lx->pos, "missing closing paren after switch conditional");
+ goto Bad;
+ }
+ s->br.body = stmt(p, lx);
+ s->br.orelse = nil;
+
+ break;
+
+ case Kfor:
+ t = advance(p, lx);
+ s->kind = Sfor;
+
+ if (nomatch(t, Alparen)) {
+ errorat(lx->pos, "missing opening paren before for loop preamble");
+ goto Bad;
+ }
+
+ if (t.kind == Asemi)
+ s->loop.init = nil;
+ else {
+ // TODO: test for declaration
+ s->loop.init = (Node *)expr(p, lx);
+ }
+
+ if (nomatch(t, Asemi)) {
+ errorat(lx->pos, "missing semicolon");
+ goto Bad;
+ }
+
+ if (t.kind == Asemi)
+ s->loop.cond = nil;
+ else
+ s->loop.cond = expr(p, lx);
+
+ if (nomatch(t, Asemi)) {
+ errorat(lx->pos, "missing semicolon");
+ goto Bad;
+ }
+
+ if (t.kind == Asemi)
+ s->loop.step = nil;
+ else
+ s->loop.step = expr(p, lx);
+
+ if (nomatch(t, Alparen)) {
+ errorat(lx->pos, "missing closing paren after for loop preamble");
+ goto Bad;
+ }
+ s->loop.body = stmt(p, lx);
+ break;
+
+ case Kwhile:
+ t = advance(p, lx);
+ s->kind = Swhile;
+ if (nomatch(t, Alparen)) {
+ errorat(lx->pos, "missing opening paren before while loop conditional");
+ goto Bad;
+ }
+ s->loop.cond = expr(p, lx);
+ if (nomatch(t, Arparen)) {
+ errorat(t.pos, "missing closing paren after while loop conditional");
+ goto Bad;
+ }
+
+ s->loop.init = nil;
+ s->loop.step = nil;
+ s->loop.body = stmt(p, lx);
+ break;
+
+ case Kdo:
+ t = advance(p, lx);
+ s->kind = Sdo;
+ s->loop.body = stmt(p, lx);
+
+ if (!iskw(t, Kwhile)) {
+ errorat(t.pos, "missing while statement conditional after do body");
+ goto Bad;
+ }
+ t = advance(p, lx);
+ if (nomatch(t, Alparen)) {
+ errorat(t.pos, "missing open paren after while conditional");
+ goto Bad;
+ }
+
+ s->loop.init = nil;
+ s->loop.step = nil;
+ s->loop.cond = expr(p, lx);
+ break;
+
+ case Kgoto:
+ t = advance(p, lx);
+ s->kind = Sgoto;
+ if (t.kind != Aident) {
+ errorat(t.pos, "invalid argument to goto");
+ goto Bad;
+ }
+ s->jmp.lbl = t.val.s;
+ t = advance(p, lx);
+ if (nomatch(t, Asemi)) {
+ errorat(t.pos, "missing semicolon after goto");
+ goto Bad;
+ }
+ advance(p, lx);
+ break;
+
+ case Kcontinue:
+ t = advance(p, lx);
+ s->kind = Scontin;
+ s->jmp.lbl = nil;
+ s->jmp.x = nil;
+ if (nomatch(t, Asemi)) {
+ errorat(t.pos, "missing semicolon after continue");
+ goto Bad;
+ }
+ advance(p, lx);
+ break;
+
+ case Kbreak:
+ t = advance(p, lx);
+ s->kind = Sbreak;
+ s->jmp.lbl = nil;
+ s->jmp.x = nil;
+ if (nomatch(t, Asemi)) {
+ errorat(t.pos, "missing semicolon after break");
+ goto Bad;
+ }
+ advance(p, lx);
+ break;
+
+ case Kreturn:
+ t = advance(p, lx);
+ s->kind = Sreturn;
+
+ s->jmp.lbl = nil;
+ s->jmp.x = (t.kind == Asemi) ? nil : expr(p, lx);
+
+ t = peek(p, 0);
+ if (nomatch(t, Asemi)) {
+ errorat(t.pos, "missing semicolon after return statement");
+ goto Bad;
+ }
+ break;
+
+ case Kcase:
+ t = advance(p, lx);
+ s->kind = Scase;
+ s->lbl.x = expr(p, lx);
+ if (nomatch(t, Acolon)) {
+ errorat(t.pos, "missing colon after default label");
+ goto Bad;
+ }
+ t = advance(p, lx);
+ s->lbl.stmt = stmt(p, lx);
+ break;
+
+ case Kdefault:
+ t = advance(p, lx);
+ s->kind = Scase;
+ s->lbl.x = nil;
+ if (nomatch(t, Acolon)) {
+ errorat(t.pos, "missing colon after default label");
+ goto Bad;
+ }
+ t = advance(p, lx);
+ s->lbl.stmt = stmt(p, lx);
+ break;
+
+ default:
+ panicf("unexpected statement keyword %s", keywords[k]);
+ }
+ break;
+ case Albrace:
+ s->kind = Sblock;
+ openscope(p);
+ if (blkstmt(p, lx, &s)) {
+ errorat(lx->pos, "failed to parse block statement");
+ goto Bad;
+ }
+ closescope(p);
+ break;
+
+ case Asemi:
+ t = advance(p, lx);
+ s->kind = Sempty;
+ break;
+
+ case Aident:
+ Tlabel:
+ t = advance(p, lx);
+ s->kind = Slabel;
+ if (nomatch(t, Acolon)) {
+ errorat(t.pos, "missing colon after labelled block");
+ goto Bad;
+ }
+ t = advance(p, lx);
+ s->lbl.stmt = stmt(p, lx);
+ break;
+
+ default:
+ Texpr:
+ t = advance(p, lx);
+ s->kind = Sexpr;
+ s->x = expr(p, lx);
+ }
+
+ s->pos.end = lx->pos;
+ return (Node *)s;
+Bad:
+ errorat(lx->pos, "failed to parse statement");
+ return nil;
+}
+
+static
+error
+blkstmt(Parser *p, Lexer *lx, Stmt **s)
+{
+ Token t;
+ int len;
+ int cap;
+ Node **ns;
+
+ alloc(*s);
+ (*s)->kind = Sblock;
+ (*s)->pos.beg = lx->pos;
+
+ t = peek(p, 0);
+ if (nomatch(t, Albrakt))
+ goto Bad;
+ advance(p, lx);
+
+ len = 0, cap = 20;
+ ns = malloc(cap*sizeof(*ns));
+ for (;;) {
+ if (cap == len) {
+ cap += 20;
+ ns = realloc(ns, cap*sizeof(*ns));
+ }
+ ns[len++] = stmt(p, lx);
+ }
+
+ if (nomatch(t, Arbrakt))
+ goto Bad;
+
+ (*s)->pos.end = lx->pos;
+ (*s)->blk.n = len;
+ movearray((*s)->blk.item, ns, len);
+ return 0;
+Bad:
+ errorat(lx->pos, "failed to parse block statement");
+ free(ns);
+ return 1;
+}
+
+// -----------------------------------------------------------------------
+// declarations
+
+/* see https://en.wikipedia.org/wiki/C_data_types */
+static uint64 validtypes[] = {
+ Tvoid,
+ Tbool,
+
+ Tchar,
+ Tsign | Tchar,
+ Tunsign | Tchar,
+
+ Tshort, Tshort | Tint,
+ Tsign | Tshort, Tsign | Tshort | Tint,
+ Tunsign | Tshort, Tunsign | Tshort | Tint,
+
+ 0, Tint,
+ Tsign, Tsign | Tint,
+ Tunsign, Tunsign | Tint,
+
+ Tlong, Tlong | Tint,
+ Tsign | Tlong, Tsign | Tlong | Tint,
+ Tunsign | Tlong, Tunsign | Tlong | Tint,
+
+ Tvlong | Tlong, Tvlong | Tlong | Tint,
+ Tsign | Tvlong | Tlong, Tsign | Tvlong | Tlong | Tint,
+ Tunsign | Tvlong | Tlong, Tunsign | Tvlong | Tlong | Tint,
+
+ Tfloat,
+ Tdouble,
+ Tlong | Tfloat,
+
+ Timag,
+ Tcmplx,
+
+ Tstruct, Tunion, Tenum, Tname,
+};
+
+static
+error
+spec(Parser *p, Lexer *lx, uint64 *spec)
+{
+ Token t;
+ int n;
+ uint64 s, sm;
+
+ s = 0;
+ while (t = peek(p, 0), t.kind == Akeywd) {
+ switch (n = t.val.i) {
+ case Kauto: case Kregister: case Kstatic: case Kextern: case Ktypedef:
+ if (s & MaskMem) {
+ errorat(lx->pos, "multiple storage class specifiers: second was %s", keywords[n]);
+ goto Bad;
+ }
+ break;
+
+ case Kconst: case Kvolatile:
+ if (s & Bit(n))
+ warnat(lx->pos, "duplicate %s specifier found in declaration", keywords[n]);
+ break;
+
+ case Ksigned: case Kunsigned:
+ if (s & MaskSgn) {
+ if (s & Bit(n)) {
+ warnat(lx->pos, "duplicated storage class specifier: second was %s", keywords[n]);
+ break;
+ }
+ errorat(lx->pos, "multiple storage class specifiers");
+ goto Bad;
+ }
+ break;
+
+ case Kshort:
+ if (s & Tshort) {
+ warnat(lx->pos, "duplicated short specifier");
+ break;
+ }
+ break;
+
+ case Klong:
+ if ((s >> Klong) & 2) {
+ errorat(lx->pos, "cannot chain three or more long specifiers");
+ goto Bad;
+ }
+ s += Bit(n);
+ continue;
+
+ case Kvoid: case Kchar: case Kint: case Kfloat: case Kdouble:
+ if (s & MaskTyp) {
+ errorat(lx->pos, "more than one base type specified");
+ goto Bad;
+ }
+ break;
+
+ case Kstruct: case Kunion:
+ case Kenum:
+ panicf("need to implement");
+
+ default:
+ errorat(t.pos, "invalid keyword '%s' found in declaration specifier", keywords[n]);
+ }
+
+ s |= Bit(n);
+ advance(p, lx);
+ }
+
+ sm = (((s<<32)>>32) & ~(MaskQul | MaskMem));
+ for (n = 0; n < arrlen(validtypes); n++) {
+ if (sm == validtypes[n]) {
+ *spec = s;
+ return 0;
+ }
+ }
+ /* TODO: serialize bitflags to string for nice error message */
+ errorat(lx->pos, "invalid type specifier: ''");
+Bad:
+ errorat(lx->pos, "ignoring specifier");
+ *spec = Sbad;
+ return 1;
+}
+
+/* predeclaration */
+static error dtor(Parser *p, Lexer *lx, Dtor *d);
+
+static
+error
+name(Parser *p, Lexer *lx, Name *nm)
+{
+ int n, k;
+ Token t;
+
+ t = peek(p, 0);
+ switch (k = t.kind) {
+ case Aident:
+ nm->kind = Nident;
+ nm->ident = t.val.s;
+ break;
+
+ case Alparen:
+ nm->kind = Nparen;
+ alloc(nm->paren);
+ if (dtor(p, lx, nm->paren)) {
+ /* we are using an arena allocator so can't clean up */
+ errorat(lx->pos, "invalid declarator in parenthesis");
+ nm->paren = nil;
+ goto Bad;
+ }
+ t = peek(p, 0);
+ if (t.kind != Arparen) {
+ errorat(lx->pos, "missing closing paren in declarator");
+ goto Bad;
+ }
+ break;
+
+ default:
+ errorat(lx->pos, "invalid token '%s' in name declaration", tokens[k]);
+ goto Bad;
+ }
+
+ t = advance(p, lx);
+ switch (k = t.kind) {
+ case Albrakt:
+ nm->kind |= Nindex;
+ panicf("need to implement");
+ break;
+
+ case Alparen:
+ nm->kind |= Ncall;
+ panicf("need to implement");
+ break;
+
+ default:
+ ;
+ }
+
+ return 0;
+Bad:
+ return 1;
+}
+
+/* pointer kind is partitioned into 8x6 regions */
+static
+error
+dtor(Parser *p, Lexer *lx, Dtor *d)
+{
+ int n, k;
+ error err;
+ Token t;
+ Dtor *link;
+ Ptr *ptr, *x;
+
+ err = 1;
+
+ ptr = &d->ptr;
+ ptr->kind = 0;
+ ptr->link = nil;
+
+ t = peek(p, 0);
+ if (t.kind != Astar)
+ if (t.kind == Aident || t.kind == Arparen)
+ goto Name;
+ goto Bad;
+Ptr:
+ ptr->kind |= Bit(n);
+ advance(p, lx);
+Key:
+ t = peek(p, 0);
+ switch (k = t.kind) {
+ case Akeywd:
+ if (Kconst <= k && k <= Katomic)
+ ptr->kind |= Bit(n + (t.val.i - Kconst + 1));
+ else {
+ errorat(lx->pos, "invalid keyword '%s' modifies pointer", keywords[t.val.i]);
+ goto Bad;
+ }
+ advance(p, lx);
+ goto Key;
+
+ case Astar:
+ if (++n >= 8) {
+ alloc(x);
+ x->kind = 0;
+ x->link = nil;
+ ptr->link = x;
+ ptr = x;
+ n = 0;
+ }
+ goto Ptr;
+
+ case Aident:
+ case Alparen:
+ goto Name;
+
+ default:
+ errorat(lx->pos, "invalid token '%s' modifies pointer specification", tokens[t.kind]);
+ goto Bad;
+ }
+Name:
+ return name(p, lx, &d->name);
+Bad:
+ return err;
+}
+
+static
+Decl *
+decl(Parser *p, Lexer *lx)
+{
+ Token t;
+ Decl *d;
+ Dtor dt, *dtp;
+ struct Decls **curr;
+
+ alloc(d);
+ d->kind = 0;
+ d->pos.beg = lx->pos;
+
+ if (spec(p, lx, &d->spec)) {
+ errorat(lx->pos, "invalid declaration specifier");
+ goto Bad;
+ }
+ dtp = &dt;
+ curr = &d->var.link;
+Dtor:
+ if (dtor(p, lx, dtp)) {
+ errorat(lx->pos, "invalid declarator");
+ goto Bad;
+ }
+
+ t = peek(p, 0);
+ if (t.kind == Aeq) {
+ t = advance(p, lx);
+ d->kind = (d->kind != Dvars) ? Dvar : Dvars;
+ d->var.init = expr(p, lx);
+ }
+ if (t.kind == Acomma) {
+ d->kind = Dvars;
+ alloc(*curr);
+ dtp = &(*curr)->dtor;
+ curr = &(*curr)->link;
+ advance(p, lx);
+ goto Dtor;
+ }
+
+ t = peek(p, 0);
+ if (t.kind == Albrace) {
+ if (!attop(p)) {
+ errorat(lx->pos, "nested function declarations");
+ goto Bad;
+ }
+ if (d->kind != 0) {
+ errorat(lx->pos, "attempting to define a function for a variable declaration");
+ goto Bad;
+ }
+ d->kind = Dfunc;
+ alloc(d->func.body);
+ openscope(p);
+ if (blkstmt(p, lx, &d->func.body)) {
+ errorat(lx->pos, "failed to parse function body");
+ goto Bad;
+ }
+ closescope(p);
+ }
+
+ d->pos.end = lx->pos;
+ declareobj(p, d);
+ return d;
+Bad:
+ errorat(lx->pos, "failed to parse top level declaration");
+ return nil;
+}
+
+// -----------------------------------------------------------------------
+// top level api
+
+void
+parse(Parser *p, Lexer *lx)
+{
+ Token tok;
+ p->sp = p->spstk;
+
+ while ((tok = peek(p, 0)), tok.kind > Aeof) {
+ if (p->ast.len >= p->ast.cap) {
+ p->ast.cap += 20;
+ p->ast.decls = realloc(p->ast.decls, p->ast.cap*sizeof(*p->ast.decls));
+ }
+ p->ast.decls[p->ast.len++] = decl(p, lx);
+ }
+}