From 7d2a1280cd4321d2a3b2fff0b2413085347a7b4d Mon Sep 17 00:00:00 2001 From: Nicholas Noll Date: Thu, 21 May 2020 18:53:23 -0700 Subject: feat: prototype of ast stmt and decl implementations --- sys/cmd/cc/ast.c | 822 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 822 insertions(+) create mode 100644 sys/cmd/cc/ast.c (limited to 'sys/cmd/cc/ast.c') diff --git a/sys/cmd/cc/ast.c b/sys/cmd/cc/ast.c new file mode 100644 index 0000000..f5c0ba2 --- /dev/null +++ b/sys/cmd/cc/ast.c @@ -0,0 +1,822 @@ +#include "cc.h" +#define alloc(ptr) (ptr) = mem·arenaalloc(C.heap, 1, sizeof *(ptr)) +#define movearray(dst, arr, n) (dst) = mem·arenaalloc(C.heap, (n), sizeof *(arr)), memcpy((dst), (arr), n * sizeof *(arr)), free(arr) + +#define attop(prs) ((uintptr)prs->sp == (uintptr)prs->spstk) +#define peek(p, i) (p->tok[i]) +#define iskw(t, k) (((t).kind == Akeywd) && (t).val.i == (k)) +#define advance(p, l) (p->tok[0] = p->tok[1], p->tok[1] = lex(l), p->tok[0]) + +#define Bit(i) (1 << (i)) + +// ----------------------------------------------------------------------- +// helper functions + +string +nameof(Name *n) +{ + if (n->kind & Nident) + return n->ident; + if (n->kind & Nparen) + return nameof(&n->paren->name); + + panicf("ill-formed declarator name"); +} + +static +void +openscope(Parser *p) +{ + if (++p->sp == p->spstk + arrlen(p->spstk)) + panicf("too many nested scopes: crashing"); +} + +/* + * TODO: save the symbol table with the ast node + * write a "copy(move)scope" + */ + +static +void +closescope(Parser *p) +{ + if (p->sp == p->spstk) + panicf("attempting to close the top level scope: crashing"); + + forgetall(&p->sp->objs); + forgetall(&p->sp->tags); + p->sp--; +} + +static +void +declareobj(Parser *p, Decl *d) +{ + Sym *sym; + string ident; + struct Decls *link; + + switch (d->kind) { + case Dfunc: + ident = nameof(&d->func.name); + break; + + case Dvar: + ident = nameof(&d->var.name); + break; + + case Dtype: + ident = nameof(&d->type.name); + break; + + case Dvars: + while (link = d->var.link, link != nil) { + ident = nameof(&link->name); + sym = lookup(&p->sp->objs, ident); + if (sym) { + errorat(peek(p, 0).pos, "redeclaration of name '%s' in object space", ident); + return; + } + define(&p->sp->objs, ident, Sobj); + } + return; + default: + panicf("unrecognized node kind %d inside declaraction", d->kind); + + } + + sym = lookup(&p->sp->objs, ident); + if (sym) { + errorat(peek(p, 0).pos, "redeclaration of name '%s' in object space", ident); + return; + } + + define(&p->sp->objs, ident, Sobj); +} + +static +Sym * +declaretag(Parser *p, Type *t) +{ + Sym *sym; + sym = lookup(&p->sp->tags, t->ident); + if (sym) { + errorat(peek(p, 0).pos, "redeclaration of name '%s' in tag space", t->ident); + return sym; + } + + return define(&p->sp->tags, t->ident, Stype); +} + +static +Sym * +lookupobj(Parser *p, string ident) +{ + Sym *sym; + Scope *it; + + it = p->sp; + do { + sym = lookup(&it->objs, ident); + } while (sym == nil && --it >= p->spstk); + + return sym; +} + +static +Sym * +lookuptag(Parser *p, string ident) +{ + Sym *sym; + Scope *it; + + it = p->sp; + do { + sym = lookup(&it->tags, ident); + } while (sym == nil && --it >= p->spstk); + + return sym; +} + +static +int +nomatch(Token t, vlong kind) +{ + if (t.kind == kind) + return 0; + + errorat(t.pos, "expected token '%s', instead found '%s'", tokens[t.kind], tokens[kind]); + return 1; +} + +// ----------------------------------------------------------------------- +// forward declarations + +static Decl *decl(Parser *p, Lexer *lx); +static Expr *expr(Parser *p, Lexer *lx); +static error blkstmt(Parser *p, Lexer *lx, Stmt **s); + +// ----------------------------------------------------------------------- +// expressions + +// ----------------------------------------------------------------------- +// statements + +static +struct Node* +stmt(Parser *p, Lexer *lx) +{ + int k; + Stmt *s; + Sym *sym; + Token t; + + t = peek(p, 0); + k = t.kind; + + /* intercept decl before allocating a statement */ + if (k == Aident) { + if (peek(p, 1).kind == Acolon) + goto Tlabel; + sym = lookupobj(p, t.val.s); + if (!sym) { + errorat(lx->pos, "unrecognized type identifier '%s'", t.val.s); + goto Bad; + } + + if (sym->kind == Stype) + goto Tdecl; + if (sym->kind == Sobj) + goto Texpr; + + errorat(lx->pos, "bad symbol type used as type identifier"); + goto Bad; + } + + if (k == Akeywd) { + if ((Kauto <= k && k <= Katomic) || (Ksigned <= k && k <= Klong)) { + Tdecl: + return (Node *)decl(p, lx); + } + } + + alloc(s); + s->pos.beg = lx->pos; + + switch (k) { + case Akeywd: + switch (k = t.val.i) { + case Kif: + t = advance(p, lx); + s->kind = Sif; + + if (nomatch(t, Alparen)) { + errorat(lx->pos, "missing opening paren before if conditional"); + goto Bad; + } + s->br.cond = expr(p, lx); + if (nomatch(t, Arparen)) { + errorat(lx->pos, "missing closing paren after if conditional"); + goto Bad; + } + s->br.body = stmt(p, lx); + + t = peek(p, 0); + if (iskw(t, Kelse)) + s->br.orelse = stmt(p, lx); + else + s->br.orelse = nil; + + break; + + case Kswitch: + t = advance(p, lx); + s->kind = Sswitch; + + if (nomatch(t, Alparen)) { + errorat(lx->pos, "missing opening paren before switch conditional"); + goto Bad; + } + s->br.cond = expr(p, lx); + if (nomatch(t, Arparen)) { + errorat(lx->pos, "missing closing paren after switch conditional"); + goto Bad; + } + s->br.body = stmt(p, lx); + s->br.orelse = nil; + + break; + + case Kfor: + t = advance(p, lx); + s->kind = Sfor; + + if (nomatch(t, Alparen)) { + errorat(lx->pos, "missing opening paren before for loop preamble"); + goto Bad; + } + + if (t.kind == Asemi) + s->loop.init = nil; + else { + // TODO: test for declaration + s->loop.init = (Node *)expr(p, lx); + } + + if (nomatch(t, Asemi)) { + errorat(lx->pos, "missing semicolon"); + goto Bad; + } + + if (t.kind == Asemi) + s->loop.cond = nil; + else + s->loop.cond = expr(p, lx); + + if (nomatch(t, Asemi)) { + errorat(lx->pos, "missing semicolon"); + goto Bad; + } + + if (t.kind == Asemi) + s->loop.step = nil; + else + s->loop.step = expr(p, lx); + + if (nomatch(t, Alparen)) { + errorat(lx->pos, "missing closing paren after for loop preamble"); + goto Bad; + } + s->loop.body = stmt(p, lx); + break; + + case Kwhile: + t = advance(p, lx); + s->kind = Swhile; + if (nomatch(t, Alparen)) { + errorat(lx->pos, "missing opening paren before while loop conditional"); + goto Bad; + } + s->loop.cond = expr(p, lx); + if (nomatch(t, Arparen)) { + errorat(t.pos, "missing closing paren after while loop conditional"); + goto Bad; + } + + s->loop.init = nil; + s->loop.step = nil; + s->loop.body = stmt(p, lx); + break; + + case Kdo: + t = advance(p, lx); + s->kind = Sdo; + s->loop.body = stmt(p, lx); + + if (!iskw(t, Kwhile)) { + errorat(t.pos, "missing while statement conditional after do body"); + goto Bad; + } + t = advance(p, lx); + if (nomatch(t, Alparen)) { + errorat(t.pos, "missing open paren after while conditional"); + goto Bad; + } + + s->loop.init = nil; + s->loop.step = nil; + s->loop.cond = expr(p, lx); + break; + + case Kgoto: + t = advance(p, lx); + s->kind = Sgoto; + if (t.kind != Aident) { + errorat(t.pos, "invalid argument to goto"); + goto Bad; + } + s->jmp.lbl = t.val.s; + t = advance(p, lx); + if (nomatch(t, Asemi)) { + errorat(t.pos, "missing semicolon after goto"); + goto Bad; + } + advance(p, lx); + break; + + case Kcontinue: + t = advance(p, lx); + s->kind = Scontin; + s->jmp.lbl = nil; + s->jmp.x = nil; + if (nomatch(t, Asemi)) { + errorat(t.pos, "missing semicolon after continue"); + goto Bad; + } + advance(p, lx); + break; + + case Kbreak: + t = advance(p, lx); + s->kind = Sbreak; + s->jmp.lbl = nil; + s->jmp.x = nil; + if (nomatch(t, Asemi)) { + errorat(t.pos, "missing semicolon after break"); + goto Bad; + } + advance(p, lx); + break; + + case Kreturn: + t = advance(p, lx); + s->kind = Sreturn; + + s->jmp.lbl = nil; + s->jmp.x = (t.kind == Asemi) ? nil : expr(p, lx); + + t = peek(p, 0); + if (nomatch(t, Asemi)) { + errorat(t.pos, "missing semicolon after return statement"); + goto Bad; + } + break; + + case Kcase: + t = advance(p, lx); + s->kind = Scase; + s->lbl.x = expr(p, lx); + if (nomatch(t, Acolon)) { + errorat(t.pos, "missing colon after default label"); + goto Bad; + } + t = advance(p, lx); + s->lbl.stmt = stmt(p, lx); + break; + + case Kdefault: + t = advance(p, lx); + s->kind = Scase; + s->lbl.x = nil; + if (nomatch(t, Acolon)) { + errorat(t.pos, "missing colon after default label"); + goto Bad; + } + t = advance(p, lx); + s->lbl.stmt = stmt(p, lx); + break; + + default: + panicf("unexpected statement keyword %s", keywords[k]); + } + break; + case Albrace: + s->kind = Sblock; + openscope(p); + if (blkstmt(p, lx, &s)) { + errorat(lx->pos, "failed to parse block statement"); + goto Bad; + } + closescope(p); + break; + + case Asemi: + t = advance(p, lx); + s->kind = Sempty; + break; + + case Aident: + Tlabel: + t = advance(p, lx); + s->kind = Slabel; + if (nomatch(t, Acolon)) { + errorat(t.pos, "missing colon after labelled block"); + goto Bad; + } + t = advance(p, lx); + s->lbl.stmt = stmt(p, lx); + break; + + default: + Texpr: + t = advance(p, lx); + s->kind = Sexpr; + s->x = expr(p, lx); + } + + s->pos.end = lx->pos; + return (Node *)s; +Bad: + errorat(lx->pos, "failed to parse statement"); + return nil; +} + +static +error +blkstmt(Parser *p, Lexer *lx, Stmt **s) +{ + Token t; + int len; + int cap; + Node **ns; + + alloc(*s); + (*s)->kind = Sblock; + (*s)->pos.beg = lx->pos; + + t = peek(p, 0); + if (nomatch(t, Albrakt)) + goto Bad; + advance(p, lx); + + len = 0, cap = 20; + ns = malloc(cap*sizeof(*ns)); + for (;;) { + if (cap == len) { + cap += 20; + ns = realloc(ns, cap*sizeof(*ns)); + } + ns[len++] = stmt(p, lx); + } + + if (nomatch(t, Arbrakt)) + goto Bad; + + (*s)->pos.end = lx->pos; + (*s)->blk.n = len; + movearray((*s)->blk.item, ns, len); + return 0; +Bad: + errorat(lx->pos, "failed to parse block statement"); + free(ns); + return 1; +} + +// ----------------------------------------------------------------------- +// declarations + +/* see https://en.wikipedia.org/wiki/C_data_types */ +static uint64 validtypes[] = { + Tvoid, + Tbool, + + Tchar, + Tsign | Tchar, + Tunsign | Tchar, + + Tshort, Tshort | Tint, + Tsign | Tshort, Tsign | Tshort | Tint, + Tunsign | Tshort, Tunsign | Tshort | Tint, + + 0, Tint, + Tsign, Tsign | Tint, + Tunsign, Tunsign | Tint, + + Tlong, Tlong | Tint, + Tsign | Tlong, Tsign | Tlong | Tint, + Tunsign | Tlong, Tunsign | Tlong | Tint, + + Tvlong | Tlong, Tvlong | Tlong | Tint, + Tsign | Tvlong | Tlong, Tsign | Tvlong | Tlong | Tint, + Tunsign | Tvlong | Tlong, Tunsign | Tvlong | Tlong | Tint, + + Tfloat, + Tdouble, + Tlong | Tfloat, + + Timag, + Tcmplx, + + Tstruct, Tunion, Tenum, Tname, +}; + +static +error +spec(Parser *p, Lexer *lx, uint64 *spec) +{ + Token t; + int n; + uint64 s, sm; + + s = 0; + while (t = peek(p, 0), t.kind == Akeywd) { + switch (n = t.val.i) { + case Kauto: case Kregister: case Kstatic: case Kextern: case Ktypedef: + if (s & MaskMem) { + errorat(lx->pos, "multiple storage class specifiers: second was %s", keywords[n]); + goto Bad; + } + break; + + case Kconst: case Kvolatile: + if (s & Bit(n)) + warnat(lx->pos, "duplicate %s specifier found in declaration", keywords[n]); + break; + + case Ksigned: case Kunsigned: + if (s & MaskSgn) { + if (s & Bit(n)) { + warnat(lx->pos, "duplicated storage class specifier: second was %s", keywords[n]); + break; + } + errorat(lx->pos, "multiple storage class specifiers"); + goto Bad; + } + break; + + case Kshort: + if (s & Tshort) { + warnat(lx->pos, "duplicated short specifier"); + break; + } + break; + + case Klong: + if ((s >> Klong) & 2) { + errorat(lx->pos, "cannot chain three or more long specifiers"); + goto Bad; + } + s += Bit(n); + continue; + + case Kvoid: case Kchar: case Kint: case Kfloat: case Kdouble: + if (s & MaskTyp) { + errorat(lx->pos, "more than one base type specified"); + goto Bad; + } + break; + + case Kstruct: case Kunion: + case Kenum: + panicf("need to implement"); + + default: + errorat(t.pos, "invalid keyword '%s' found in declaration specifier", keywords[n]); + } + + s |= Bit(n); + advance(p, lx); + } + + sm = (((s<<32)>>32) & ~(MaskQul | MaskMem)); + for (n = 0; n < arrlen(validtypes); n++) { + if (sm == validtypes[n]) { + *spec = s; + return 0; + } + } + /* TODO: serialize bitflags to string for nice error message */ + errorat(lx->pos, "invalid type specifier: ''"); +Bad: + errorat(lx->pos, "ignoring specifier"); + *spec = Sbad; + return 1; +} + +/* predeclaration */ +static error dtor(Parser *p, Lexer *lx, Dtor *d); + +static +error +name(Parser *p, Lexer *lx, Name *nm) +{ + int n, k; + Token t; + + t = peek(p, 0); + switch (k = t.kind) { + case Aident: + nm->kind = Nident; + nm->ident = t.val.s; + break; + + case Alparen: + nm->kind = Nparen; + alloc(nm->paren); + if (dtor(p, lx, nm->paren)) { + /* we are using an arena allocator so can't clean up */ + errorat(lx->pos, "invalid declarator in parenthesis"); + nm->paren = nil; + goto Bad; + } + t = peek(p, 0); + if (t.kind != Arparen) { + errorat(lx->pos, "missing closing paren in declarator"); + goto Bad; + } + break; + + default: + errorat(lx->pos, "invalid token '%s' in name declaration", tokens[k]); + goto Bad; + } + + t = advance(p, lx); + switch (k = t.kind) { + case Albrakt: + nm->kind |= Nindex; + panicf("need to implement"); + break; + + case Alparen: + nm->kind |= Ncall; + panicf("need to implement"); + break; + + default: + ; + } + + return 0; +Bad: + return 1; +} + +/* pointer kind is partitioned into 8x6 regions */ +static +error +dtor(Parser *p, Lexer *lx, Dtor *d) +{ + int n, k; + error err; + Token t; + Dtor *link; + Ptr *ptr, *x; + + err = 1; + + ptr = &d->ptr; + ptr->kind = 0; + ptr->link = nil; + + t = peek(p, 0); + if (t.kind != Astar) + if (t.kind == Aident || t.kind == Arparen) + goto Name; + goto Bad; +Ptr: + ptr->kind |= Bit(n); + advance(p, lx); +Key: + t = peek(p, 0); + switch (k = t.kind) { + case Akeywd: + if (Kconst <= k && k <= Katomic) + ptr->kind |= Bit(n + (t.val.i - Kconst + 1)); + else { + errorat(lx->pos, "invalid keyword '%s' modifies pointer", keywords[t.val.i]); + goto Bad; + } + advance(p, lx); + goto Key; + + case Astar: + if (++n >= 8) { + alloc(x); + x->kind = 0; + x->link = nil; + ptr->link = x; + ptr = x; + n = 0; + } + goto Ptr; + + case Aident: + case Alparen: + goto Name; + + default: + errorat(lx->pos, "invalid token '%s' modifies pointer specification", tokens[t.kind]); + goto Bad; + } +Name: + return name(p, lx, &d->name); +Bad: + return err; +} + +static +Decl * +decl(Parser *p, Lexer *lx) +{ + Token t; + Decl *d; + Dtor dt, *dtp; + struct Decls **curr; + + alloc(d); + d->kind = 0; + d->pos.beg = lx->pos; + + if (spec(p, lx, &d->spec)) { + errorat(lx->pos, "invalid declaration specifier"); + goto Bad; + } + dtp = &dt; + curr = &d->var.link; +Dtor: + if (dtor(p, lx, dtp)) { + errorat(lx->pos, "invalid declarator"); + goto Bad; + } + + t = peek(p, 0); + if (t.kind == Aeq) { + t = advance(p, lx); + d->kind = (d->kind != Dvars) ? Dvar : Dvars; + d->var.init = expr(p, lx); + } + if (t.kind == Acomma) { + d->kind = Dvars; + alloc(*curr); + dtp = &(*curr)->dtor; + curr = &(*curr)->link; + advance(p, lx); + goto Dtor; + } + + t = peek(p, 0); + if (t.kind == Albrace) { + if (!attop(p)) { + errorat(lx->pos, "nested function declarations"); + goto Bad; + } + if (d->kind != 0) { + errorat(lx->pos, "attempting to define a function for a variable declaration"); + goto Bad; + } + d->kind = Dfunc; + alloc(d->func.body); + openscope(p); + if (blkstmt(p, lx, &d->func.body)) { + errorat(lx->pos, "failed to parse function body"); + goto Bad; + } + closescope(p); + } + + d->pos.end = lx->pos; + declareobj(p, d); + return d; +Bad: + errorat(lx->pos, "failed to parse top level declaration"); + return nil; +} + +// ----------------------------------------------------------------------- +// top level api + +void +parse(Parser *p, Lexer *lx) +{ + Token tok; + p->sp = p->spstk; + + while ((tok = peek(p, 0)), tok.kind > Aeof) { + if (p->ast.len >= p->ast.cap) { + p->ast.cap += 20; + p->ast.decls = realloc(p->ast.decls, p->ast.cap*sizeof(*p->ast.decls)); + } + p->ast.decls[p->ast.len++] = decl(p, lx); + } +} -- cgit v1.2.1