From 7d2a1280cd4321d2a3b2fff0b2413085347a7b4d Mon Sep 17 00:00:00 2001 From: Nicholas Noll Date: Thu, 21 May 2020 18:53:23 -0700 Subject: feat: prototype of ast stmt and decl implementations --- sys/cmd/cc/ast.c | 822 ++++++++++++++++++++++++++++++++++++++++++++++++++++ sys/cmd/cc/cc.c | 5 +- sys/cmd/cc/cc.h | 259 ++++++++++++----- sys/cmd/cc/lex.c | 13 +- sys/cmd/cc/rules.mk | 1 + 5 files changed, 1018 insertions(+), 82 deletions(-) create mode 100644 sys/cmd/cc/ast.c (limited to 'sys') diff --git a/sys/cmd/cc/ast.c b/sys/cmd/cc/ast.c new file mode 100644 index 0000000..f5c0ba2 --- /dev/null +++ b/sys/cmd/cc/ast.c @@ -0,0 +1,822 @@ +#include "cc.h" +#define alloc(ptr) (ptr) = mem·arenaalloc(C.heap, 1, sizeof *(ptr)) +#define movearray(dst, arr, n) (dst) = mem·arenaalloc(C.heap, (n), sizeof *(arr)), memcpy((dst), (arr), n * sizeof *(arr)), free(arr) + +#define attop(prs) ((uintptr)prs->sp == (uintptr)prs->spstk) +#define peek(p, i) (p->tok[i]) +#define iskw(t, k) (((t).kind == Akeywd) && (t).val.i == (k)) +#define advance(p, l) (p->tok[0] = p->tok[1], p->tok[1] = lex(l), p->tok[0]) + +#define Bit(i) (1 << (i)) + +// ----------------------------------------------------------------------- +// helper functions + +string +nameof(Name *n) +{ + if (n->kind & Nident) + return n->ident; + if (n->kind & Nparen) + return nameof(&n->paren->name); + + panicf("ill-formed declarator name"); +} + +static +void +openscope(Parser *p) +{ + if (++p->sp == p->spstk + arrlen(p->spstk)) + panicf("too many nested scopes: crashing"); +} + +/* + * TODO: save the symbol table with the ast node + * write a "copy(move)scope" + */ + +static +void +closescope(Parser *p) +{ + if (p->sp == p->spstk) + panicf("attempting to close the top level scope: crashing"); + + forgetall(&p->sp->objs); + forgetall(&p->sp->tags); + p->sp--; +} + +static +void +declareobj(Parser *p, Decl *d) +{ + Sym *sym; + string ident; + struct Decls *link; + + switch (d->kind) { + case Dfunc: + ident = nameof(&d->func.name); + break; + + case Dvar: + ident = nameof(&d->var.name); + break; + + case Dtype: + ident = nameof(&d->type.name); + break; + + case Dvars: + while (link = d->var.link, link != nil) { + ident = nameof(&link->name); + sym = lookup(&p->sp->objs, ident); + if (sym) { + errorat(peek(p, 0).pos, "redeclaration of name '%s' in object space", ident); + return; + } + define(&p->sp->objs, ident, Sobj); + } + return; + default: + panicf("unrecognized node kind %d inside declaraction", d->kind); + + } + + sym = lookup(&p->sp->objs, ident); + if (sym) { + errorat(peek(p, 0).pos, "redeclaration of name '%s' in object space", ident); + return; + } + + define(&p->sp->objs, ident, Sobj); +} + +static +Sym * +declaretag(Parser *p, Type *t) +{ + Sym *sym; + sym = lookup(&p->sp->tags, t->ident); + if (sym) { + errorat(peek(p, 0).pos, "redeclaration of name '%s' in tag space", t->ident); + return sym; + } + + return define(&p->sp->tags, t->ident, Stype); +} + +static +Sym * +lookupobj(Parser *p, string ident) +{ + Sym *sym; + Scope *it; + + it = p->sp; + do { + sym = lookup(&it->objs, ident); + } while (sym == nil && --it >= p->spstk); + + return sym; +} + +static +Sym * +lookuptag(Parser *p, string ident) +{ + Sym *sym; + Scope *it; + + it = p->sp; + do { + sym = lookup(&it->tags, ident); + } while (sym == nil && --it >= p->spstk); + + return sym; +} + +static +int +nomatch(Token t, vlong kind) +{ + if (t.kind == kind) + return 0; + + errorat(t.pos, "expected token '%s', instead found '%s'", tokens[t.kind], tokens[kind]); + return 1; +} + +// ----------------------------------------------------------------------- +// forward declarations + +static Decl *decl(Parser *p, Lexer *lx); +static Expr *expr(Parser *p, Lexer *lx); +static error blkstmt(Parser *p, Lexer *lx, Stmt **s); + +// ----------------------------------------------------------------------- +// expressions + +// ----------------------------------------------------------------------- +// statements + +static +struct Node* +stmt(Parser *p, Lexer *lx) +{ + int k; + Stmt *s; + Sym *sym; + Token t; + + t = peek(p, 0); + k = t.kind; + + /* intercept decl before allocating a statement */ + if (k == Aident) { + if (peek(p, 1).kind == Acolon) + goto Tlabel; + sym = lookupobj(p, t.val.s); + if (!sym) { + errorat(lx->pos, "unrecognized type identifier '%s'", t.val.s); + goto Bad; + } + + if (sym->kind == Stype) + goto Tdecl; + if (sym->kind == Sobj) + goto Texpr; + + errorat(lx->pos, "bad symbol type used as type identifier"); + goto Bad; + } + + if (k == Akeywd) { + if ((Kauto <= k && k <= Katomic) || (Ksigned <= k && k <= Klong)) { + Tdecl: + return (Node *)decl(p, lx); + } + } + + alloc(s); + s->pos.beg = lx->pos; + + switch (k) { + case Akeywd: + switch (k = t.val.i) { + case Kif: + t = advance(p, lx); + s->kind = Sif; + + if (nomatch(t, Alparen)) { + errorat(lx->pos, "missing opening paren before if conditional"); + goto Bad; + } + s->br.cond = expr(p, lx); + if (nomatch(t, Arparen)) { + errorat(lx->pos, "missing closing paren after if conditional"); + goto Bad; + } + s->br.body = stmt(p, lx); + + t = peek(p, 0); + if (iskw(t, Kelse)) + s->br.orelse = stmt(p, lx); + else + s->br.orelse = nil; + + break; + + case Kswitch: + t = advance(p, lx); + s->kind = Sswitch; + + if (nomatch(t, Alparen)) { + errorat(lx->pos, "missing opening paren before switch conditional"); + goto Bad; + } + s->br.cond = expr(p, lx); + if (nomatch(t, Arparen)) { + errorat(lx->pos, "missing closing paren after switch conditional"); + goto Bad; + } + s->br.body = stmt(p, lx); + s->br.orelse = nil; + + break; + + case Kfor: + t = advance(p, lx); + s->kind = Sfor; + + if (nomatch(t, Alparen)) { + errorat(lx->pos, "missing opening paren before for loop preamble"); + goto Bad; + } + + if (t.kind == Asemi) + s->loop.init = nil; + else { + // TODO: test for declaration + s->loop.init = (Node *)expr(p, lx); + } + + if (nomatch(t, Asemi)) { + errorat(lx->pos, "missing semicolon"); + goto Bad; + } + + if (t.kind == Asemi) + s->loop.cond = nil; + else + s->loop.cond = expr(p, lx); + + if (nomatch(t, Asemi)) { + errorat(lx->pos, "missing semicolon"); + goto Bad; + } + + if (t.kind == Asemi) + s->loop.step = nil; + else + s->loop.step = expr(p, lx); + + if (nomatch(t, Alparen)) { + errorat(lx->pos, "missing closing paren after for loop preamble"); + goto Bad; + } + s->loop.body = stmt(p, lx); + break; + + case Kwhile: + t = advance(p, lx); + s->kind = Swhile; + if (nomatch(t, Alparen)) { + errorat(lx->pos, "missing opening paren before while loop conditional"); + goto Bad; + } + s->loop.cond = expr(p, lx); + if (nomatch(t, Arparen)) { + errorat(t.pos, "missing closing paren after while loop conditional"); + goto Bad; + } + + s->loop.init = nil; + s->loop.step = nil; + s->loop.body = stmt(p, lx); + break; + + case Kdo: + t = advance(p, lx); + s->kind = Sdo; + s->loop.body = stmt(p, lx); + + if (!iskw(t, Kwhile)) { + errorat(t.pos, "missing while statement conditional after do body"); + goto Bad; + } + t = advance(p, lx); + if (nomatch(t, Alparen)) { + errorat(t.pos, "missing open paren after while conditional"); + goto Bad; + } + + s->loop.init = nil; + s->loop.step = nil; + s->loop.cond = expr(p, lx); + break; + + case Kgoto: + t = advance(p, lx); + s->kind = Sgoto; + if (t.kind != Aident) { + errorat(t.pos, "invalid argument to goto"); + goto Bad; + } + s->jmp.lbl = t.val.s; + t = advance(p, lx); + if (nomatch(t, Asemi)) { + errorat(t.pos, "missing semicolon after goto"); + goto Bad; + } + advance(p, lx); + break; + + case Kcontinue: + t = advance(p, lx); + s->kind = Scontin; + s->jmp.lbl = nil; + s->jmp.x = nil; + if (nomatch(t, Asemi)) { + errorat(t.pos, "missing semicolon after continue"); + goto Bad; + } + advance(p, lx); + break; + + case Kbreak: + t = advance(p, lx); + s->kind = Sbreak; + s->jmp.lbl = nil; + s->jmp.x = nil; + if (nomatch(t, Asemi)) { + errorat(t.pos, "missing semicolon after break"); + goto Bad; + } + advance(p, lx); + break; + + case Kreturn: + t = advance(p, lx); + s->kind = Sreturn; + + s->jmp.lbl = nil; + s->jmp.x = (t.kind == Asemi) ? nil : expr(p, lx); + + t = peek(p, 0); + if (nomatch(t, Asemi)) { + errorat(t.pos, "missing semicolon after return statement"); + goto Bad; + } + break; + + case Kcase: + t = advance(p, lx); + s->kind = Scase; + s->lbl.x = expr(p, lx); + if (nomatch(t, Acolon)) { + errorat(t.pos, "missing colon after default label"); + goto Bad; + } + t = advance(p, lx); + s->lbl.stmt = stmt(p, lx); + break; + + case Kdefault: + t = advance(p, lx); + s->kind = Scase; + s->lbl.x = nil; + if (nomatch(t, Acolon)) { + errorat(t.pos, "missing colon after default label"); + goto Bad; + } + t = advance(p, lx); + s->lbl.stmt = stmt(p, lx); + break; + + default: + panicf("unexpected statement keyword %s", keywords[k]); + } + break; + case Albrace: + s->kind = Sblock; + openscope(p); + if (blkstmt(p, lx, &s)) { + errorat(lx->pos, "failed to parse block statement"); + goto Bad; + } + closescope(p); + break; + + case Asemi: + t = advance(p, lx); + s->kind = Sempty; + break; + + case Aident: + Tlabel: + t = advance(p, lx); + s->kind = Slabel; + if (nomatch(t, Acolon)) { + errorat(t.pos, "missing colon after labelled block"); + goto Bad; + } + t = advance(p, lx); + s->lbl.stmt = stmt(p, lx); + break; + + default: + Texpr: + t = advance(p, lx); + s->kind = Sexpr; + s->x = expr(p, lx); + } + + s->pos.end = lx->pos; + return (Node *)s; +Bad: + errorat(lx->pos, "failed to parse statement"); + return nil; +} + +static +error +blkstmt(Parser *p, Lexer *lx, Stmt **s) +{ + Token t; + int len; + int cap; + Node **ns; + + alloc(*s); + (*s)->kind = Sblock; + (*s)->pos.beg = lx->pos; + + t = peek(p, 0); + if (nomatch(t, Albrakt)) + goto Bad; + advance(p, lx); + + len = 0, cap = 20; + ns = malloc(cap*sizeof(*ns)); + for (;;) { + if (cap == len) { + cap += 20; + ns = realloc(ns, cap*sizeof(*ns)); + } + ns[len++] = stmt(p, lx); + } + + if (nomatch(t, Arbrakt)) + goto Bad; + + (*s)->pos.end = lx->pos; + (*s)->blk.n = len; + movearray((*s)->blk.item, ns, len); + return 0; +Bad: + errorat(lx->pos, "failed to parse block statement"); + free(ns); + return 1; +} + +// ----------------------------------------------------------------------- +// declarations + +/* see https://en.wikipedia.org/wiki/C_data_types */ +static uint64 validtypes[] = { + Tvoid, + Tbool, + + Tchar, + Tsign | Tchar, + Tunsign | Tchar, + + Tshort, Tshort | Tint, + Tsign | Tshort, Tsign | Tshort | Tint, + Tunsign | Tshort, Tunsign | Tshort | Tint, + + 0, Tint, + Tsign, Tsign | Tint, + Tunsign, Tunsign | Tint, + + Tlong, Tlong | Tint, + Tsign | Tlong, Tsign | Tlong | Tint, + Tunsign | Tlong, Tunsign | Tlong | Tint, + + Tvlong | Tlong, Tvlong | Tlong | Tint, + Tsign | Tvlong | Tlong, Tsign | Tvlong | Tlong | Tint, + Tunsign | Tvlong | Tlong, Tunsign | Tvlong | Tlong | Tint, + + Tfloat, + Tdouble, + Tlong | Tfloat, + + Timag, + Tcmplx, + + Tstruct, Tunion, Tenum, Tname, +}; + +static +error +spec(Parser *p, Lexer *lx, uint64 *spec) +{ + Token t; + int n; + uint64 s, sm; + + s = 0; + while (t = peek(p, 0), t.kind == Akeywd) { + switch (n = t.val.i) { + case Kauto: case Kregister: case Kstatic: case Kextern: case Ktypedef: + if (s & MaskMem) { + errorat(lx->pos, "multiple storage class specifiers: second was %s", keywords[n]); + goto Bad; + } + break; + + case Kconst: case Kvolatile: + if (s & Bit(n)) + warnat(lx->pos, "duplicate %s specifier found in declaration", keywords[n]); + break; + + case Ksigned: case Kunsigned: + if (s & MaskSgn) { + if (s & Bit(n)) { + warnat(lx->pos, "duplicated storage class specifier: second was %s", keywords[n]); + break; + } + errorat(lx->pos, "multiple storage class specifiers"); + goto Bad; + } + break; + + case Kshort: + if (s & Tshort) { + warnat(lx->pos, "duplicated short specifier"); + break; + } + break; + + case Klong: + if ((s >> Klong) & 2) { + errorat(lx->pos, "cannot chain three or more long specifiers"); + goto Bad; + } + s += Bit(n); + continue; + + case Kvoid: case Kchar: case Kint: case Kfloat: case Kdouble: + if (s & MaskTyp) { + errorat(lx->pos, "more than one base type specified"); + goto Bad; + } + break; + + case Kstruct: case Kunion: + case Kenum: + panicf("need to implement"); + + default: + errorat(t.pos, "invalid keyword '%s' found in declaration specifier", keywords[n]); + } + + s |= Bit(n); + advance(p, lx); + } + + sm = (((s<<32)>>32) & ~(MaskQul | MaskMem)); + for (n = 0; n < arrlen(validtypes); n++) { + if (sm == validtypes[n]) { + *spec = s; + return 0; + } + } + /* TODO: serialize bitflags to string for nice error message */ + errorat(lx->pos, "invalid type specifier: ''"); +Bad: + errorat(lx->pos, "ignoring specifier"); + *spec = Sbad; + return 1; +} + +/* predeclaration */ +static error dtor(Parser *p, Lexer *lx, Dtor *d); + +static +error +name(Parser *p, Lexer *lx, Name *nm) +{ + int n, k; + Token t; + + t = peek(p, 0); + switch (k = t.kind) { + case Aident: + nm->kind = Nident; + nm->ident = t.val.s; + break; + + case Alparen: + nm->kind = Nparen; + alloc(nm->paren); + if (dtor(p, lx, nm->paren)) { + /* we are using an arena allocator so can't clean up */ + errorat(lx->pos, "invalid declarator in parenthesis"); + nm->paren = nil; + goto Bad; + } + t = peek(p, 0); + if (t.kind != Arparen) { + errorat(lx->pos, "missing closing paren in declarator"); + goto Bad; + } + break; + + default: + errorat(lx->pos, "invalid token '%s' in name declaration", tokens[k]); + goto Bad; + } + + t = advance(p, lx); + switch (k = t.kind) { + case Albrakt: + nm->kind |= Nindex; + panicf("need to implement"); + break; + + case Alparen: + nm->kind |= Ncall; + panicf("need to implement"); + break; + + default: + ; + } + + return 0; +Bad: + return 1; +} + +/* pointer kind is partitioned into 8x6 regions */ +static +error +dtor(Parser *p, Lexer *lx, Dtor *d) +{ + int n, k; + error err; + Token t; + Dtor *link; + Ptr *ptr, *x; + + err = 1; + + ptr = &d->ptr; + ptr->kind = 0; + ptr->link = nil; + + t = peek(p, 0); + if (t.kind != Astar) + if (t.kind == Aident || t.kind == Arparen) + goto Name; + goto Bad; +Ptr: + ptr->kind |= Bit(n); + advance(p, lx); +Key: + t = peek(p, 0); + switch (k = t.kind) { + case Akeywd: + if (Kconst <= k && k <= Katomic) + ptr->kind |= Bit(n + (t.val.i - Kconst + 1)); + else { + errorat(lx->pos, "invalid keyword '%s' modifies pointer", keywords[t.val.i]); + goto Bad; + } + advance(p, lx); + goto Key; + + case Astar: + if (++n >= 8) { + alloc(x); + x->kind = 0; + x->link = nil; + ptr->link = x; + ptr = x; + n = 0; + } + goto Ptr; + + case Aident: + case Alparen: + goto Name; + + default: + errorat(lx->pos, "invalid token '%s' modifies pointer specification", tokens[t.kind]); + goto Bad; + } +Name: + return name(p, lx, &d->name); +Bad: + return err; +} + +static +Decl * +decl(Parser *p, Lexer *lx) +{ + Token t; + Decl *d; + Dtor dt, *dtp; + struct Decls **curr; + + alloc(d); + d->kind = 0; + d->pos.beg = lx->pos; + + if (spec(p, lx, &d->spec)) { + errorat(lx->pos, "invalid declaration specifier"); + goto Bad; + } + dtp = &dt; + curr = &d->var.link; +Dtor: + if (dtor(p, lx, dtp)) { + errorat(lx->pos, "invalid declarator"); + goto Bad; + } + + t = peek(p, 0); + if (t.kind == Aeq) { + t = advance(p, lx); + d->kind = (d->kind != Dvars) ? Dvar : Dvars; + d->var.init = expr(p, lx); + } + if (t.kind == Acomma) { + d->kind = Dvars; + alloc(*curr); + dtp = &(*curr)->dtor; + curr = &(*curr)->link; + advance(p, lx); + goto Dtor; + } + + t = peek(p, 0); + if (t.kind == Albrace) { + if (!attop(p)) { + errorat(lx->pos, "nested function declarations"); + goto Bad; + } + if (d->kind != 0) { + errorat(lx->pos, "attempting to define a function for a variable declaration"); + goto Bad; + } + d->kind = Dfunc; + alloc(d->func.body); + openscope(p); + if (blkstmt(p, lx, &d->func.body)) { + errorat(lx->pos, "failed to parse function body"); + goto Bad; + } + closescope(p); + } + + d->pos.end = lx->pos; + declareobj(p, d); + return d; +Bad: + errorat(lx->pos, "failed to parse top level declaration"); + return nil; +} + +// ----------------------------------------------------------------------- +// top level api + +void +parse(Parser *p, Lexer *lx) +{ + Token tok; + p->sp = p->spstk; + + while ((tok = peek(p, 0)), tok.kind > Aeof) { + if (p->ast.len >= p->ast.cap) { + p->ast.cap += 20; + p->ast.decls = realloc(p->ast.decls, p->ast.cap*sizeof(*p->ast.decls)); + } + p->ast.decls[p->ast.len++] = decl(p, lx); + } +} diff --git a/sys/cmd/cc/cc.c b/sys/cmd/cc/cc.c index fcb9217..cf479f8 100644 --- a/sys/cmd/cc/cc.c +++ b/sys/cmd/cc/cc.c @@ -297,9 +297,7 @@ compile(byte *path) .col = 1, }; - while (tok = lex(&C.lxr), tok.kind > Aeof) { - // puttok(tok); - } + parse(&C.psr, &C.lxr); return tok.kind != Anil; } @@ -350,3 +348,4 @@ main(int argc, byte *argv[]) exit(0); } + diff --git a/sys/cmd/cc/cc.h b/sys/cmd/cc/cc.h index 144eb4a..7c9d679 100644 --- a/sys/cmd/cc/cc.h +++ b/sys/cmd/cc/cc.h @@ -15,14 +15,18 @@ typedef struct Lexer Lexer; typedef struct Sym Sym; typedef struct Type Type; +typedef struct Scope Scope; +typedef struct Parser Parser; + +typedef struct Node Node; typedef struct Ptr Ptr; typedef struct Name Name; +typedef struct Dtor Dtor; typedef struct Decl Decl; typedef struct Stmt Stmt; typedef struct Expr Expr; -/* maps */ typedef struct SymTab SymTab; typedef struct StrTab StrTab; @@ -34,21 +38,26 @@ typedef struct Compiler Compiler; KEYWORD(Kregister,"register") \ KEYWORD(Kstatic,"static") \ KEYWORD(Kextern,"extern") \ + KEYWORD(Ktls,"thread_local") \ KEYWORD(Ktypedef,"typedef") \ KEYWORD(Kconst,"const") \ KEYWORD(Kvolatile,"volatile") \ KEYWORD(Krestrict,"restrict") \ + KEYWORD(Katomic,"_Atomic") \ KEYWORD(Kinline,"inline") \ + KEYWORD(Knoret,"_Noreturn") \ + KEYWORD(Ksigned,"signed") \ + KEYWORD(Kunsigned,"unsigned") \ KEYWORD(Kvoid,"void") \ + KEYWORD(Kbool,"_Bool") \ KEYWORD(Kchar,"char") \ - KEYWORD(Kint,"int") \ KEYWORD(Kfloat,"float") \ KEYWORD(Kdouble,"double") \ - KEYWORD(Ksigned,"signed") \ - KEYWORD(Kunsigned,"unsigned") \ + KEYWORD(Kcomplex,"complex") \ + KEYWORD(Kimaginary,"imaginary") \ + KEYWORD(Kint,"int") \ KEYWORD(Kshort,"short") \ KEYWORD(Klong,"long") \ - KEYWORD(Kvlong,"vlong") \ KEYWORD(Kstruct,"struct") \ KEYWORD(Kunion,"union") \ KEYWORD(Kenum,"enum") \ @@ -201,7 +210,7 @@ extern byte *tokens[NUM_TOKENS]; struct Token { uint32 kind; - Range pos; + Pos pos; union { byte *s; double f; @@ -215,9 +224,12 @@ struct Token enum { - Svar = 1 << 0, - Sfunc = 1 << 1, - Smacro = 1 << 2, + Sobj = iota(0), + Stype = iota(2), + Stag = iota(3), + Senum = iota(4), + Sstmt = iota(5), + Smacro = iota(6), }; struct Sym @@ -226,7 +238,9 @@ struct Sym string name; union { string macro; - /*Func *func;*/ + Decl *obj; + Type *tag; + Stmt *blk; }; }; @@ -244,6 +258,7 @@ struct SymTab Sym *define(SymTab *tab, string ident, int kind); Sym *lookup(SymTab *tab, string ident); error forget(SymTab *tab, string ident); +void forgetall(SymTab *tab); struct Lexer { @@ -276,6 +291,7 @@ void puttok(Token); // parsing & type resolution // tokens -> ast +/* parent data */ struct Node { Range pos; @@ -287,9 +303,9 @@ enum { Nbad, /* labels */ - Slabel, Scase, + Sempty, Slabel, Scase, Sblock, - Sexpr, + Sexpr, Sdecl, Sselect, /* loops */ Sfor, Swhile, Sdo, @@ -317,8 +333,17 @@ enum /* lists */ Xcomma, + Dfunc, + Dtype, Dvar, + Dvars, + + /* names (shouldn't interact with base AST node enumeration */ + Nident = iota(0), + Nparen = iota(1), + Nindex = iota(2), + Ncall = iota(3), }; /* statements */ @@ -331,7 +356,7 @@ struct Stmt string ident; Expr *x; }; - Stmt *stmt; + Node *stmt; } lbl; struct { long n; @@ -339,13 +364,10 @@ struct Stmt } blk; Expr *x; struct { - union { - Expr *x; - Decl *d; - } init; + Node *init; Expr *cond; Expr *step; - Stmt *body; + Node *body; } loop; union{ string lbl; @@ -353,9 +375,9 @@ struct Stmt } jmp; struct { Expr *cond; - Stmt *body; - Stmt *orelse; - } sel; + Node *body; + Node *orelse; + } br; }; }; @@ -363,6 +385,7 @@ struct Stmt struct Expr { struct Node; + uint64 type; union { struct { Expr *l; @@ -403,83 +426,162 @@ struct Name { struct Node; union { - string ident; - struct Dtor *paren; - struct { - uint32 kind; - Name *base; - union { - Expr *index; - /* TODO: - * func params - * variadic arrays - * compound set notation - */ - }; - } suffix; + string ident; + struct Dtor *paren; + }; + union { + Expr *i; + Expr *p; }; }; struct Dtor { - Range pos; Ptr ptr; Name name; }; // specifiers +/* + * the design is the following: + * type info is held w/in a 64 bit integer. + * the bottom 32 bits are associated to specializations + * the top 32 bits index into a type-info array held by the compiler. + */ enum { /* memory */ - Mauto = iota(0), - Mtype = iota(1), - Mstatic = iota(2), - Mreg = iota(3), - Mtls = iota(4), + Mauto = iota(Kauto), + Mstatic = iota(Kstatic), + Mreg = iota(Kregister), + Mtls = iota(Ktls), + Mtype = iota(Ktypedef), + + MaskMem = Mauto | Mstatic | Mreg | Mtls | Mtype, /* qualifiers */ - Qconst = iota(5), - Qrestr = iota(6), - Qvoltl = iota(7), - Qatom = iota(8), + Qconst = iota(Kconst), + Qrestr = iota(Krestrict), + Qvoltl = iota(Kvolatile), + Qatom = iota(Katomic), + + MaskQul = Qconst | Qrestr | Qvoltl | Qatom, - Finlne = iota(9), - Fnoret = iota(10), + Finlne = iota(Kinline), + Fnoret = iota(Knoret), + + MaskFcn = Finlne | Fnoret, /* types */ - Tlong = iota(12), - Tvlong = iota(13), - Tsign = iota(14), - Tunsign = iota(15), - - Tvoid = iota(16), - Tchar = iota(17), - Tshort = iota(18), - Tfloat = iota(19), - Tdouble = iota(20), - Tcmplx = iota(21), - Timag = iota(22), - - Tatom = iota(23), - - Taggr = iota(24), - Tenum = iota(25), - Tname = iota(26), - - /* alignment */ - Alnas = iota(27), + Tsign = iota(Ksigned), + Tunsign = iota(Kunsigned), + + MaskSgn = Tsign | Tunsign, + + Tvoid = iota(Kvoid), + + Tfloat = iota(Kfloat), + Tdouble = iota(Kdouble), + Tcmplx = iota(Kcomplex), + Timag = iota(Kimaginary), + + MaskFlt = Tfloat | Tdouble | Tcmplx | Timag, + + Tchar = iota(Kchar), + Tbool = iota(Kbool), + + Tshort = iota(Kshort), + Tint = iota(Kint), + Tlong = iota(Klong), + Tvlong = iota(Klong+1), + + MaskInt = Tshort | Tint | Tlong | Tvlong, + MaskTyp = Tvoid | Tbool | Tchar | Tint | Tfloat | Timag | Tcmplx, + /* + * NOTE IMPORTANT: vlong takes over the struct bit place + * DON'T MOVE KEYWORDS WITHOUT REORGANIZING + */ + Tstruct = iota(Kstruct+1), + Tunion = iota(Kunion+1), + Tenum = iota(Kenum+1), + Tname = iota(Kenum+2), + + Sbad = -1, +}; + +struct Decls +{ + union { + struct Dtor; + Dtor dtor; + }; + Expr *init; + struct Decls *link; }; struct Decl { struct Node; + uint64 spec; union { + struct { + struct Dtor; + Stmt *body; + } func; + struct { + struct Dtor; + Expr *init; + struct Decls *link; + } var; + struct Dtor type; + }; +}; +/* types */ +struct Type +{ + Sym *sym; + string ident; + uintptr size; + uint16 align; + union { + struct { + int len; + Dtor *d; + Expr *x; + } su; + struct { + int len; + string *s; + Expr *x; + } en; }; }; +struct Scope +{ + SymTab tags; + SymTab objs; +}; + +struct Parser +{ + Token tok[2]; + struct { + int cap; + int len; + Decl **decls; + } ast; + + Scope *sp; + Scope spstk[20]; +}; + +/* ast.c functions */ +void parse(Parser *, Lexer *); + // ----------------------------------------------------------------------- -// compiler +// global compiler enum { @@ -525,9 +627,18 @@ string internview(byte* beg, byte *end); /* main data */ struct Compiler { + /* storage */ mem·Arena *heap; StrTab strs; + string outfile; + + struct { + int cap; + int len; + Type *info; + } type; + /* i/o data */ struct { int cap; int len; @@ -542,15 +653,11 @@ struct Compiler string *path; } omit; + /* partitioned data for stages */ Lexer lxr; - - string outfile; - struct { - int cap; - int len; - Decl *decls; - } ast; + Parser psr; }; + extern Compiler C; /* cc.c compiler functions */ diff --git a/sys/cmd/cc/lex.c b/sys/cmd/cc/lex.c index e35ea7e..a8dabec 100644 --- a/sys/cmd/cc/lex.c +++ b/sys/cmd/cc/lex.c @@ -257,7 +257,7 @@ lex(Lexer *lx) GetByte: b = getbyte(lx); Dispatch: - tok.pos.beg = lx->pos; + tok.pos = lx->pos; if ((b != EOF && b >= RuneSelf) || b == '_') goto Talpha; @@ -631,7 +631,9 @@ Dispatch: n = intern(&tok.val.s); if (n < arrlen(keywords)) { - tok.kind = Akeywd; + tok.kind = Akeywd; + tok.val.i = n; + goto Return; } sym = lookup(&lx->sym, tok.val.s); @@ -662,7 +664,6 @@ Dispatch: Return: lx->b = lx->buf; - tok.pos.end = lx->pos; return tok; Nospace: @@ -773,6 +774,12 @@ forget(SymTab *tab, string ident) return 1; } +void +forgetall(SymTab *tab) +{ + MAP_RESET(tab); +} + // ----------------------------------------------------------------------- // error reporting diff --git a/sys/cmd/cc/rules.mk b/sys/cmd/cc/rules.mk index b32d5b6..d7a4ac1 100644 --- a/sys/cmd/cc/rules.mk +++ b/sys/cmd/cc/rules.mk @@ -5,6 +5,7 @@ include share/push.mk SRCS_$(d) := \ $(d)/pp.c \ $(d)/lex.c \ + $(d)/ast.c \ $(d)/cc.c LIBS_$(d) := -- cgit v1.2.1