aboutsummaryrefslogtreecommitdiff
path: root/sys
diff options
context:
space:
mode:
authorNicholas Noll <nbnoll@eml.cc>2020-05-21 18:53:23 -0700
committerNicholas Noll <nbnoll@eml.cc>2020-05-21 18:53:23 -0700
commit7d2a1280cd4321d2a3b2fff0b2413085347a7b4d (patch)
tree994c9ebac9c29ff2e5b833ce514d014ca9cdb8b7 /sys
parenta9e3c42ac5696c56adaffbb4007e9d969563337b (diff)
feat: prototype of ast stmt and decl implementations
Diffstat (limited to 'sys')
-rw-r--r--sys/cmd/cc/ast.c822
-rw-r--r--sys/cmd/cc/cc.c5
-rw-r--r--sys/cmd/cc/cc.h259
-rw-r--r--sys/cmd/cc/lex.c13
-rw-r--r--sys/cmd/cc/rules.mk1
5 files changed, 1018 insertions, 82 deletions
diff --git a/sys/cmd/cc/ast.c b/sys/cmd/cc/ast.c
new file mode 100644
index 0000000..f5c0ba2
--- /dev/null
+++ b/sys/cmd/cc/ast.c
@@ -0,0 +1,822 @@
+#include "cc.h"
+/*
+ * Parser-local helper macros.
+ * Every macro argument is parenthesized so that expressions such as
+ * `len + 1` or `&parser` can be passed safely.
+ */
+
+/* allocate one object of the pointee type of ptr from the compiler arena */
+#define alloc(ptr) ((ptr) = mem·arenaalloc(C.heap, 1, sizeof *(ptr)))
+/* copy the n elements of the malloc'd array arr into the arena, then free arr */
+#define movearray(dst, arr, n) ((dst) = mem·arenaalloc(C.heap, (n), sizeof *(arr)), memcpy((dst), (arr), (n) * sizeof *(arr)), free(arr))
+
+/* true when the scope pointer sits on the first entry of the scope stack */
+#define attop(prs) ((uintptr)(prs)->sp == (uintptr)(prs)->spstk)
+/* i-th lookahead token (0 = current, 1 = next) */
+#define peek(p, i) ((p)->tok[(i)])
+/* true when token t is the keyword k */
+#define iskw(t, k) (((t).kind == Akeywd) && (t).val.i == (k))
+/* shift the lookahead window by one token and yield the new current token */
+#define advance(p, l) ((p)->tok[0] = (p)->tok[1], (p)->tok[1] = lex(l), (p)->tok[0])
+
+/* single-bit mask; 64 bits wide so that shifting by a keyword index cannot overflow an int */
+#define Bit(i) ((uint64)1 << (i))
+
+// -----------------------------------------------------------------------
+// helper functions
+
+/*
+ * nameof returns the identifier held by a declarator name.
+ * Parenthesized names are unwrapped until an identifier is found.
+ * An ill-formed name (neither identifier nor a parenthesized
+ * declarator that is actually present) is reported through panicf.
+ */
+string
+nameof(Name *n)
+{
+    for (;;) {
+        if (n->kind & Nident)
+            return n->ident;
+        /* name() resets paren to nil when the inner declarator failed to parse */
+        if (!(n->kind & Nparen) || n->paren == nil)
+            break;
+        n = &n->paren->name;
+    }
+
+    panicf("ill-formed declarator name");
+    /* keeps every path returning a value should panicf ever return */
+    return nil;
+}
+
+/*
+ * openscope pushes a new scope by moving the scope pointer one
+ * slot up the fixed-size scope stack; running off the end of the
+ * stack is reported through panicf.
+ */
+static
+void
+openscope(Parser *p)
+{
+    Scope *limit;
+
+    limit = p->spstk + arrlen(p->spstk);
+    p->sp += 1;
+    if (p->sp == limit)
+        panicf("too many nested scopes: crashing");
+}
+
+/*
+ * TODO: save the symbol table with the ast node
+ * write a "copy(move)scope"
+ */
+
+/*
+ * closescope pops the innermost scope: both of its symbol tables
+ * are emptied and the scope pointer moves one slot down.
+ * Closing the bottom (top level) scope is reported through panicf.
+ */
+static
+void
+closescope(Parser *p)
+{
+    Scope *top;
+
+    top = p->sp;
+    if (top == p->spstk)
+        panicf("attempting to close the top level scope: crashing");
+
+    forgetall(&top->objs);
+    forgetall(&top->tags);
+    p->sp = top - 1;
+}
+
+/*
+ * declarename enters ident into the object table of the current scope.
+ * Returns 1 on success; reports a redeclaration and returns 0 when the
+ * identifier is already present in that same scope.
+ */
+static
+int
+declarename(Parser *p, string ident)
+{
+    if (lookup(&p->sp->objs, ident)) {
+        errorat(peek(p, 0).pos, "redeclaration of name '%s' in object space", ident);
+        return 0;
+    }
+
+    define(&p->sp->objs, ident, Sobj);
+    return 1;
+}
+
+/*
+ * declareobj registers every name introduced by declaration d in the
+ * object namespace of the current scope.
+ *
+ * For a declarator list (Dvars) the head declarator lives in d->var
+ * and the remaining ones hang off d->var.link; all of them are
+ * declared, stopping at the first redeclaration.
+ */
+static
+void
+declareobj(Parser *p, Decl *d)
+{
+    struct Decls *link;
+
+    switch (d->kind) {
+    case Dfunc:
+        declarename(p, nameof(&d->func.name));
+        return;
+
+    case Dvar:
+        declarename(p, nameof(&d->var.name));
+        return;
+
+    case Dtype:
+        declarename(p, nameof(&d->type.name));
+        return;
+
+    case Dvars:
+        if (!declarename(p, nameof(&d->var.name)))
+            return;
+        /* walk the list; the link must advance or the loop never ends */
+        for (link = d->var.link; link != nil; link = link->link) {
+            if (!declarename(p, nameof(&link->name)))
+                return;
+        }
+        return;
+
+    default:
+        panicf("unrecognized node kind %d inside declaraction", d->kind);
+        /* nothing to declare for an unknown node kind */
+        return;
+    }
+}
+
+/*
+ * declaretag enters the identifier of type t into the tag table of
+ * the current scope and returns its symbol.  When the tag already
+ * exists in that scope an error is reported and the existing symbol
+ * is returned instead.
+ */
+static
+Sym *
+declaretag(Parser *p, Type *t)
+{
+    Sym *prev;
+
+    prev = lookup(&p->sp->tags, t->ident);
+    if (prev == nil)
+        return define(&p->sp->tags, t->ident, Stype);
+
+    errorat(peek(p, 0).pos, "redeclaration of name '%s' in tag space", t->ident);
+    return prev;
+}
+
+/*
+ * lookupobj searches the object tables from the innermost scope
+ * outwards and returns the first symbol named ident, or nil when no
+ * enclosing scope declares it.
+ */
+static
+Sym *
+lookupobj(Parser *p, string ident)
+{
+    Sym *sym;
+    Scope *it;
+
+    /*
+     * stop on the bottom entry itself: stepping the iterator below
+     * spstk[0] would form a pointer before the start of the array
+     */
+    for (it = p->sp; ; it--) {
+        sym = lookup(&it->objs, ident);
+        if (sym != nil || it == p->spstk)
+            return sym;
+    }
+}
+
+/*
+ * lookuptag searches the tag tables from the innermost scope
+ * outwards and returns the first symbol named ident, or nil when no
+ * enclosing scope declares it.
+ */
+static
+Sym *
+lookuptag(Parser *p, string ident)
+{
+    Sym *sym;
+    Scope *it;
+
+    /*
+     * stop on the bottom entry itself: stepping the iterator below
+     * spstk[0] would form a pointer before the start of the array
+     */
+    for (it = p->sp; ; it--) {
+        sym = lookup(&it->tags, ident);
+        if (sym != nil || it == p->spstk)
+            return sym;
+    }
+}
+
+/*
+ * nomatch checks that token t has the wanted kind.
+ * Returns 0 when it matches; otherwise reports the mismatch at the
+ * token's position and returns 1.
+ */
+static
+int
+nomatch(Token t, vlong kind)
+{
+    if (t.kind == kind)
+        return 0;
+
+    /* first argument is what was wanted, second is what was actually seen */
+    errorat(t.pos, "expected token '%s', instead found '%s'", tokens[kind], tokens[t.kind]);
+    return 1;
+}
+
+// -----------------------------------------------------------------------
+// forward declarations
+
+/* parses one declaration; used by stmt() before its definition further down */
+static Decl *decl(Parser *p, Lexer *lx);
+/* NOTE(review): expr is declared and called in this file but no definition is visible here - confirm where it is provided */
+static Expr *expr(Parser *p, Lexer *lx);
+/* parses a block statement into *s; returns non-zero on failure */
+static error blkstmt(Parser *p, Lexer *lx, Stmt **s);
+
+// -----------------------------------------------------------------------
+// expressions
+
+// -----------------------------------------------------------------------
+// statements
+
+/*
+ * want consumes the current token when it has the given kind and
+ * returns 1; otherwise it reports `what` at that token and returns 0
+ * without consuming anything.
+ */
+static
+int
+want(Parser *p, Lexer *lx, vlong kind, byte *what)
+{
+    Token t;
+
+    t = peek(p, 0);
+    if (nomatch(t, kind)) {
+        errorat(t.pos, "%s", what);
+        return 0;
+    }
+    advance(p, lx);
+    return 1;
+}
+
+/*
+ * stmt parses a single statement starting at the current token.
+ *
+ * Returns the parsed node (a Stmt, or a Decl when the statement turns
+ * out to be a declaration) or nil after reporting an error.
+ *
+ * Token protocol: on entry the current token is the first token of
+ * the statement; on success the current token is the first token
+ * after it, i.e. every statement consumes its own terminator.
+ * expr() is assumed to leave the first token it did not consume as
+ * the current token (this is how the return statement already used
+ * it) - TODO confirm once expr() is implemented.
+ */
+static
+struct Node*
+stmt(Parser *p, Lexer *lx)
+{
+    int k, kw, islabel;
+    Stmt *s;
+    Sym *sym;
+    Token t;
+
+    t = peek(p, 0);
+    k = t.kind;
+    islabel = 0;
+
+    /* intercept decl before allocating a statement */
+    if (k == Aident) {
+        if (peek(p, 1).kind == Acolon)
+            islabel = 1;
+        else {
+            sym = lookupobj(p, t.val.s);
+            if (!sym) {
+                errorat(lx->pos, "unrecognized type identifier '%s'", t.val.s);
+                goto Bad;
+            }
+            if (sym->kind == Stype)
+                return (Node *)decl(p, lx);
+            if (sym->kind != Sobj) {
+                errorat(lx->pos, "bad symbol type used as type identifier");
+                goto Bad;
+            }
+            /* an object name starts an expression statement, handled below */
+        }
+    }
+
+    if (k == Akeywd) {
+        /* the ranges are over keyword indices, which live in val.i, not in the token kind */
+        kw = t.val.i;
+        if ((Kauto <= kw && kw <= Katomic) || (Ksigned <= kw && kw <= Klong))
+            return (Node *)decl(p, lx);
+    }
+
+    /* every path below writes through s, so allocate before dispatching */
+    alloc(s);
+    s->pos.beg = lx->pos;
+
+    switch (k) {
+    case Akeywd:
+        switch (k = t.val.i) {
+        case Kif:
+            advance(p, lx);
+            s->kind = Sif;
+
+            if (!want(p, lx, Alparen, "missing opening paren before if conditional"))
+                goto Bad;
+            s->br.cond = expr(p, lx);
+            if (!want(p, lx, Arparen, "missing closing paren after if conditional"))
+                goto Bad;
+            s->br.body = stmt(p, lx);
+
+            s->br.orelse = nil;
+            if (iskw(peek(p, 0), Kelse)) {
+                /* step over 'else' so the nested call starts on the branch itself */
+                advance(p, lx);
+                s->br.orelse = stmt(p, lx);
+            }
+            break;
+
+        case Kswitch:
+            advance(p, lx);
+            s->kind = Sswitch;
+
+            if (!want(p, lx, Alparen, "missing opening paren before switch conditional"))
+                goto Bad;
+            s->br.cond = expr(p, lx);
+            if (!want(p, lx, Arparen, "missing closing paren after switch conditional"))
+                goto Bad;
+            s->br.body = stmt(p, lx);
+            s->br.orelse = nil;
+            break;
+
+        case Kfor:
+            advance(p, lx);
+            s->kind = Sfor;
+
+            if (!want(p, lx, Alparen, "missing opening paren before for loop preamble"))
+                goto Bad;
+
+            /* TODO: test for declaration */
+            s->loop.init = nil;
+            if (peek(p, 0).kind != Asemi)
+                s->loop.init = (Node *)expr(p, lx);
+            if (!want(p, lx, Asemi, "missing semicolon"))
+                goto Bad;
+
+            s->loop.cond = nil;
+            if (peek(p, 0).kind != Asemi)
+                s->loop.cond = expr(p, lx);
+            if (!want(p, lx, Asemi, "missing semicolon"))
+                goto Bad;
+
+            /* the step clause is terminated by the closing paren, not by a semicolon */
+            s->loop.step = nil;
+            if (peek(p, 0).kind != Arparen)
+                s->loop.step = expr(p, lx);
+            if (!want(p, lx, Arparen, "missing closing paren after for loop preamble"))
+                goto Bad;
+
+            s->loop.body = stmt(p, lx);
+            break;
+
+        case Kwhile:
+            advance(p, lx);
+            s->kind = Swhile;
+            if (!want(p, lx, Alparen, "missing opening paren before while loop conditional"))
+                goto Bad;
+            s->loop.cond = expr(p, lx);
+            if (!want(p, lx, Arparen, "missing closing paren after while loop conditional"))
+                goto Bad;
+
+            s->loop.init = nil;
+            s->loop.step = nil;
+            s->loop.body = stmt(p, lx);
+            break;
+
+        case Kdo:
+            advance(p, lx);
+            s->kind = Sdo;
+            s->loop.body = stmt(p, lx);
+
+            /* re-read the current token: the body moved the lookahead */
+            t = peek(p, 0);
+            if (!iskw(t, Kwhile)) {
+                errorat(t.pos, "missing while statement conditional after do body");
+                goto Bad;
+            }
+            advance(p, lx);
+            if (!want(p, lx, Alparen, "missing open paren after while conditional"))
+                goto Bad;
+
+            s->loop.init = nil;
+            s->loop.step = nil;
+            s->loop.cond = expr(p, lx);
+            if (!want(p, lx, Arparen, "missing closing paren after while loop conditional"))
+                goto Bad;
+            if (!want(p, lx, Asemi, "missing semicolon after do while statement"))
+                goto Bad;
+            break;
+
+        case Kgoto:
+            t = advance(p, lx);
+            s->kind = Sgoto;
+            if (t.kind != Aident) {
+                errorat(t.pos, "invalid argument to goto");
+                goto Bad;
+            }
+            s->jmp.lbl = t.val.s;
+            advance(p, lx);
+            if (!want(p, lx, Asemi, "missing semicolon after goto"))
+                goto Bad;
+            break;
+
+        case Kcontinue:
+            advance(p, lx);
+            s->kind = Scontin;
+            s->jmp.lbl = nil;
+            s->jmp.x = nil;
+            if (!want(p, lx, Asemi, "missing semicolon after continue"))
+                goto Bad;
+            break;
+
+        case Kbreak:
+            advance(p, lx);
+            s->kind = Sbreak;
+            s->jmp.lbl = nil;
+            s->jmp.x = nil;
+            if (!want(p, lx, Asemi, "missing semicolon after break"))
+                goto Bad;
+            break;
+
+        case Kreturn:
+            t = advance(p, lx);
+            s->kind = Sreturn;
+
+            s->jmp.lbl = nil;
+            s->jmp.x = (t.kind == Asemi) ? nil : expr(p, lx);
+
+            /* consume the terminator like the other jump statements do */
+            if (!want(p, lx, Asemi, "missing semicolon after return statement"))
+                goto Bad;
+            break;
+
+        case Kcase:
+            advance(p, lx);
+            s->kind = Scase;
+            s->lbl.x = expr(p, lx);
+            if (!want(p, lx, Acolon, "missing colon after case label"))
+                goto Bad;
+            s->lbl.stmt = stmt(p, lx);
+            break;
+
+        case Kdefault:
+            advance(p, lx);
+            s->kind = Scase;
+            s->lbl.x = nil;
+            if (!want(p, lx, Acolon, "missing colon after default label"))
+                goto Bad;
+            s->lbl.stmt = stmt(p, lx);
+            break;
+
+        default:
+            panicf("unexpected statement keyword %s", keywords[k]);
+        }
+        break;
+
+    case Albrace:
+        s->kind = Sblock;
+        openscope(p);
+        if (blkstmt(p, lx, &s)) {
+            /* keep the scope stack balanced on the error path as well */
+            closescope(p);
+            errorat(lx->pos, "failed to parse block statement");
+            goto Bad;
+        }
+        closescope(p);
+        break;
+
+    case Asemi:
+        advance(p, lx);
+        s->kind = Sempty;
+        break;
+
+    case Aident:
+        if (!islabel)
+            goto Texpr;
+        s->kind = Slabel;
+        /* remember the label before the lookahead moves past it */
+        s->lbl.ident = t.val.s;
+        advance(p, lx);
+        if (!want(p, lx, Acolon, "missing colon after labelled block"))
+            goto Bad;
+        s->lbl.stmt = stmt(p, lx);
+        break;
+
+    default:
+    Texpr:
+        /* the expression starts at the current token, so nothing is skipped first */
+        s->kind = Sexpr;
+        s->x = expr(p, lx);
+        if (!want(p, lx, Asemi, "missing semicolon after expression statement"))
+            goto Bad;
+    }
+
+    s->pos.end = lx->pos;
+    return (Node *)s;
+Bad:
+    errorat(lx->pos, "failed to parse statement");
+    return nil;
+}
+
+/*
+ * blkstmt parses a brace-delimited block into a freshly allocated
+ * Stmt stored in *s.
+ *
+ * On entry the current token must be the opening brace; on success
+ * the closing brace has been consumed and the collected statements
+ * have been moved into the arena.  Returns 0 on success and 1 after
+ * reporting an error.  Opening and closing the scope is left to the
+ * caller.
+ *
+ * NOTE(review): the closing token is spelled Arbrace by symmetry with
+ * Albrace; that name is not visible in this file - confirm.
+ */
+static
+error
+blkstmt(Parser *p, Lexer *lx, Stmt **s)
+{
+    Token t;
+    int len;
+    int cap;
+    Node *n;
+    Node **ns, **grown;
+
+    /* set before the first goto Bad so the cleanup never frees garbage */
+    ns = nil;
+
+    alloc(*s);
+    (*s)->kind = Sblock;
+    (*s)->pos.beg = lx->pos;
+
+    /* callers dispatch on Albrace, so that is the token a block opens with */
+    t = peek(p, 0);
+    if (nomatch(t, Albrace))
+        goto Bad;
+    advance(p, lx);
+
+    len = 0, cap = 20;
+    ns = malloc(cap*sizeof(*ns));
+    if (ns == nil)
+        panicf("out of memory");
+
+    while (t = peek(p, 0), t.kind != Arbrace) {
+        /* parse() treats kinds up to Aeof as the end of the token stream */
+        if (t.kind <= Aeof) {
+            errorat(t.pos, "unexpected end of input inside block statement");
+            goto Bad;
+        }
+
+        n = stmt(p, lx);
+        if (n == nil)
+            goto Bad;
+
+        if (cap == len) {
+            cap += 20;
+            grown = realloc(ns, cap*sizeof(*ns));
+            if (grown == nil)
+                panicf("out of memory");
+            ns = grown;
+        }
+        ns[len++] = n;
+    }
+    /* step over the closing brace */
+    advance(p, lx);
+
+    (*s)->pos.end = lx->pos;
+    (*s)->blk.n = len;
+    if (len == 0) {
+        (*s)->blk.item = nil;
+        free(ns);
+    } else
+        movearray((*s)->blk.item, ns, len);
+    return 0;
+Bad:
+    errorat(lx->pos, "failed to parse block statement");
+    free(ns);
+    return 1;
+}
+
+// -----------------------------------------------------------------------
+// declarations
+
+/* see https://en.wikipedia.org/wiki/C_data_types */
+/*
+ * Every combination of type-specifier bits that spec() accepts, with
+ * storage class and qualifier bits already masked away.
+ *
+ * "long long" is Tvlong on its own: spec() adds the long bit a second
+ * time, which carries out of Tlong into Tvlong and leaves Tlong clear.
+ * "long double" is Tlong | Tdouble.
+ */
+static uint64 validtypes[] = {
+    Tvoid,
+    Tbool,
+
+    Tchar,
+    Tsign | Tchar,
+    Tunsign | Tchar,
+
+    Tshort, Tshort | Tint,
+    Tsign | Tshort, Tsign | Tshort | Tint,
+    Tunsign | Tshort, Tunsign | Tshort | Tint,
+
+    0, Tint,
+    Tsign, Tsign | Tint,
+    Tunsign, Tunsign | Tint,
+
+    Tlong, Tlong | Tint,
+    Tsign | Tlong, Tsign | Tlong | Tint,
+    Tunsign | Tlong, Tunsign | Tlong | Tint,
+
+    Tvlong, Tvlong | Tint,
+    Tsign | Tvlong, Tsign | Tvlong | Tint,
+    Tunsign | Tvlong, Tunsign | Tvlong | Tint,
+
+    Tfloat,
+    Tdouble,
+    Tlong | Tdouble,
+
+    Timag,
+    Tcmplx,
+
+    Tstruct, Tunion, Tenum, Tname,
+};
+
+/*
+ * spec parses the run of declaration-specifier keywords at the
+ * current token into a bit set, one bit per keyword index.
+ *
+ * On success the bit set is stored in *spec and 0 is returned, with
+ * the current token being the first non-keyword token.  On failure
+ * *spec is set to Sbad and 1 is returned.
+ */
+static
+error
+spec(Parser *p, Lexer *lx, uint64 *spec)
+{
+    Token t;
+    int n;
+    uint64 s, sm, bit;
+
+    s = 0;
+    while (t = peek(p, 0), t.kind == Akeywd) {
+        n = t.val.i;
+        /* 64-bit shift with a bounded count; identical to 1<<n for every n below 64 */
+        bit = (uint64)1 << (n & 63);
+
+        switch (n) {
+        case Kauto: case Kregister: case Kstatic: case Kextern: case Ktypedef:
+            if (s & MaskMem) {
+                errorat(lx->pos, "multiple storage class specifiers: second was %s", keywords[n]);
+                goto Bad;
+            }
+            break;
+
+        case Kconst: case Kvolatile:
+            if (s & bit)
+                warnat(lx->pos, "duplicate %s specifier found in declaration", keywords[n]);
+            break;
+
+        case Ksigned: case Kunsigned:
+            if (s & MaskSgn) {
+                if (s & bit) {
+                    warnat(lx->pos, "duplicated sign specifier: second was %s", keywords[n]);
+                    break;
+                }
+                errorat(lx->pos, "conflicting sign specifiers");
+                goto Bad;
+            }
+            break;
+
+        case Kshort:
+            if (s & Tshort)
+                warnat(lx->pos, "duplicated short specifier");
+            break;
+
+        case Klong:
+            if ((s >> Klong) & 2) {
+                errorat(lx->pos, "cannot chain three or more long specifiers");
+                goto Bad;
+            }
+            /* adding instead of or-ing lets a second long carry into the vlong bit */
+            s += bit;
+            /* consume the keyword here: the continue skips the advance at the loop tail */
+            advance(p, lx);
+            continue;
+
+        case Kvoid: case Kchar: case Kint: case Kfloat: case Kdouble:
+            if (s & MaskTyp) {
+                errorat(lx->pos, "more than one base type specified");
+                goto Bad;
+            }
+            break;
+
+        case Kstruct: case Kunion:
+        case Kenum:
+            panicf("need to implement");
+
+        default:
+            errorat(t.pos, "invalid keyword '%s' found in declaration specifier", keywords[n]);
+            /* step over the offending keyword so the caller makes progress, then give up */
+            advance(p, lx);
+            goto Bad;
+        }
+
+        s |= bit;
+        advance(p, lx);
+    }
+
+    /* keep the low 32 bits and drop qualifiers and storage class before validating */
+    sm = (((s<<32)>>32) & ~(MaskQul | MaskMem));
+    for (n = 0; n < arrlen(validtypes); n++) {
+        if (sm == validtypes[n]) {
+            *spec = s;
+            return 0;
+        }
+    }
+    /* TODO: serialize bitflags to string for nice error message */
+    errorat(lx->pos, "invalid type specifier: ''");
+Bad:
+    errorat(lx->pos, "ignoring specifier");
+    *spec = Sbad;
+    return 1;
+}
+
+/* predeclaration */
+static error dtor(Parser *p, Lexer *lx, Dtor *d);
+
+/*
+ * name parses the name part of a declarator into nm: either a plain
+ * identifier or a parenthesized nested declarator.
+ *
+ * On success the identifier (or the closing paren) has been consumed
+ * and 0 is returned; on failure an error is reported and 1 is
+ * returned.  Array and call suffixes are recognized but not yet
+ * implemented.
+ */
+static
+error
+name(Parser *p, Lexer *lx, Name *nm)
+{
+    int k;
+    Token t;
+
+    t = peek(p, 0);
+    switch (k = t.kind) {
+    case Aident:
+        nm->kind = Nident;
+        nm->ident = t.val.s;
+        break;
+
+    case Alparen:
+        nm->kind = Nparen;
+        /*
+         * step over '(' first: dtor() hands a leading '(' straight
+         * back to name(), so recursing without consuming it would
+         * never make progress
+         */
+        advance(p, lx);
+        alloc(nm->paren);
+        if (dtor(p, lx, nm->paren)) {
+            /* we are using an arena allocator so can't clean up */
+            errorat(lx->pos, "invalid declarator in parenthesis");
+            nm->paren = nil;
+            goto Bad;
+        }
+        t = peek(p, 0);
+        if (t.kind != Arparen) {
+            errorat(lx->pos, "missing closing paren in declarator");
+            goto Bad;
+        }
+        break;
+
+    default:
+        errorat(lx->pos, "invalid token '%s' in name declaration", tokens[k]);
+        goto Bad;
+    }
+
+    /* consume the identifier or the closing paren and look at the suffix */
+    t = advance(p, lx);
+    switch (k = t.kind) {
+    case Albrakt:
+        nm->kind |= Nindex;
+        panicf("need to implement");
+        break;
+
+    case Alparen:
+        nm->kind |= Ncall;
+        panicf("need to implement");
+        break;
+
+    default:
+        ;
+    }
+
+    return 0;
+Bad:
+    return 1;
+}
+
+/* pointer kind is partitioned into 8x6 regions */
+/*
+ * dtor parses one declarator into d: an optional run of '*' with
+ * qualifiers, followed by the name.
+ *
+ * Each '*' sets bit n of the current Ptr word and each qualifier sets
+ * bit n + (keyword - Kconst + 1); after eight pointers a new Ptr word
+ * is chained through ptr->link.  Returns 0 on success and 1 on
+ * failure.
+ *
+ * NOTE(review): consecutive values of n give overlapping bit ranges,
+ * which does not look like the "8x6 regions" layout described above;
+ * the arithmetic is kept as it was - confirm the intended encoding.
+ */
+static
+error
+dtor(Parser *p, Lexer *lx, Dtor *d)
+{
+    int n;
+    Token t;
+    Ptr *ptr, *x;
+
+    /* index of the current pointer inside the current Ptr word */
+    n = 0;
+
+    ptr = &d->ptr;
+    ptr->kind = 0;
+    ptr->link = nil;
+
+    t = peek(p, 0);
+    /* no pointer prefix: the declarator is just a (possibly parenthesized) name */
+    if (t.kind == Aident || t.kind == Alparen)
+        goto Name;
+    if (t.kind != Astar)
+        goto Bad;
+Ptr:
+    ptr->kind |= Bit(n);
+    advance(p, lx);
+Key:
+    t = peek(p, 0);
+    switch (t.kind) {
+    case Akeywd:
+        /* qualifiers are identified by keyword index, which lives in val.i */
+        if (t.val.i < Kconst || t.val.i > Katomic) {
+            errorat(lx->pos, "invalid keyword '%s' modifies pointer", keywords[t.val.i]);
+            goto Bad;
+        }
+        ptr->kind |= Bit(n + (t.val.i - Kconst + 1));
+        advance(p, lx);
+        goto Key;
+
+    case Astar:
+        if (++n >= 8) {
+            /* current word is full: chain a fresh one */
+            alloc(x);
+            x->kind = 0;
+            x->link = nil;
+            ptr->link = x;
+            ptr = x;
+            n = 0;
+        }
+        goto Ptr;
+
+    case Aident:
+    case Alparen:
+        goto Name;
+
+    default:
+        errorat(lx->pos, "invalid token '%s' modifies pointer specification", tokens[t.kind]);
+        goto Bad;
+    }
+Name:
+    return name(p, lx, &d->name);
+Bad:
+    return 1;
+}
+
+/*
+ * decl parses one declaration: specifiers, a comma separated list of
+ * declarators with optional initializers, and either a terminating
+ * semicolon or (at top level only) a function body.
+ *
+ * The head declarator is stored in the Decl itself, the remaining
+ * ones are chained through var.link, each with its own initializer.
+ * On success the declared names are entered into the current scope
+ * and the Decl is returned; on failure an error is reported and nil
+ * is returned.
+ */
+static
+Decl *
+decl(Parser *p, Lexer *lx)
+{
+    Token t;
+    Decl *d;
+    Dtor dt, *dtp;
+    Expr **init;
+    struct Decls *item, **curr;
+
+    alloc(d);
+    d->kind = 0;
+    d->pos.beg = lx->pos;
+
+    if (spec(p, lx, &d->spec)) {
+        errorat(lx->pos, "invalid declaration specifier");
+        goto Bad;
+    }
+
+    d->var.init = nil;
+    d->var.link = nil;
+
+    dtp = &dt;
+    init = &d->var.init;
+    curr = &d->var.link;
+    for (;;) {
+        if (dtor(p, lx, dtp)) {
+            errorat(lx->pos, "invalid declarator");
+            goto Bad;
+        }
+        /* the head declarator is parsed into a local; store it in the node */
+        if (dtp == &dt) {
+            d->var.ptr = dt.ptr;
+            d->var.name = dt.name;
+        }
+
+        t = peek(p, 0);
+        if (t.kind == Aeq) {
+            advance(p, lx);
+            if (d->kind != Dvars)
+                d->kind = Dvar;
+            /* the initializer belongs to the declarator just parsed */
+            *init = expr(p, lx);
+            /* the expression moved the lookahead; re-read before testing for ',' */
+            t = peek(p, 0);
+        }
+        if (t.kind != Acomma)
+            break;
+
+        d->kind = Dvars;
+        alloc(item);
+        item->init = nil;
+        item->link = nil;
+        *curr = item;
+
+        dtp = &item->dtor;
+        init = &item->init;
+        curr = &item->link;
+        advance(p, lx);
+    }
+
+    if (t.kind == Albrace) {
+        if (!attop(p)) {
+            errorat(lx->pos, "nested function declarations");
+            goto Bad;
+        }
+        if (d->kind != 0) {
+            errorat(lx->pos, "attempting to define a function for a variable declaration");
+            goto Bad;
+        }
+        d->kind = Dfunc;
+        /* blkstmt allocates the body node itself */
+        openscope(p);
+        if (blkstmt(p, lx, &d->func.body)) {
+            /* keep the scope stack balanced on the error path as well */
+            closescope(p);
+            errorat(lx->pos, "failed to parse function body");
+            goto Bad;
+        }
+        closescope(p);
+    } else {
+        /* a declaration without initializer is still a variable declaration */
+        if (d->kind == 0)
+            d->kind = Dvar;
+        /* consume the terminator so the next parse does not start on it */
+        if (nomatch(t, Asemi)) {
+            errorat(t.pos, "missing semicolon after declaration");
+            goto Bad;
+        }
+        advance(p, lx);
+    }
+
+    d->pos.end = lx->pos;
+    declareobj(p, d);
+    return d;
+Bad:
+    errorat(lx->pos, "failed to parse top level declaration");
+    return nil;
+}
+
+// -----------------------------------------------------------------------
+// top level api
+
+/*
+ * parse reads top level declarations from lx until the end of the
+ * token stream and appends each successfully parsed one to p->ast.
+ *
+ * NOTE(review): the two-token lookahead is primed here because none
+ * of the visible hunks fills p->tok before parse() runs - confirm
+ * that no caller primes it as well.
+ */
+void
+parse(Parser *p, Lexer *lx)
+{
+    Token tok;
+    Decl *d;
+    Decl **grown;
+
+    p->sp = p->spstk;
+
+    /* fill the lookahead window before the first peek */
+    p->tok[0] = lex(lx);
+    p->tok[1] = lex(lx);
+
+    while ((tok = peek(p, 0)), tok.kind > Aeof) {
+        d = decl(p, lx);
+        if (d == nil) {
+            /* drop a token so a failing declaration cannot stall the loop */
+            advance(p, lx);
+            continue;
+        }
+
+        if (p->ast.len >= p->ast.cap) {
+            grown = realloc(p->ast.decls, (p->ast.cap + 20)*sizeof(*p->ast.decls));
+            if (grown == nil)
+                panicf("out of memory");
+            p->ast.decls = grown;
+            p->ast.cap += 20;
+        }
+        p->ast.decls[p->ast.len++] = d;
+    }
+}
diff --git a/sys/cmd/cc/cc.c b/sys/cmd/cc/cc.c
index fcb9217..cf479f8 100644
--- a/sys/cmd/cc/cc.c
+++ b/sys/cmd/cc/cc.c
@@ -297,9 +297,7 @@ compile(byte *path)
.col = 1,
};
- while (tok = lex(&C.lxr), tok.kind > Aeof) {
- // puttok(tok);
- }
+ parse(&C.psr, &C.lxr);
return tok.kind != Anil;
}
@@ -350,3 +348,4 @@ main(int argc, byte *argv[])
exit(0);
}
+
diff --git a/sys/cmd/cc/cc.h b/sys/cmd/cc/cc.h
index 144eb4a..7c9d679 100644
--- a/sys/cmd/cc/cc.h
+++ b/sys/cmd/cc/cc.h
@@ -15,14 +15,18 @@ typedef struct Lexer Lexer;
typedef struct Sym Sym;
typedef struct Type Type;
+typedef struct Scope Scope;
+typedef struct Parser Parser;
+
+typedef struct Node Node;
typedef struct Ptr Ptr;
typedef struct Name Name;
+typedef struct Dtor Dtor;
typedef struct Decl Decl;
typedef struct Stmt Stmt;
typedef struct Expr Expr;
-/* maps */
typedef struct SymTab SymTab;
typedef struct StrTab StrTab;
@@ -34,21 +38,26 @@ typedef struct Compiler Compiler;
KEYWORD(Kregister,"register") \
KEYWORD(Kstatic,"static") \
KEYWORD(Kextern,"extern") \
+ KEYWORD(Ktls,"thread_local") \
KEYWORD(Ktypedef,"typedef") \
KEYWORD(Kconst,"const") \
KEYWORD(Kvolatile,"volatile") \
KEYWORD(Krestrict,"restrict") \
+ KEYWORD(Katomic,"_Atomic") \
KEYWORD(Kinline,"inline") \
+ KEYWORD(Knoret,"_Noreturn") \
+ KEYWORD(Ksigned,"signed") \
+ KEYWORD(Kunsigned,"unsigned") \
KEYWORD(Kvoid,"void") \
+ KEYWORD(Kbool,"_Bool") \
KEYWORD(Kchar,"char") \
- KEYWORD(Kint,"int") \
KEYWORD(Kfloat,"float") \
KEYWORD(Kdouble,"double") \
- KEYWORD(Ksigned,"signed") \
- KEYWORD(Kunsigned,"unsigned") \
+ KEYWORD(Kcomplex,"complex") \
+ KEYWORD(Kimaginary,"imaginary") \
+ KEYWORD(Kint,"int") \
KEYWORD(Kshort,"short") \
KEYWORD(Klong,"long") \
- KEYWORD(Kvlong,"vlong") \
KEYWORD(Kstruct,"struct") \
KEYWORD(Kunion,"union") \
KEYWORD(Kenum,"enum") \
@@ -201,7 +210,7 @@ extern byte *tokens[NUM_TOKENS];
struct Token
{
uint32 kind;
- Range pos;
+ Pos pos;
union {
byte *s;
double f;
@@ -215,9 +224,12 @@ struct Token
enum
{
- Svar = 1 << 0,
- Sfunc = 1 << 1,
- Smacro = 1 << 2,
+ Sobj = iota(0),
+ Stype = iota(2),
+ Stag = iota(3),
+ Senum = iota(4),
+ Sstmt = iota(5),
+ Smacro = iota(6),
};
struct Sym
@@ -226,7 +238,9 @@ struct Sym
string name;
union {
string macro;
- /*Func *func;*/
+ Decl *obj;
+ Type *tag;
+ Stmt *blk;
};
};
@@ -244,6 +258,7 @@ struct SymTab
Sym *define(SymTab *tab, string ident, int kind);
Sym *lookup(SymTab *tab, string ident);
error forget(SymTab *tab, string ident);
+void forgetall(SymTab *tab);
struct Lexer
{
@@ -276,6 +291,7 @@ void puttok(Token);
// parsing & type resolution
// tokens -> ast
+/* parent data */
struct Node
{
Range pos;
@@ -287,9 +303,9 @@ enum
{
Nbad,
/* labels */
- Slabel, Scase,
+ Sempty, Slabel, Scase,
Sblock,
- Sexpr,
+ Sexpr, Sdecl,
Sselect,
/* loops */
Sfor, Swhile, Sdo,
@@ -317,8 +333,17 @@ enum
/* lists */
Xcomma,
+
Dfunc,
+ Dtype,
Dvar,
+ Dvars,
+
+ /* names (shouldn't interact with base AST node enumeration */
+ Nident = iota(0),
+ Nparen = iota(1),
+ Nindex = iota(2),
+ Ncall = iota(3),
};
/* statements */
@@ -331,7 +356,7 @@ struct Stmt
string ident;
Expr *x;
};
- Stmt *stmt;
+ Node *stmt;
} lbl;
struct {
long n;
@@ -339,13 +364,10 @@ struct Stmt
} blk;
Expr *x;
struct {
- union {
- Expr *x;
- Decl *d;
- } init;
+ Node *init;
Expr *cond;
Expr *step;
- Stmt *body;
+ Node *body;
} loop;
union{
string lbl;
@@ -353,9 +375,9 @@ struct Stmt
} jmp;
struct {
Expr *cond;
- Stmt *body;
- Stmt *orelse;
- } sel;
+ Node *body;
+ Node *orelse;
+ } br;
};
};
@@ -363,6 +385,7 @@ struct Stmt
struct Expr
{
struct Node;
+ uint64 type;
union {
struct {
Expr *l;
@@ -403,83 +426,162 @@ struct Name
{
struct Node;
union {
- string ident;
- struct Dtor *paren;
- struct {
- uint32 kind;
- Name *base;
- union {
- Expr *index;
- /* TODO:
- * func params
- * variadic arrays
- * compound set notation
- */
- };
- } suffix;
+ string ident;
+ struct Dtor *paren;
+ };
+ union {
+ Expr *i;
+ Expr *p;
};
};
struct Dtor
{
- Range pos;
Ptr ptr;
Name name;
};
// specifiers
+/*
+ * the design is the following:
+ * type info is held w/in a 64 bit integer.
+ * the bottom 32 bits are associated to specializations
+ * the top 32 bits index into a type-info array held by the compiler.
+ */
enum
{
/* memory */
- Mauto = iota(0),
- Mtype = iota(1),
- Mstatic = iota(2),
- Mreg = iota(3),
- Mtls = iota(4),
+ Mauto = iota(Kauto),
+ Mstatic = iota(Kstatic),
+ Mreg = iota(Kregister),
+ Mtls = iota(Ktls),
+ Mtype = iota(Ktypedef),
+
+ MaskMem = Mauto | Mstatic | Mreg | Mtls | Mtype,
/* qualifiers */
- Qconst = iota(5),
- Qrestr = iota(6),
- Qvoltl = iota(7),
- Qatom = iota(8),
+ Qconst = iota(Kconst),
+ Qrestr = iota(Krestrict),
+ Qvoltl = iota(Kvolatile),
+ Qatom = iota(Katomic),
+
+ MaskQul = Qconst | Qrestr | Qvoltl | Qatom,
- Finlne = iota(9),
- Fnoret = iota(10),
+ Finlne = iota(Kinline),
+ Fnoret = iota(Knoret),
+
+ MaskFcn = Finlne | Fnoret,
/* types */
- Tlong = iota(12),
- Tvlong = iota(13),
- Tsign = iota(14),
- Tunsign = iota(15),
-
- Tvoid = iota(16),
- Tchar = iota(17),
- Tshort = iota(18),
- Tfloat = iota(19),
- Tdouble = iota(20),
- Tcmplx = iota(21),
- Timag = iota(22),
-
- Tatom = iota(23),
-
- Taggr = iota(24),
- Tenum = iota(25),
- Tname = iota(26),
-
- /* alignment */
- Alnas = iota(27),
+ Tsign = iota(Ksigned),
+ Tunsign = iota(Kunsigned),
+
+ MaskSgn = Tsign | Tunsign,
+
+ Tvoid = iota(Kvoid),
+
+ Tfloat = iota(Kfloat),
+ Tdouble = iota(Kdouble),
+ Tcmplx = iota(Kcomplex),
+ Timag = iota(Kimaginary),
+
+ MaskFlt = Tfloat | Tdouble | Tcmplx | Timag,
+
+ Tchar = iota(Kchar),
+ Tbool = iota(Kbool),
+
+ Tshort = iota(Kshort),
+ Tint = iota(Kint),
+ Tlong = iota(Klong),
+ Tvlong = iota(Klong+1),
+
+ MaskInt = Tshort | Tint | Tlong | Tvlong,
+ MaskTyp = Tvoid | Tbool | Tchar | Tint | Tfloat | Timag | Tcmplx,
+ /*
+ * NOTE IMPORTANT: vlong takes over the struct bit place
+ * DON'T MOVE KEYWORDS WITHOUT REORGANIZING
+ */
+ Tstruct = iota(Kstruct+1),
+ Tunion = iota(Kunion+1),
+ Tenum = iota(Kenum+1),
+ Tname = iota(Kenum+2),
+
+ Sbad = -1,
+};
+
+struct Decls
+{
+ union {
+ struct Dtor;
+ Dtor dtor;
+ };
+ Expr *init;
+ struct Decls *link;
};
struct Decl
{
struct Node;
+ uint64 spec;
union {
+ struct {
+ struct Dtor;
+ Stmt *body;
+ } func;
+ struct {
+ struct Dtor;
+ Expr *init;
+ struct Decls *link;
+ } var;
+ struct Dtor type;
+ };
+};
+/* types */
+struct Type
+{
+ Sym *sym;
+ string ident;
+ uintptr size;
+ uint16 align;
+ union {
+ struct {
+ int len;
+ Dtor *d;
+ Expr *x;
+ } su;
+ struct {
+ int len;
+ string *s;
+ Expr *x;
+ } en;
};
};
+struct Scope
+{
+ SymTab tags;
+ SymTab objs;
+};
+
+struct Parser
+{
+ Token tok[2];
+ struct {
+ int cap;
+ int len;
+ Decl **decls;
+ } ast;
+
+ Scope *sp;
+ Scope spstk[20];
+};
+
+/* ast.c functions */
+void parse(Parser *, Lexer *);
+
// -----------------------------------------------------------------------
-// compiler
+// global compiler
enum
{
@@ -525,9 +627,18 @@ string internview(byte* beg, byte *end);
/* main data */
struct Compiler
{
+ /* storage */
mem·Arena *heap;
StrTab strs;
+ string outfile;
+
+ struct {
+ int cap;
+ int len;
+ Type *info;
+ } type;
+ /* i/o data */
struct {
int cap;
int len;
@@ -542,15 +653,11 @@ struct Compiler
string *path;
} omit;
+ /* partitioned data for stages */
Lexer lxr;
-
- string outfile;
- struct {
- int cap;
- int len;
- Decl *decls;
- } ast;
+ Parser psr;
};
+
extern Compiler C;
/* cc.c compiler functions */
diff --git a/sys/cmd/cc/lex.c b/sys/cmd/cc/lex.c
index e35ea7e..a8dabec 100644
--- a/sys/cmd/cc/lex.c
+++ b/sys/cmd/cc/lex.c
@@ -257,7 +257,7 @@ lex(Lexer *lx)
GetByte:
b = getbyte(lx);
Dispatch:
- tok.pos.beg = lx->pos;
+ tok.pos = lx->pos;
if ((b != EOF && b >= RuneSelf) || b == '_')
goto Talpha;
@@ -631,7 +631,9 @@ Dispatch:
n = intern(&tok.val.s);
if (n < arrlen(keywords)) {
- tok.kind = Akeywd;
+ tok.kind = Akeywd;
+ tok.val.i = n;
+ goto Return;
}
sym = lookup(&lx->sym, tok.val.s);
@@ -662,7 +664,6 @@ Dispatch:
Return:
lx->b = lx->buf;
- tok.pos.end = lx->pos;
return tok;
Nospace:
@@ -773,6 +774,12 @@ forget(SymTab *tab, string ident)
return 1;
}
+/* forgetall clears symbol table tab by handing it to MAP_RESET */
+void
+forgetall(SymTab *tab)
+{
+ MAP_RESET(tab);
+}
+
// -----------------------------------------------------------------------
// error reporting
diff --git a/sys/cmd/cc/rules.mk b/sys/cmd/cc/rules.mk
index b32d5b6..d7a4ac1 100644
--- a/sys/cmd/cc/rules.mk
+++ b/sys/cmd/cc/rules.mk
@@ -5,6 +5,7 @@ include share/push.mk
SRCS_$(d) := \
$(d)/pp.c \
$(d)/lex.c \
+ $(d)/ast.c \
$(d)/cc.c
LIBS_$(d) :=