aboutsummaryrefslogtreecommitdiff
path: root/src/cmd/cc
diff options
context:
space:
mode:
Diffstat (limited to 'src/cmd/cc')
-rw-r--r--src/cmd/cc/ast.c2139
-rw-r--r--src/cmd/cc/bits.c114
-rw-r--r--src/cmd/cc/cc.c409
-rw-r--r--src/cmd/cc/cc.h806
-rw-r--r--src/cmd/cc/lex.c873
-rw-r--r--src/cmd/cc/pp.c1125
-rw-r--r--src/cmd/cc/rules.mk21
-rw-r--r--src/cmd/cc/scratch.c7
-rw-r--r--src/cmd/cc/util.c21
9 files changed, 5515 insertions, 0 deletions
diff --git a/src/cmd/cc/ast.c b/src/cmd/cc/ast.c
new file mode 100644
index 0000000..4330bcc
--- /dev/null
+++ b/src/cmd/cc/ast.c
@@ -0,0 +1,2139 @@
+#include "cc.h"
+
+// -----------------------------------------------------------------------
+// helper macros
+
+#define alloc(ptr) (ptr) = mem·arenaalloc(C.heap, 1, sizeof *(ptr))
+#define copyarray(dst, arr, n) (dst) = mem·arenaalloc(C.heap, (n), sizeof *(arr)), memcpy((dst), (arr), n * sizeof *(arr))
+#define movearray(dst, arr, n) copyarray(dst,arr,n), free(arr)
+
+#define attop(prs) ((uintptr)prs->sp == (uintptr)prs->spstk)
+#define peek(p, i) (p->tok[i])
+#define iskw(t, k) (((t).kind == Akeywd) && (t).val.i == (k))
+#define advance(p, l) (p->tok[0] = p->tok[1], p->tok[1] = lex(l), p->tok[0])
+
+#define Bit(i) (1 << (i))
+
+// -----------------------------------------------------------------------
+// helper functions
+
+static
+string
+nameof(Name *n)
+{
+ switch (n->kind) {
+ /* 0 corresponds to no state - i.e. an abstract name */
+ case Nnil:
+ return nil;
+ case Nident:
+ return n->ident;
+ case Nparen:
+ return nameof(n->paren->name);
+ case Nindex:
+ case Ncall:
+ return nameof(n->sfx.name);
+ }
+ panicf("unreachable");
+ return nil;
+}
+
+static
+void
+openscope(Parser *p)
+{
+ if (++p->sp >= arrend(p->spstk))
+ panicf("scope stack overflow");
+}
+
+/*
+ * TODO: save the symbol table with the ast node
+ * write a "copy(move)scope"
+ */
+
+static
+void
+closescope(Parser *p)
+{
+ if (p->sp <= p->spstk)
+ panicf("scope stack underflow");
+
+ forgetall(&p->sp->objs);
+ forgetall(&p->sp->tags);
+ p->sp--;
+}
+
+/* temporary stack helpers */
+static
+Name*
+getname(Parser *p)
+{
+ if (p->nm >= arrend(p->nmstk))
+ panicf("name stack overflow");
+ return p->nm++;
+}
+
+static void putdtor(Parser *p, Dtor *dt);
+
+static
+void
+putname(Parser *p, Name *n)
+{
+ if (p->nm <= p->nmstk)
+ panicf("name stack underflow");
+
+ switch (n->kind) {
+ case Nnil:
+ case Nident:
+ break;
+ case Nparen:
+ putdtor(p, n->paren);
+ break;
+ case Nindex:
+ case Ncall:
+ putname(p, n->sfx.name);
+ break;
+ default:
+ panicf("unrecognized name kind");
+ }
+ *p->nm-- = (Name){0};
+}
+
+static
+Ptr*
+getptr(Parser *p)
+{
+ if (p->pt >= arrend(p->ptstk))
+ panicf("pointer stack overflow");
+
+ return p->pt++;
+}
+
+static
+void
+putptr(Parser *p, Ptr *ptr)
+{
+ if (p->pt <= p->ptstk)
+ panicf("pointer stack underflow");
+
+ while ((ptr = ptr->link))
+ putptr(p, ptr);
+
+ *p->pt-- = (Ptr){0};
+}
+
+
+static
+Dtor*
+getdtor(Parser *p)
+{
+ if (p->dt >= arrend(p->dtstk))
+ panicf("dtor stack overflow");
+
+ p->dt->name = getname(p);
+ return p->dt++;
+}
+
+static
+void
+putdtor(Parser *p, Dtor *dt)
+{
+ if (p->dt <= p->dtstk)
+ panicf("dtor stack underflow");
+
+ /* release the pointer overflow if we had to use it */
+ if (p->dt->ptr.link)
+ putptr(p, p->dt->ptr.link);
+
+ /* the dtor could encompass multiple names hierarchically */
+ putname(p, dt->name);
+ *p->dt-- = (Dtor){0};
+}
+
+/* TODO: This will fail for forward declarations */
+static
+void
+declareobj(Parser *p, Decl *d)
+{
+ Sym *sym;
+ string ident;
+ uint32 kind;
+ struct Decls *link;
+
+ switch (d->kind) {
+ case Dfunc:
+ case Dvar:
+ kind = Svar;
+ goto one;
+ case Dtype:
+ kind = Stype;
+ one:
+ ident = d->name;
+ break;
+
+ case Dvars:
+ kind = Svar;
+ goto many;
+ case Dtypes:
+ kind = Stype;
+ many:
+ while (link = &d->list, link != nil) {
+ ident = link->name;
+ sym = lookup(&p->sp->objs, ident);
+ if (sym) {
+ errorat(peek(p, 0).pos, "redeclaration of name '%s' in object space", ident);
+ return;
+ }
+ sym = define(&p->sp->objs, ident, kind);
+ if (kind == Svar)
+ sym->obj = d;
+ else
+ sym->type = d->type;
+ }
+ break;
+
+ default:
+ panicf("unrecognized node kind %d. expected declaration", d->kind);
+ }
+ sym = lookup(&p->sp->objs, ident);
+ if (sym) {
+ errorat(peek(p, 0).pos, "redeclaration of name '%s' in object space", ident);
+ return;
+ }
+ sym = define(&p->sp->objs, ident, kind);
+ if (kind == Svar)
+ sym->obj = d;
+ else
+ sym->type = d->type;
+}
+
+/* enters the object identifier space */
+static
+void
+declareenum(Parser *p, int n, string *elts, Expr *vals)
+{
+ int i;
+ Sym *s;
+
+ for (i = 0; i < n; i++) {
+ s = lookup(&p->sp->objs, elts[i]);
+ if (s) {
+ errorat(peek(p, 0).pos, "redeclaration of name %s in object space", elts[i]);
+ continue;
+ }
+ s = define(&p->sp->objs, elts[i], Senum);
+ s->val = vals + i;
+ }
+}
+
+static
+void
+declaretag(Parser *p, uint32 t, string name)
+{
+ Sym *sym;
+ sym = lookup(&p->sp->tags, name);
+ if (sym) {
+ errorat(peek(p, 0).pos, "redeclaration of name '%s' in tag space", name);
+ return;
+ }
+
+ sym = define(&p->sp->tags, name, Stype);
+ sym->type = t;
+}
+
+static
+Sym *
+lookupobj(Parser *p, string ident)
+{
+ Sym *sym;
+ Scope *it;
+
+ it = p->sp;
+ do {
+ sym = lookup(&it->objs, ident);
+ } while (sym == nil && --it >= p->spstk);
+
+ return sym;
+}
+
+static
+Sym *
+lookuptag(Parser *p, string ident)
+{
+ Sym *sym;
+ Scope *it;
+
+ it = p->sp;
+ do {
+ sym = lookup(&it->tags, ident);
+ } while (sym == nil && --it >= p->spstk);
+
+ return sym;
+}
+
+static
+int
+nomatch(Token t, vlong kind)
+{
+ if (t.kind == kind)
+ return 0;
+
+ if (t.kind == Akeywd)
+ errorat(t.pos, "expected token '%s', instead found keyword '%s'", tokens[kind], keywords[t.val.i]);
+ else
+ errorat(t.pos, "expected token '%s', instead found '%s'", tokens[kind], tokens[t.kind]);
+ return 1;
+}
+
+// -----------------------------------------------------------------------
+// needed forward declarations
+
+static error spec(Parser *, Lexer *, uint64 *);
+static uint32 basetype(Parser *, Lexer *, uint64 *s);
+static string namedecl(Parser *, Lexer *, uint32 *, int);
+static uint32 typename(Parser *, Lexer *, uint32 *);
+
+static error dtor(Parser *p, Lexer *lx, Dtor *d, int ab);
+static uint32 typeofdtor(Dtor *, uint32);
+
+
+static Decl *decl(Parser *, Lexer *);
+
+static Expr *ternary(Parser *, Lexer *);
+static Expr *expr(Parser *, Lexer *);
+
+static error blkstmt(Parser *, Lexer *, Stmt **);
+
+
+// -----------------------------------------------------------------------
+// expressions
+
+#define MAKEX(x, state) alloc((x)), (x)->kind = X##state
+
+static
+Expr*
+primary(Parser *p, Lexer *lx)
+{
+ int k;
+ Expr *x;
+ Token t;
+ Pos b;
+
+ t = peek(p, 0);
+ b = t.pos;
+ switch (k = (t.kind & Vmask)) {
+ case Aident:
+ MAKEX(x, ident);
+ x->pos.beg = b;
+ x->pos.end = lx->pos;
+ x->name = t.val.s;
+ break;
+
+ case Alit:
+ MAKEX(x, lit);
+ x->pos.beg = b;
+ x->pos.end = lx->pos;
+ x->val.kind = t.kind & ~Vmask;
+ x->val.v = t.val;
+ break;
+
+ case Alparen:
+ advance(p, lx);
+ x = expr(p, lx);
+ t = peek(p, 0);
+ if (nomatch(t, Arparen)) {
+ errorat(lx->pos, "unterminated paren expression");
+ goto Bad;
+ }
+ break;
+
+ default:
+ panicf("unreachable");
+ }
+
+ advance(p, lx);
+ return x;
+Bad:
+ errorat(lx->pos, "unable to parse operand expression");
+ return nil;
+}
+
+static
+int
+istypename(Parser *p, Token t)
+{
+ Sym *sym;
+
+ if (t.kind == Akeywd && (Kconst <= t.val.i && t.val.i <= Kenum))
+ return 1;
+ if (t.kind == Aident) {
+ sym = lookupobj(p, t.val.s);
+ return (sym != nil) && sym->kind == Stype;
+ }
+
+ return 0;
+}
+
+static Expr* initx(Parser *p, Lexer *lx);
+
+static
+Expr*
+initlist(Parser *p, Lexer *lx)
+{
+ Token t;
+ int c, n;
+ Expr *x, **a;
+ struct Key *k;
+
+ MAKEX(x, initlist);
+ x->pos.beg = lx->pos;
+ x->init.n = 0;
+ if (t.kind == Arbrace) {
+ x->init.k = nil;
+ x->init.v = nil;
+ return x;
+ }
+
+ c = 0;
+ n = 0;
+ a = nil;
+ k = nil;
+Key0:
+ if (n >= c) {
+ c += 20;
+ k = realloc(k, c * sizeof(*k));
+ a = realloc(a, c * sizeof(*a));
+ }
+Key1:
+ switch (t.kind) {
+ case Adot:
+ t = advance(p, lx);
+ if (t.kind != Aident) {
+ errorat(t.pos, "dot designator must be followed by identifier");
+ goto Bad;
+ }
+ k[n++] = (struct Key) {
+ .kind = (uint32)x->init.n,
+ .s = t.val.s,
+ };
+ t = advance(p, lx);
+ goto Key0;
+
+ case Albrakt:
+ t = advance(p, lx);
+ k[n++] = (struct Key) {
+ .kind = (uint32)x->init.n | (1ULL << 32),
+ .x = expr(p, lx),
+ };
+ t = peek(p, 0);
+ goto Key0;
+
+ case Aeq:
+ t = advance(p, lx);
+ /* fallthrough */
+ default:
+ a[x->init.n++] = initx(p, lx);
+
+ t = peek(p, 0);
+ switch (t.kind) {
+ case Arbrace:
+ break;
+ case Acomma:
+ advance(p, lx);
+ /* fallthrough */
+ default:
+ goto Key0;
+ }
+ break;
+
+ case Acomma:
+ t = advance(p, lx);
+ break;
+ }
+ movearray(x->init.k, k, n);
+ movearray(x->init.v, a, x->init.n);
+ return x;
+Bad:
+ errorat(t.pos, "could not parse initializer list");
+ return nil;
+}
+
+static
+Expr*
+initx(Parser *p, Lexer *lx)
+{
+ Expr *x;
+ Token t;
+
+ t = peek(p, 0);
+ if (t.kind != Albrace)
+ return ternary(p, lx);
+
+ advance(p, lx);
+ x = initlist(p, lx);
+ t = peek(p, 0);
+ if (nomatch(t, Arbrace)) {
+ errorat(t.pos, "unmatched brace in initializer list, found %s instead", tokens[t.kind]);
+ advance(p, lx);
+ }
+
+ return x;
+}
+
+static
+Expr*
+postfix(Parser *p, Lexer *lx)
+{
+ Pos b;
+ Token t;
+ int c, n;
+ uint32 type, qual;
+ Expr *x, *y, **a;
+
+ t = peek(p, 0);
+ if (t.kind == Alparen)
+ if (istypename(p, peek(p, 1))) {
+ t = advance(p, lx);
+ type = typename(p, lx, &qual);
+ t = peek(p, 0);
+ if (nomatch(t, Arparen)) {
+ errorat(lx->pos, "unmatched paren: found %s instead", tokens[t.kind]);
+ goto Bad;
+ }
+ t = advance(p, lx);
+ if (nomatch(t, Albrace)) {
+ errorat(lx->pos, "bad initializer list: found %s", tokens[t.kind]);
+ goto Bad;
+ }
+
+ x = initlist(p, lx);
+
+ t = peek(p, 0);
+ if (nomatch(t, Arbrace)) {
+ errorat(lx->pos, "unmatched brace: found %s instead", tokens[t.kind]);
+ goto Bad;
+ }
+
+ x->type = type;
+ x->qual = qual;
+ return x;
+ }
+
+ x = primary(p, lx);
+ t = peek(p, 0);
+ for (;;) {
+ b = x->pos.beg;
+ switch (t.kind) {
+ case Ainc:
+ MAKEX(y, postinc);
+ goto Postfix;
+ case Adec:
+ MAKEX(y, postdec);
+ Postfix:
+ y->pos.beg = b;
+ y->pos.end = lx->pos;
+ y->unary.post = x;
+ x = y, y = nil;
+ break;
+
+ case Adot:
+ MAKEX(y, self);
+ goto Select;
+ case Aarrow:
+ MAKEX(y, selp);
+ Select:
+ t = advance(p, lx);
+ if (t.kind != Aident) {
+ errorat(t.pos, "invalid operand of selector expression");
+ goto Bad;
+ }
+ y->pos.beg = b;
+ y->pos.end = lx->pos;
+
+ y->idx.f = t.val.s;
+ y->idx.x = x;
+ x = y, y = nil;
+ break;
+
+ case Albrakt:
+ t = advance(p, lx);
+ if (t.kind == Arbrakt) {
+ errorat(t.pos, "empty index expression");
+ goto Bad;
+ }
+ MAKEX(y, index);
+ y->idx.x = x;
+ y->idx.i = expr(p, lx);
+
+ t = peek(p, 0);
+ if (t.kind != Albrakt) {
+ errorat(t.pos, "malformed index expression");
+ goto Bad;
+ }
+
+ x = y, y = nil;
+ break;
+
+ case Alparen:
+ t = advance(p, lx);
+ MAKEX(y, call);
+ y->call.fn = x;
+ y->pos.beg = b;
+ y->call.n = 0;
+ if (t.kind == Arparen) {
+ y->call.arg = nil;
+ goto Endfunc;
+ }
+ c = 0;
+ a = nil;
+ Arg:
+ if (y->call.n >= c) {
+ c += 20;
+ a = realloc(a, c * sizeof(*a));
+ }
+ a[y->call.n++] = expr(p, lx);
+ t = peek(p, 0);
+ if (t.kind == Acomma) {
+ advance(p, lx);
+ goto Arg;
+ }
+ if (t.kind != Arparen) {
+ errorat(t.pos, "invalid token '%s' found in call argument");
+ goto Bad;
+ }
+ movearray(y->call.arg, a, y->call.n);
+ Endfunc:
+ y->pos.end = lx->pos;
+ x = y, y = nil;
+ break;
+
+ default:
+ return x;
+ }
+ t = advance(p, lx);
+ }
+ return x;
+Bad:
+ errorat(lx->pos, "failed to parse primary expression");
+ return nil;
+}
+
+static
+uint32
+typename(Parser *p, Lexer *lx, uint32 *spec)
+{
+ uint32 base;
+ uint64 s;
+
+ base = basetype(p, lx, &s);
+ if (!base) {
+ errorat(lx->pos, "failed to parse type name specifiers");
+ return 0;
+ }
+ *spec = (uint32)s;
+ namedecl(p, lx, &base, 1);
+
+ return base;
+}
+
+static Expr* cast(Parser *p, Lexer *lx);
+
+static
+Expr*
+unary(Parser *p, Lexer *lx)
+{
+ Expr *x;
+ Token t;
+
+ t = peek(p, 0);
+ switch (t.kind) {
+ case Ainc: MAKEX(x, preinc); goto Prefix;
+ case Adec: MAKEX(x, predec); /* fallthrough */
+ Prefix:
+ advance(p, lx);
+ x->pos.beg = t.pos;
+ x->unary.pre = unary(p, lx);
+ x->pos.end = x->unary.pre->pos.end;
+ return x;
+
+ case Aneg: MAKEX(x, neg); goto Unary;
+ case Aand: MAKEX(x, ref); goto Unary;
+ case Anot: MAKEX(x, not); goto Unary;
+ case Astar: MAKEX(x, star); goto Unary;
+ case Aadd: MAKEX(x, plus); goto Unary;
+ case Asub: MAKEX(x, minus); /* fallthrough */
+ Unary:
+ advance(p, lx);
+ x->pos.beg = t.pos;
+ x->unary.pre = cast(p, lx);
+ x->pos.end = x->unary.pre->pos.end;
+ return x;
+
+ case Akeywd:
+ switch (t.val.i) {
+ case Ksizeof:
+ MAKEX(x, sizeof);
+ goto Key;
+ case Kalignof:
+ MAKEX(x, alignof);
+ /* fallthrough */
+ Key:
+ t = advance(p, lx);
+ if (t.kind == Alparen)
+ if (istypename(p, peek(p, 1))) {
+ t = advance(p, lx);
+ x->info.type = 0;
+ x->info.of.type = typename(p, lx, &x->info.of.qual);
+
+ t = peek(p, 0);
+ if (nomatch(t, Arparen)) {
+ errorat(t.pos, "missing paren for size/alignof statement");
+ goto Bad;
+ }
+ advance(p, lx);
+ return x;
+ }
+
+ x->info.type = 1;
+ x->info.x = unary(p, lx);
+ return x;
+
+ default:
+ ;
+ }
+ /* fallthrough */
+ default:
+ return postfix(p, lx);
+ }
+Bad:
+ return nil;
+}
+
+static
+Expr*
+cast(Parser *p, Lexer *lx)
+{
+ Expr *x;
+ Token t;
+
+ t = peek(p, 0);
+ if (t.kind == Alparen && istypename(p, peek(p,1))) {
+ t = advance(p, lx);
+ MAKEX(x, cast);
+
+ x->pos.beg = t.pos;
+ x->cast.to.type = typename(p, lx, &x->cast.to.qual);
+ if (!x->cast.to.type) {
+ errorat(lx->pos, "invalid type operand of cast");
+ goto Bad;
+ }
+
+ t = peek(p, 0);
+ if (nomatch(t, Arparen)) {
+ errorat(lx->pos, "missing closing paren after cast expression");
+ goto Bad;
+ }
+ advance(p, lx);
+
+ x->cast.x = cast(p, lx);
+ x->pos.beg = lx->pos;
+ return x;
+ }
+ return unary(p, lx);
+
+Bad:
+ errorat(lx->pos, "failed to parse cast expression");
+ return nil;
+}
+
+/* static data for binary operators */
+#define OPERATORS \
+ OPERATOR(Astar, 10, Xmul) \
+ OPERATOR(Adiv, 10, Xdiv) \
+ OPERATOR(Amod, 10, Xmod) \
+ OPERATOR(Aadd, 9, Xadd) \
+ OPERATOR(Asub, 9, Xsub) \
+ OPERATOR(Alsft, 8, Xlsft) \
+ OPERATOR(Arsft, 8, Xrsft) \
+ OPERATOR(Agteq, 7, Xgteq) \
+ OPERATOR(Alteq, 7, Xlteq) \
+ OPERATOR(Alt, 7, Xlt) \
+ OPERATOR(Agt, 7, Xgt) \
+ OPERATOR(Aeq, 6, Xeql) \
+ OPERATOR(Aneq, 6, Xneq) \
+ OPERATOR(Aand, 5, Xand) \
+ OPERATOR(Axor, 4, Xxor) \
+ OPERATOR(Aor, 3, Xor) \
+ OPERATOR(Aandand, 2, Xandand) \
+ OPERATOR(Aoror, 1, Xoror)
+
+static int prectab[NUM_TOKENS] =
+{
+#define OPERATOR(a, b, c) [a] = b,
+ OPERATORS
+#undef OPERATOR
+};
+
+static int optab[NUM_TOKENS] =
+{
+#define OPERATOR(a, b, c) [a] = c,
+ OPERATORS
+#undef OPERATOR
+};
+#undef OPERATORS
+
+static
+Expr*
+binary(Parser *p, Lexer *lx, int prec)
+{
+ Token t;
+ int k, np;
+ Expr *l, *x;
+
+ l = cast(p, lx);
+ for (;;) {
+ t = peek(p, 0);
+ k = t.kind;
+ np = prectab[k];
+ if (np < prec)
+ return l;
+
+ alloc(x);
+ t = advance(p, lx);
+
+ x->pos.beg = l->pos.beg;
+ x->kind = optab[k];
+ x->binary.l = l;
+ x->binary.r = binary(p, lx, np + 1);
+ x->pos.end = x->binary.r->pos.end;
+
+ l = x;
+ }
+ return l;
+Bad:
+ errorat(t.pos, "failed to parse expression");
+ return nil;
+}
+
+static
+Expr*
+ternary(Parser *p, Lexer *lx)
+{
+ Pos b;
+ Token t;
+ Expr *x, *y;
+
+ x = binary(p, lx, 1);
+ t = peek(p, 0);
+ b = t.pos;
+
+ switch (t.kind) {
+ case Aqmark:
+ t = advance(p, lx);
+ y = x;
+ MAKEX(x, ternary);
+ x->pos.beg = b;
+ x->kind = Xternary;
+ x->cond.c = y;
+ x->cond.t = expr(p, lx);
+
+ t = peek(p, 0);
+ if (nomatch(t, Acolon)) {
+ errorat(t.pos, "ternary expression missing ':'");
+ goto Bad;
+ }
+ t = advance(p, lx);
+ x->cond.e = expr(p, lx);
+ x->pos.end = lx->pos;
+ break;
+
+ case Aasn: MAKEX(y, asn); goto Assign;
+ case Aorasn: MAKEX(y, orasn); goto Assign;
+ case Axorasn: MAKEX(y, xorasn); goto Assign;
+ case Aandasn: MAKEX(y, andasn); goto Assign;
+ case Asubasn: MAKEX(y, subasn); goto Assign;
+ case Amulasn: MAKEX(y, mulasn); goto Assign;
+ case Adivasn: MAKEX(y, divasn); goto Assign;
+ case Amodasn: MAKEX(y, modasn); goto Assign;
+ case Alsftasn: MAKEX(y, lsftasn); goto Assign;
+ case Arsftasn: MAKEX(y, rsftasn); goto Assign;
+ Assign:
+ advance(p, lx);
+
+ y->asn.l = x;
+ y->asn.r = ternary(p, lx);
+ x = y;
+ x->pos.beg = b;
+ x->pos.end = lx->pos;
+ break;
+ default:
+ ;
+ }
+
+ return x;
+Bad:
+ errorat(lx->pos, "failing expression parse");
+ return nil;
+}
+
+static
+Expr*
+expr(Parser *p, Lexer *lx)
+{
+ Pos b;
+ Token t;
+ Expr *x, *y;
+
+ x = ternary(p, lx);
+ while (t = peek(p, 0), t.kind == Acomma) {
+ advance(p, lx);
+ y = x;
+ MAKEX(x, comma);
+ x->pos.beg = y->pos.beg;
+ x->comma.x[0] = y;
+ x->comma.x[1] = ternary(p, lx);
+ x->pos.end = lx->pos;
+ y = nil;
+ }
+
+ return x;
+}
+
+// -----------------------------------------------------------------------
+// statements
+
+static
+struct Node*
+stmt(Parser *p, Lexer *lx)
+{
+ int k;
+ Stmt *s;
+ Sym *sym;
+ Token t;
+
+ t = peek(p, 0);
+ k = t.kind;
+
+ /* intercept decl before allocating a statement */
+ if (k == Aident) {
+ if (peek(p, 1).kind == Acolon)
+ goto Tlabel;
+ sym = lookupobj(p, t.val.s);
+ if (!sym) {
+ errorat(lx->pos, "unrecognized type identifier '%s'", t.val.s);
+ goto Bad;
+ }
+
+ if (sym->kind == Stype)
+ goto Tdecl;
+ if (sym->kind == Svar) {
+ alloc(s);
+ s->pos.beg = lx->pos;
+ goto Texpr;
+ }
+
+ errorat(lx->pos, "bad symbol type used as type identifier");
+ goto Bad;
+ }
+
+ if (k == Akeywd) {
+ if ((Kauto <= t.val.i && t.val.i <= Ktypedef) || (Kconst <= t.val.i && t.val.i <= Kenum)) {
+ Tdecl:
+ return (Node *)decl(p, lx);
+ }
+ }
+
+ alloc(s);
+ s->pos.beg = lx->pos;
+
+ switch (k) {
+ case Akeywd:
+ switch (k = t.val.i) {
+ case Kif:
+ t = advance(p, lx);
+ s->kind = Sif;
+
+ if (nomatch(t, Alparen)) {
+ errorat(lx->pos, "missing opening paren before if conditional");
+ goto Bad;
+ }
+ s->br.cond = expr(p, lx);
+ if (nomatch(t, Arparen)) {
+ errorat(lx->pos, "missing closing paren after if conditional");
+ goto Bad;
+ }
+ s->br.body = stmt(p, lx);
+
+ t = peek(p, 0);
+ if (iskw(t, Kelse))
+ s->br.orelse = stmt(p, lx);
+ else
+ s->br.orelse = nil;
+
+ break;
+
+ case Kswitch:
+ t = advance(p, lx);
+ s->kind = Sswitch;
+
+ if (nomatch(t, Alparen)) {
+ errorat(lx->pos, "missing opening paren before switch conditional");
+ goto Bad;
+ }
+ s->br.cond = expr(p, lx);
+ if (nomatch(t, Arparen)) {
+ errorat(lx->pos, "missing closing paren after switch conditional");
+ goto Bad;
+ }
+ s->br.body = stmt(p, lx);
+ s->br.orelse = nil;
+
+ break;
+
+ case Kfor:
+ t = advance(p, lx);
+ s->kind = Sfor;
+
+ if (nomatch(t, Alparen)) {
+ errorat(lx->pos, "missing opening paren before for loop preamble");
+ goto Bad;
+ }
+
+ if (t.kind == Asemi)
+ s->loop.init = nil;
+ else {
+ // TODO: test for declaration
+ s->loop.init = (Node *)expr(p, lx);
+ }
+
+ if (nomatch(t, Asemi)) {
+ errorat(lx->pos, "missing semicolon");
+ goto Bad;
+ }
+
+ if (t.kind == Asemi)
+ s->loop.cond = nil;
+ else
+ s->loop.cond = expr(p, lx);
+
+ if (nomatch(t, Asemi)) {
+ errorat(lx->pos, "missing semicolon");
+ goto Bad;
+ }
+
+ if (t.kind == Asemi)
+ s->loop.step = nil;
+ else
+ s->loop.step = expr(p, lx);
+
+ if (nomatch(t, Alparen)) {
+ errorat(lx->pos, "missing closing paren after for loop preamble");
+ goto Bad;
+ }
+ s->loop.body = stmt(p, lx);
+ break;
+
+ case Kwhile:
+ t = advance(p, lx);
+ s->kind = Swhile;
+ if (nomatch(t, Alparen)) {
+ errorat(lx->pos, "missing opening paren before while loop conditional");
+ goto Bad;
+ }
+ s->loop.cond = expr(p, lx);
+ if (nomatch(t, Arparen)) {
+ errorat(t.pos, "missing closing paren after while loop conditional");
+ goto Bad;
+ }
+
+ s->loop.init = nil;
+ s->loop.step = nil;
+ s->loop.body = stmt(p, lx);
+ break;
+
+ case Kdo:
+ t = advance(p, lx);
+ s->kind = Sdo;
+ s->loop.body = stmt(p, lx);
+
+ if (!iskw(t, Kwhile)) {
+ errorat(t.pos, "missing while statement conditional after do body");
+ goto Bad;
+ }
+ t = advance(p, lx);
+ if (nomatch(t, Alparen)) {
+ errorat(t.pos, "missing open paren after while conditional");
+ goto Bad;
+ }
+
+ s->loop.init = nil;
+ s->loop.step = nil;
+ s->loop.cond = expr(p, lx);
+ break;
+
+ case Kgoto:
+ t = advance(p, lx);
+ s->kind = Sgoto;
+ if (t.kind != Aident) {
+ errorat(t.pos, "invalid argument to goto");
+ goto Bad;
+ }
+ s->jmp.lbl = t.val.s;
+ t = advance(p, lx);
+ if (nomatch(t, Asemi)) {
+ errorat(t.pos, "missing semicolon after goto");
+ goto Bad;
+ }
+ advance(p, lx);
+ break;
+
+ case Kcontinue:
+ t = advance(p, lx);
+ s->kind = Scontin;
+ s->jmp.lbl = nil;
+ s->jmp.x = nil;
+ if (nomatch(t, Asemi)) {
+ errorat(t.pos, "missing semicolon after continue");
+ goto Bad;
+ }
+ advance(p, lx);
+ break;
+
+ case Kbreak:
+ t = advance(p, lx);
+ s->kind = Sbreak;
+ s->jmp.lbl = nil;
+ s->jmp.x = nil;
+ if (nomatch(t, Asemi)) {
+ errorat(t.pos, "missing semicolon after break");
+ goto Bad;
+ }
+ advance(p, lx);
+ break;
+
+ case Kreturn:
+ t = advance(p, lx);
+ s->kind = Sreturn;
+
+ s->jmp.lbl = nil;
+ s->jmp.x = (t.kind == Asemi) ? nil : expr(p, lx);
+
+ t = peek(p, 0);
+ if (nomatch(t, Asemi)) {
+ errorat(t.pos, "missing semicolon after return statement");
+ goto Bad;
+ }
+ advance(p, lx);
+ break;
+
+ case Kcase:
+ t = advance(p, lx);
+ s->kind = Scase;
+ s->lbl.x = expr(p, lx);
+ if (nomatch(t, Acolon)) {
+ errorat(t.pos, "missing colon after default label");
+ goto Bad;
+ }
+ t = advance(p, lx);
+ s->lbl.stmt = stmt(p, lx);
+ break;
+
+ case Kdefault:
+ t = advance(p, lx);
+ s->kind = Scase;
+ s->lbl.x = nil;
+ if (nomatch(t, Acolon)) {
+ errorat(t.pos, "missing colon after default label");
+ goto Bad;
+ }
+ t = advance(p, lx);
+ s->lbl.stmt = stmt(p, lx);
+ break;
+
+ default:
+ panicf("unexpected statement keyword %s", keywords[k]);
+ }
+ break;
+ case Albrace:
+ s->kind = Sblock;
+ openscope(p);
+ if (blkstmt(p, lx, &s)) {
+ errorat(lx->pos, "failed to parse block statement");
+ goto Bad;
+ }
+ closescope(p);
+ break;
+
+ case Asemi:
+ t = advance(p, lx);
+ s->kind = Sempty;
+ break;
+
+ case Aident:
+ Tlabel:
+ t = advance(p, lx);
+ s->kind = Slabel;
+ if (nomatch(t, Acolon)) {
+ errorat(t.pos, "missing colon after labelled block");
+ goto Bad;
+ }
+ t = advance(p, lx);
+ s->lbl.stmt = stmt(p, lx);
+ break;
+
+ default:
+ Texpr:
+ s->kind = Sexpr;
+ s->x = expr(p, lx);
+
+ t = peek(p, 0);
+ if (nomatch(t, Asemi)) {
+ errorat(t.pos, "missing semicolon after statement expression");
+ goto Bad;
+ }
+ advance(p, lx);
+ }
+
+ s->pos.end = lx->pos;
+ return (Node *)s;
+Bad:
+ errorat(lx->pos, "failed to parse statement");
+ return nil;
+}
+
+static
+error
+blkstmt(Parser *p, Lexer *lx, Stmt **s)
+{
+ Token t;
+ int len;
+ int cap;
+ Node **ns;
+
+ alloc(*s);
+ (*s)->kind = Sblock;
+ (*s)->pos.beg = lx->pos;
+
+ t = peek(p, 0);
+ if (nomatch(t, Albrace))
+ goto Bad;
+ t = advance(p, lx);
+
+ len = 0, cap = 20;
+ ns = malloc(cap*sizeof(*ns));
+ while (t.kind != Arbrace) {
+ if (cap == len) {
+ cap += 20;
+ ns = realloc(ns, cap*sizeof(*ns));
+ }
+ ns[len++] = stmt(p, lx);
+ t = peek(p, 0);
+ }
+ advance(p, lx);
+
+ (*s)->pos.end = lx->pos;
+ (*s)->blk.n = len;
+ movearray((*s)->blk.item, ns, len);
+ return 0;
+Bad:
+ errorat(lx->pos, "failed to parse block statement");
+ free(ns);
+ return 1;
+}
+
+// -----------------------------------------------------------------------
+// types
+
+uint32
+ptrtype(uint32 base, uint32 qual)
+{
+ uint32 i;
+ Type *t;
+
+ i = type();
+ t = C.type.info + i;
+ t->kind = Tptr;
+ t->ptr.base = base;
+ t->ptr.qual = qual;
+ t->size = pointer.size;
+ t->align = pointer.align;
+ t->sign = pointer.sign;
+
+ return i;
+}
+
+uint32
+arraytype(uint32 base, uint32 qual, Expr *ix)
+{
+ int i, n;
+ Type *t;
+
+ /* TODO: evaluate the length */
+ n = 10;
+ i = type();
+ t = C.type.info + i;
+ t->kind = Tarray;
+ t->ptr.base = base;
+ t->size = n * C.type.info[base].size;
+ t->align = C.type.info[base].align;
+ t->sign = 0;
+
+ return i;
+}
+
+uint32
+functype(uint32 ret, int n, Field *args, int dots)
+{
+ uint32 i;
+ Type *t;
+
+ i = type();
+ t = C.type.info + i;
+ t->kind = Tfunc;
+ t->size = pointer.size;
+ t->align = pointer.align;
+ t->sign = pointer.sign;
+
+ t->func.ret = ret;
+ t->func.n = n;
+ t->func.arg = args;
+ t->func.dots = dots;
+
+ return i;
+}
+
+#define ALIGN_DOWN(n, a) ((n) & ~((a)-1))
+#define ALIGN_UP(n, a) ALIGN_DOWN((n) + (a)-1, (a))
+uint32
+structtype(int n, Field *field, Expr *bits)
+{
+ uint32 i;
+ Type *t;
+ Field *f, *e;
+
+ i = type();
+ t = C.type.info + i;
+ t->kind = Tstruct;
+ t->size = 0;
+ t->align = 0;
+ for (f = field, e = field+n; f != e; ++f) {
+ t->size += C.type.info[f->type].size + ALIGN_UP(t->size, C.type.info[f->type].align);
+ t->align = MAX(t->align, C.type.info[f->type].align);
+ }
+ t->aggr.len = n;
+ t->aggr.f = field;
+ t->aggr.x = bits;
+
+ return i;
+}
+
+uint32
+uniontype(int n, Field *field, Expr *bits)
+{
+ uint32 i;
+ Type *t;
+ Field *f, *e;
+
+ i = type();
+ t = C.type.info + i;
+ t->kind = Tstruct;
+ t->size = 0;
+ t->align = 0;
+ for (f = field, e = field+n; f != e; ++f) {
+ t->size = MAX(t->size, C.type.info[f->type].size);
+ t->align = MAX(t->align, C.type.info[f->type].align);
+ }
+ t->aggr.len = n;
+ t->aggr.f = field;
+ t->aggr.x = bits;
+
+ return i;
+}
+
+uint32
+enumtype(int n, string *elts, Expr *vals)
+{
+ uint32 i;
+ Type *t;
+ Field *f, *e;
+
+ i = type();
+ t = C.type.info + i;
+ t->kind = Tenum;
+ /* TODO: dont hardcode int32 */
+ t->size = 4;
+ t->align = 4;
+ t->enm.len = n;
+ t->enm.elt = elts;
+ t->enm.val = vals;
+
+ return i;
+}
+#undef ALIGN_UP
+#undef ALIGN_DOWN
+
+/* unpacking C declarations into sensible types */
+static
+uint32
+typeofname(Name *name, uint32 base)
+{
+ switch (name->kind) {
+ /* Nnil corresponds to an abstract declarator (i.e. no identifier) */
+ case Nnil:
+ case Nident:
+ return base;
+ case Nparen:
+ return typeofdtor(name->paren, base);
+ case Nindex:
+ return typeofname(name->sfx.name, arraytype(base, name->sfx.idx.q, name->sfx.idx.x));
+ case Ncall:
+ return typeofname(name->sfx.name, functype(base, name->sfx.call.n, name->sfx.call.arg, name->sfx.call.dots));
+ default:
+ panicf("unreachable");
+ }
+ return 0;
+}
+
+static
+uint32
+typeofdtor(Dtor *decl, uint32 base)
+{
+ int n;
+ Ptr *p;
+ uint64 b, tmp;
+
+ n = 0;
+ p = &decl->ptr;
+ b = p->kind;
+ while (b & 1) {
+ base = ptrtype(base, b >> 1);
+ if (++n >= 8) {
+ p = p->link;
+ b = p->kind;
+ } else {
+ b >>= 6;
+ }
+ }
+
+ return typeofname(decl->name, base);
+}
+
+static
+uint32
+basetype(Parser *p, Lexer *lx, uint64 *s)
+{
+ int n;
+ uint64 m;
+
+ if (spec(p, lx, s)) {
+ errorat(lx->pos, "failed to parse type specifier");
+ return 0;
+ }
+
+ m = (((*s<<32)>>32) & ~(MaskQul|MaskMem|MaskFcn));
+ for (n = 0; n < arrlen(validtypespec); n++) {
+ if (validtypespec[n] == m) {
+ if (indextypespec[n] < 0) {
+ m = *s >> 32;
+ if (!m) {
+ errorat(lx->pos, "not a valid type identifier");
+ return 0;
+ }
+ return m;
+ }
+ return indextypespec[n];
+ }
+ }
+
+ errorat(lx->pos, "invalid type specifier");
+ return 0;
+}
+
+static
+string
+namedecl(Parser *p, Lexer *lx, uint32 *base, int noname)
+{
+ Dtor *dt;
+ string name;
+ Type *t;
+
+ dt = getdtor(p);
+ name = nil;
+ if (dtor(p, lx, dt, noname)) {
+ errorat(lx->pos, "invalid declarator");
+ goto End;
+ }
+ if (!noname || noname == 2 && dt->name->kind)
+ name = nameof(dt->name);
+
+ *base = typeofdtor(dt, *base);
+ putdtor(p, dt);
+ return name;
+End:
+ putdtor(p, dt);
+ return nil;
+}
+
+// -----------------------------------------------------------------------
+// declarations
+
+static
+uint32
+enumerate(Parser *p, Lexer *lx, string name, int kind)
+{
+ int i, n;
+ uint64 s;
+ uint32 t;
+ Token tk;
+ /* TODO: think of a better soln */
+ string nm[1024], *elts;
+ Expr *cx[1024], *vals;
+
+ for (n = 0; tk.kind != Arbrace && n < arrlen(nm); n++) {
+ if (tk.kind != Aident) {
+ errorat(tk.pos, "invalid token %s in enum declaration", tokens[tk.kind]);
+ goto Bad;
+ }
+ nm[n] = tk.val.s;
+ cx[n] = nil;
+
+ tk = advance(p, lx);
+ switch(tk.kind) {
+ case Aeq:
+ advance(p, lx);
+ cx[n] = expr(p, lx);
+ tk = peek(p, 0);
+ if (tk.kind != Acomma)
+ continue;
+ /* fallthrough */
+ case Acomma:
+ tk = advance(p, lx);
+ }
+ }
+ copyarray(elts, nm, n);
+ copyarray(vals, cx, n);
+
+ t = enumtype(n, elts, vals);
+ declareenum(p, n, elts, vals);
+ return t;
+Bad:
+ errorat(tk.pos, "failed to parse enum declaration");
+ return 0;
+}
+
+static
+uint32
+aggregate(Parser *p, Lexer *lx, string name, int kind)
+{
+ int n;
+ uint64 s;
+ Token tk;
+ /* TODO: think of a better soln */
+ static Field fs[1024];
+ Field *f;
+ static Expr *cx[1024];
+ Expr *x;
+
+ for (n = 0, tk = peek(p, 0); tk.kind != Arbrace && n < arrlen(fs); n++) {
+ fs[n].type = basetype(p, lx, &s);
+ fs[n].qual = (uint32)(s & ~(MaskTyp|MaskInt|MaskFlt));
+ Field:
+ fs[n].name = namedecl(p, lx, &fs[n].type, 0);
+ tk = peek(p, 0);
+ switch (tk.kind) {
+ case Acolon:
+ advance(p, lx);
+ cx[n] = expr(p, lx);
+ tk = peek(p, 0);
+ if (tk.kind == Asemi) {
+ tk = advance(p, lx);
+ continue;
+ }
+ if (tk.kind != Acomma) {
+ errorat(tk.pos, "unrecognized token %s in struct field declaration", tokens[tk.kind]);
+ goto Bad;
+ }
+ /* fallthrough */
+ case Acomma:
+ advance(p, lx);
+ n++;
+ goto Field;
+
+ case Asemi:
+ tk = advance(p, lx);
+ continue;
+
+ default:
+ errorat(tk.pos, "unrecognized token %s in struct field declaration", tokens[tk.kind]);
+ goto Bad;
+ }
+ }
+ copyarray(f, fs, n);
+ copyarray(x, cx, n);
+ return (kind == Tstruct) ? structtype(n, f, x) : uniontype(n, f, x);
+Bad:
+ errorat(tk.pos, "failed to parse aggregate declaration");
+ return 0;
+}
+
+static
+error
+spec(Parser *p, Lexer *lx, uint64 *spec)
+{
+ Token t;
+ int n, i;
+ Sym *typ;
+ string name;
+ uint32 tag;
+ uint64 s, sm;
+ static uint32 (*aggrfunc[2])(Parser *, Lexer *, string , int) = {aggregate, enumerate};
+
+ s = 0;
+ while (t = peek(p, 0), t.kind >= Aident) {
+ /* typename */
+ if (t.kind == Aident) {
+ typ = lookupobj(p, t.val.s);
+ if (!typ || (typ && typ->kind != Stype))
+ break;
+
+ sm = typ->type;
+ s |= (sm << 32 | Tname);
+ advance(p, lx);
+ continue;
+ }
+
+ /* keyword */
+ switch (n = t.val.i) {
+ case Kauto: case Kregister: case Kstatic: case Kextern: case Ktypedef: case Ktls:
+ if (s & MaskMem) {
+ errorat(lx->pos, "multiple storage class specifiers: second was %s", keywords[n]);
+ goto Bad;
+ }
+ break;
+
+ case Kinline: case Knoret:
+ if (s & Bit(n))
+ warnat(lx->pos, "duplicate %s function specifier", keywords[n]);
+ break;
+
+ case Kconst: case Kvolatile:
+ if (s & Bit(n))
+ warnat(lx->pos, "duplicate %s specifier found in declaration", keywords[n]);
+ break;
+
+ case Ksigned: case Kunsigned:
+ if (s & MaskSgn) {
+ if (s & Bit(n)) {
+ warnat(lx->pos, "duplicated storage class specifier: second was %s", keywords[n]);
+ break;
+ }
+ errorat(lx->pos, "multiple storage class specifiers");
+ goto Bad;
+ }
+ break;
+
+ case Kshort:
+ if (s & Tshort) {
+ warnat(lx->pos, "duplicated short specifier");
+ break;
+ }
+ break;
+
+ case Klong:
+ if ((s >> Klong) & 2) {
+ errorat(lx->pos, "cannot chain three or more long specifiers");
+ goto Bad;
+ }
+ s += Bit(n);
+ t = advance(p, lx);
+ continue;
+
+ case Kvoid: case Kchar: case Kint: case Kfloat: case Kdouble:
+ if (s & MaskTyp) {
+ errorat(lx->pos, "more than one base type specified");
+ goto Bad;
+ }
+ break;
+
+ case Kstruct: case Kunion:
+ i = 0;
+ goto Aggr;
+ case Kenum:
+ i = 1;
+ Aggr:
+ if (s & (Tstruct | Tunion | Tenum)) {
+ errorat(lx->pos, "more than one aggregate/enum type specified");
+ goto Bad;
+ }
+ t = advance(p, lx);
+ if (t.kind != Aident && t.kind != Albrace) {
+ errorat(t.pos, "enum specifier missing valid declaration");
+ goto Bad;
+ }
+
+ /* NOTE: This offset is needed to correctly obtain Tstruct */
+ n++;
+ name = nil;
+ tag = 0;
+ if (t.kind == Aident) {
+ name = t.val.s;
+ t = advance(p, lx);
+ }
+ if (t.kind == Albrace) {
+ /* TODO: we need check if the name exists. */
+ t = advance(p, lx);
+ /* NOTE: This depends on the enum order. KEEP IN SYNC */
+ tag = aggrfunc[i](p, lx, name, Bit(n));
+ if (t = peek(p, 0), nomatch(t, Arbrace)) {
+ errorat(t.pos, "invalid token %s in aggregate/enum declaration", tokens[t.kind]);
+ goto Bad;
+ }
+ /* high bits encode the type index */
+ s |= (uint64)tag << 32;
+ }
+ /* TODO: if name does not exist, enter in an incomplete type! */
+ if (name)
+ declaretag(p, tag, name);
+
+ break;
+
+ default:
+ errorat(t.pos, "invalid keyword '%s' found in declaration specifier", keywords[n]);
+ }
+
+ s |= Bit(n);
+ advance(p, lx);
+ }
+
+ *spec = s;
+ return 0;
+
+Bad:
+ /* TODO: serialize bitflags to string for nice error message */
+ errorat(lx->pos, "ignoring specifier");
+ *spec = Sbad;
+ return 1;
+}
+
+/*
+ * name declaration
+ * see dtor for valid values of ab
+ */
+static
+error
+name(Parser *p, Lexer *lx, Name **nmp, int ab)
+{
+ Token t;
+ int n, k;
+ uint64 s;
+ Sym *sym;
+ Name *nm, *tmp;
+
+ /* max args = 100 */
+ struct Field args[100];
+
+ nm = *nmp;
+ t = peek(p, 0);
+ switch (k = t.kind) {
+ case Aident:
+ if (ab == 1) {
+ errorat(t.pos, "identifier not allowed in abstract declarator");
+ goto Bad;
+ }
+ nm->kind = Nident;
+ nm->ident = t.val.s;
+ break;
+
+ case Alparen:
+ advance(p, lx);
+ nm->kind = Nparen;
+ nm->paren = getdtor(p);
+ if (dtor(p, lx, nm->paren, ab)) {
+ putdtor(p, nm->paren);
+ nm->paren = nil;
+ errorat(lx->pos, "invalid declarator in parenthesis");
+ goto Bad;
+ }
+
+ t = peek(p, 0);
+ if (nomatch(t, Arparen)) {
+ putdtor(p, nm->paren);
+ nm->paren = nil;
+ errorat(lx->pos, "missing closing paren in declarator");
+ goto Bad;
+ }
+ break;
+
+ case Albrakt:
+ if (ab)
+ goto Sfx;
+ errorat(lx->pos, "missing identifier in non-abstract declarator");
+ /* fallthrough */
+ default:
+ if (ab)
+ goto Sfx;
+ errorat(lx->pos, "invalid token '%s' in name declaration", tokens[k]);
+ goto Bad;
+ }
+
+ t = advance(p, lx);
+Sfx:
+ for (;;) {
+ switch (k = t.kind) {
+ case Albrakt:
+ tmp = getname(p);
+ tmp->kind = Nindex;
+ tmp->sfx.name = nm;
+
+ nm = tmp, tmp = nil;
+
+ t = advance(p, lx);
+ if (t.kind == Arbrakt) {
+ nm->sfx.idx.q = 0;
+ Iend:
+ nm->sfx.idx.x = nil;
+ t = advance(p, lx);
+ break;
+ }
+ if (t.kind == Astar) {
+ nm->sfx.idx.q = -1;
+ IStar:
+ nm->sfx.idx.x = nil;
+ t = advance(p, lx);
+ if (t.kind != Arbrakt) {
+ errorat(t.pos, "invalid '*' syntax in index expression");
+ goto Bad;
+ }
+ t = advance(p, lx);
+ break;
+ }
+
+ if (spec(p, lx, &s)) {
+ errorat(lx->pos, "invalid type qualifier list in index expression");
+ goto Bad;
+ }
+
+ nm->sfx.idx.q = (uint32)s;
+ t = peek(p, 0);
+
+ if (t.kind == Astar)
+ goto IStar;
+
+ if (t.kind == Arbrakt)
+ goto Iend;
+
+ nm->sfx.idx.x = expr(p, lx);
+
+ t = peek(p, 0);
+ if (nomatch(t, Arbrakt)) {
+ errorat(t.pos, "unterminated index expression");
+ goto Bad;
+ }
+
+ t = advance(p, lx);
+ continue;
+
+ case Alparen:
+ tmp = getname(p);
+ tmp->kind = Ncall;
+ tmp->sfx.name = nm;
+
+ nm = tmp, tmp = nil;
+
+ t = advance(p, lx);
+ nm->sfx.call.n = 0;
+ switch (t.kind) {
+ case Arparen:
+ nm->sfx.call.arg = nil;
+ break;
+
+ case Aident:
+ sym = lookupobj(p, t.val.s);
+ if (!sym || (sym && sym->kind != Stype)) {
+ while (t.kind == Aident) {
+ if (nm->sfx.call.n >= arrlen(args))
+ panicf("ident stack overflow");
+ args[nm->sfx.call.n++] = (struct Field) {
+ .qual = 0,
+ .type = 0,
+ .name = t.val.s,
+ };
+ t = advance(p, lx);
+ }
+ if (nomatch(t, Arparen)) {
+ errorat(t.pos, "token '%s' found in function parameter identifier list");
+ goto Bad;
+ }
+ copyarray(nm->sfx.call.arg, args, nm->sfx.call.n);
+ break;
+ }
+ goto ParamLoop;
+
+ case Akeywd:
+ if (t.val.i < Kconst || t.val.i > Kenum) {
+ errorat(t.pos, "invalid keyword %s inside function signature");
+ goto Bad;
+ }
+
+ ParamLoop:
+ if (nm->sfx.call.n >= arrlen(args)-1)
+ panicf("out of argument buffer");
+
+ args[nm->sfx.call.n].type = basetype(p, lx, &s);
+ if (!args[nm->sfx.call.n].type) {
+ errorat(lx->pos, "could not parse base type in function call");
+ goto Bad;
+ }
+
+ args[nm->sfx.call.n].qual = (uint32)s & ~(MaskTyp|MaskInt|MaskFlt);
+ args[nm->sfx.call.n].name = namedecl(p, lx, &args[nm->sfx.call.n].type, 2);
+
+ nm->sfx.call.n++;
+ if ((t = peek(p, 0)).kind == Acomma) {
+ advance(p, lx);
+ goto ParamLoop;
+ }
+
+ if (t.kind == Aellip) {
+ nm->sfx.call.dots = 1;
+ t = advance(p, lx);
+ }
+
+ if (nomatch(t, Arparen)) {
+ errorat(t.pos, "token '%s' found in function parameter list");
+ goto Bad;
+ }
+ copyarray(nm->sfx.call.arg, args, nm->sfx.call.n);
+ break;
+
+ default:
+ errorat(t.pos, "invalid token %s inside function call signature", tokens[t.kind]);
+ goto Bad;
+ }
+
+ t = advance(p, lx);
+ continue;
+
+ default:
+ break;
+ }
+ break;
+ }
+
+ *nmp = nm;
+ return 0;
+Bad:
+ return 1;
+}
+
+/* pointer kind is partitioned into 8x6 regions
+ * ab => abstract
+ * @ 0: must have identifier
+ * @ 1: must not have identifier
+ * @ 2: don't care
+ * else: undefined
+ */
+static
+error
+dtor(Parser *p, Lexer *lx, Dtor *d, int ab)
+{
+ int n, k;
+ error err;
+ Token t;
+ Dtor *link;
+ Ptr *ptr, *x;
+
+ err = 1;
+
+ ptr = &d->ptr;
+ ptr->kind = 0;
+ ptr->link = nil;
+
+ t = peek(p, 0);
+ if (t.kind != Astar) {
+ if (ab || t.kind == Aident || t.kind == Arparen)
+ goto Name;
+ goto Bad;
+ }
+ n = 0;
+Ptr:
+ ptr->kind |= Bit(n);
+ advance(p, lx);
+Key:
+ t = peek(p, 0);
+ switch (k = t.kind) {
+ case Akeywd:
+ if (Kconst <= t.val.i && t.val.i <= Katomic)
+ ptr->kind |= Bit(6*n + (t.val.i - Kconst + 1));
+ else {
+ errorat(lx->pos, "invalid keyword '%s' modifies pointer", keywords[t.val.i]);
+ goto Bad;
+ }
+ advance(p, lx);
+ goto Key;
+
+ case Astar:
+ if (++n >= 8) {
+ x = getptr(p);
+ x->kind = 0;
+ x->link = nil;
+ ptr->link = x;
+ ptr = x;
+ n = 0;
+ }
+ goto Ptr;
+
+ case Aident:
+ case Alparen:
+ goto Name;
+
+ default:
+ if (ab)
+ goto Name;
+ errorat(lx->pos, "invalid token '%s' modifies pointer specification", tokens[t.kind]);
+ goto Bad;
+ }
+Name:
+ return name(p, lx, &d->name, ab);
+Bad:
+ return err;
+}
+
+static
+Decl *
+decl(Parser *p, Lexer *lx)
+{
+ uint64 s;
+ Token t;
+ Decl *d;
+ Expr *x;
+ string name;
+ struct Decls *ds;
+ uint32 base, type;
+
+ alloc(d);
+
+ d->kind = 0;
+ d->pos.beg = lx->pos;
+
+ base = basetype(p, lx, &s);
+ if (!base) {
+ errorat(lx->pos, "could not parse type declaration");
+ goto Bad;
+ }
+
+ x = nil;
+ d->spec = (uint32)s & ~(MaskInt|MaskFlt|MaskTyp);
+ d->type = base;
+ d->name = namedecl(p, lx, &d->type, 0);
+ /* TODO: think about functions (both decls and defs) */
+ d->kind = (s & Mtype) ? Dtype : Dvar;
+
+ switch (t = peek(p, 0), t.kind) {
+ case Aeq:
+ if (s & Mtype) {
+ errorat(d->pos.beg, "initialization of type not allowed");
+ goto Bad;
+ }
+ t = advance(p, lx);
+ x = initx(p, lx);
+ d->kind = Dvar;
+ if (t.kind != Acomma) {
+ d->init = x;
+ goto Semi;
+ }
+ /* fallthrough */
+ case Acomma:
+ d->kind |= Dlist;
+ d->list.init = x;
+ /* move singleton data over */
+ name = d->name;
+ type = d->type;
+ d->list.name = name;
+ d->list.type = type;
+ ds = &d->list;
+ /* iterate until we hit end of list */
+ while (t.kind == Acomma) {
+ t = advance(p, lx);
+
+ alloc(ds->link);
+ ds = ds->link;
+ ds->type = base;
+ ds->name = namedecl(p, lx, &ds->type, 0);
+
+ t = peek(p, 0);
+ if (t.kind == Aeq) {
+ t = advance(p, lx);
+ ds->init = initx(p, lx);
+ } else
+ ds->init = nil;
+ }
+ goto Semi;
+
+ case Albrace:
+ d->kind = Dfunc;
+ alloc(d->body);
+
+ if (!attop(p)) {
+ errorat(lx->pos, "nested function declarations are illegal");
+ goto Bad;
+ }
+
+ if (C.type.info[d->type].kind != Tfunc) {
+ errorat(lx->pos, "attempted to define function body for non function type");
+ goto Bad;
+ }
+
+ openscope(p);
+ if (blkstmt(p, lx, &d->body)) {
+ errorat(lx->pos, "failed to parse function body");
+ goto Bad;
+ }
+ closescope(p);
+ break;
+
+ default:
+ Semi:
+ if (nomatch(t, Asemi)) {
+ errorat(t.pos, "no semicolon after declaration");
+ goto Bad;
+ }
+ t = advance(p, lx);
+ }
+
+ d->pos.end = lx->pos;
+ declareobj(p, d);
+ return d;
+Bad:
+ errorat(lx->pos, "failed to parse top level declaration");
+ return nil;
+}
+
+// -----------------------------------------------------------------------
+// top level api
+
+void
+setup(Parser *p, Lexer *lx)
+{
+ advance(p,lx);
+ advance(p,lx);
+
+ /* define all builtin typedefs */
+ declareobj(p, &C.builtin.vargs);
+}
+
+error
+parse(Parser *p, Lexer *lx)
+{
+ Token tok;
+
+ setup(p, lx);
+ while ((tok = peek(p, 0)), tok.kind > Aeof) {
+ if (p->ast.len >= p->ast.cap) {
+ p->ast.cap += 20;
+ p->ast.decls = realloc(p->ast.decls, p->ast.cap*sizeof(*p->ast.decls));
+ }
+ p->ast.decls[p->ast.len++] = decl(p, lx);
+ }
+
+ return 0;
+}
diff --git a/src/cmd/cc/bits.c b/src/cmd/cc/bits.c
new file mode 100644
index 0000000..4b405dc
--- /dev/null
+++ b/src/cmd/cc/bits.c
@@ -0,0 +1,114 @@
+#include "cc.h"
+
+// -----------------------------------------------------------------------
+// Architecture
+
+enum
+{
+ archx64,
+ numarch,
+};
+
+// -----------------------------------------------------------------------
+// Types
+
+/*
+ * enumerated type specifers
+ * see https://en.wikipedia.org/wiki/C_data_types
+ */
+#define VOID X(Tvoid, 2)
+
+#define BOOL X(Tbool, 3)
+#define CHAR X(Tchar, 4)
+#define SCHAR X(Tsign|Tchar, 5)
+#define UCHAR X(Tunsign|Tchar, 6)
+
+#define SHORT X(Tshort, 7), X(Tshort|Tint, 7)
+#define SSHORT X(Tsign|Tshort, 8), X(Tsign|Tshort|Tint, 8)
+#define USHORT X(Tunsign|Tshort, 9), X(Tunsign|Tshort|Tint, 9)
+
+#define INT X(0, 10), X(Tint, 10)
+#define SINT X(Tsign, 11), X(Tsign|Tint, 11)
+#define UINT X(Tunsign, 12), X(Tunsign|Tint, 12)
+
+#define LONG X(Tlong, 13), X(Tlong|Tint, 13)
+#define SLONG X(Tsign|Tlong, 14), X(Tsign|Tlong|Tint, 14)
+#define ULONG X(Tunsign|Tlong, 15), X(Tunsign|Tlong|Tint, 15)
+
+#define VLONG X(Tvlong, 16), X(Tvlong|Tint, 16)
+#define SVLONG X(Tsign|Tvlong, 17), X(Tsign|Tvlong|Tint, 17)
+#define UVLONG X(Tunsign|Tvlong, 18), X(Tunsign|Tvlong|Tint, 18)
+
+#define FLOAT X(Tfloat, 19)
+#define DOUBLE X(Tdouble, 20)
+#define LONGDB X(Tlong|Tdouble, 21)
+#define COMPLEX X(Tcmplx, 22)
+#define IMAGINARY X(Timag, 23)
+
+/* fixed width definitions */
+#define DEF(sz, aln, mx, sgn) {.size=sz, .align=aln, .max=mx, .sign=sgn }
+
+#define INT8 DEF(1, 1, 0x7fff, 0)
+#define UINT8 DEF(1, 1, 0xffff, 1)
+
+#define INT16 DEF(2, 2, 0x7fff, 0)
+#define UINT16 DEF(2, 2, 0xffff, 1)
+
+#define INT32 DEF(4, 4, 0x7fffffff, 0)
+#define UINT32 DEF(4, 4, 0xffffffff, 1)
+
+#define INT64 DEF(8, 8, 0x7fffffffffffffff, 0)
+#define UINT64 DEF(8, 8, 0xffffffffffffffff, 1)
+
+/* architecture specific definitions */
+// TODO: max value should be able to take floats
+#define TYPES \
+ TYPE(DEF(0, 0, 0, 0), VOID) \
+ TYPE(INT8, BOOL) \
+ TYPE(UINT8, CHAR) \
+ TYPE(INT8, SCHAR) \
+ TYPE(UINT8, UCHAR) \
+ TYPE(INT16, SHORT) \
+ TYPE(INT16, SSHORT) \
+ TYPE(UINT16, USHORT) \
+ TYPE(INT32, INT) \
+ TYPE(INT32, SINT) \
+ TYPE(UINT32, UINT) \
+ TYPE(INT64, LONG) \
+ TYPE(INT64, SLONG) \
+ TYPE(UINT64, ULONG) \
+ TYPE(INT64, VLONG) \
+ TYPE(INT64, SVLONG) \
+ TYPE(UINT64, UVLONG) \
+ TYPE(DEF(4, 4, 0, 0), FLOAT) \
+ TYPE(DEF(8, 8, 0, 0), DOUBLE) \
+ TYPE(DEF(16, 16, 0, 0), LONGDB) \
+ TYPE(DEF(8, 8, 0, 0), COMPLEX) \
+ TYPE(DEF(4, 4, 0, 0), IMAGINARY) \
+
+Type pointer = {.size=8, .align=8, .max=0xffffffffffffffff, .sign=0};
+
+/* pack architecture specific definitions into exported arrays */
+#define TYPE(a, ...) a,
+Type basetypes[] = {
+ { 0 }, /* sentinel value for bad types */
+ { 0 }, /* sentinel value for variadic args */
+ TYPES
+};
+#undef TYPE
+
+#define TYPE(a, ...) __VA_ARGS__,
+#define X(a, b) a
+uint64 validtypespec[38] = {
+ TYPES
+ Tstruct, Tunion, Tenum, Tname,
+};
+#undef X
+
+#define X(a, b) b
+int indextypespec[38] = {
+ TYPES
+ -1, -1, -1, -1,
+};
+#undef X
+#undef TYPE
diff --git a/src/cmd/cc/cc.c b/src/cmd/cc/cc.c
new file mode 100644
index 0000000..8ad0022
--- /dev/null
+++ b/src/cmd/cc/cc.c
@@ -0,0 +1,409 @@
+#include "cc.h"
+#include <libn/macro/map.h>
+
+// -----------------------------------------------------------------------
+// string interning
+
+/* jenkins' one at a time hash */
+static
+int32
+hash_string(byte* s)
+{
+ int32 h;
+
+ h = 0;
+ if (s != nil) {
+ for (; *s; ++s) {
+ h += *s;
+ h = (h << 10);
+ h = (h >> 6);
+ }
+ }
+
+ h += (h << 3);
+ h ^= (h >> 11);
+ h += (h >> 11);
+
+ return h;
+}
+
+static
+int
+streq(byte *s, byte *t)
+{
+ if (s == nil) {
+ if (t == nil)
+ return 1;
+ else
+ return 0;
+ }
+
+ return (t == nil) ? 0 : strcmp(s, t) == 0;
+}
+
+#define HASH(s) hash_string(s)
+#define EQUAL(s, t) (streq(s, t))
+static
+int
+getstr(string key, int *ok)
+{
+ int idx;
+ MAP_GET(idx, (&C.strs), key, HASH, EQUAL);
+
+ *ok = idx < C.strs.n_buckets;
+ return idx;
+}
+
+static
+void
+·free(void* _, void* ptr) {
+ return free(ptr);
+}
+
+static
+void *
+·alloc(void* _, uint n, ulong size) {
+ return malloc(n*size);
+}
+
+static
+void *
+·calloc(void* _, uint n, ulong size) {
+ return calloc(n, size);
+}
+
+static
+int
+morestrtab(StrTab *tab, int n)
+{
+ MAP_GROW(tab, string, int32, n, HASH, ·calloc, ·free, nil);
+}
+
+static
+int
+putstr(byte *s, error *err)
+{
+ int sz;
+ sz = C.strs.size;
+ MAP_PUT((&C.strs), s, sz, HASH, EQUAL, morestrtab, err);
+}
+#undef HASH
+#undef EQUAL
+
+int32
+intern(byte **s)
+{
+ int i, ok;
+
+ i = getstr(*s, &ok);
+ if (ok) {
+ *s = C.strs.keys[i];
+ goto END;
+ }
+
+ *s = str·make(*s);
+ i = putstr(*s, &ok);
+ C.strs.vals[i] = C.strs.size - 1;
+
+END:
+ return C.strs.vals[i];
+}
+
+// -----------------------------------------------------------------------
+// type interning
+
+/* TODO: intern types for memory savings */
+int
+type()
+{
+ if (C.type.len >= C.type.cap) {
+ C.type.cap += 100;
+ C.type.info = realloc(C.type.info, C.type.cap * sizeof(*C.type.info));
+ }
+
+ return C.type.len++;
+}
+
+// -----------------------------------------------------------------------
+// universal compiler builtins
+
+#define KEYWORD(a, b) b,
+byte *keywords[NUM_KEYWORDS] = { KEYWORDS };
+#undef KEYWORD
+
+#define DIRECTIVE(a, b, c) b,
+byte *directives[NUM_DIRECTIVES] = { DIRECTIVES };
+#undef DIRECTIVE
+
+struct Compiler C = { 0 };
+
+// -----------------------------------------------------------------------
+// cli flag handlers
+
+void
+pushinclude(byte *dirs)
+{
+ string d, s, *it, *end;
+
+ while (*dirs != '\0') {
+ d = strchr(dirs, ' ');
+ if (d != nil)
+ *d = '\0';
+
+ s = dirs;
+ intern(&s);
+ for (it = C.inc.dir, end = it + C.inc.len; it != end; ++it) {
+ if ((uintptr)s == (uintptr)(*it))
+ goto Nextdir;
+ }
+
+ if (C.inc.len == C.inc.cap) {
+ C.inc.cap += 20;
+ C.inc.dir = realloc(C.inc.dir, C.inc.cap*sizeof(*C.inc.dir));
+ }
+ C.inc.dir[C.inc.len++] = s;
+Nextdir:
+ if (d == nil)
+ break;
+ dirs = d + 1;
+ }
+}
+
+// -----------------------------------------------------------------------
+// error reporting
+
+void
+errorat(Pos x, byte *fmt, ...)
+{
+ va_list args;
+ va_start(args, fmt);
+
+ printf("error:%s:%d:%d: ", os·basename(x.path), x.line, x.col);
+ vprintf(fmt, args);
+ printf("\n");
+
+ va_end(args);
+ assert(0);
+}
+
+void
+warnat(Pos x, byte *fmt, ...)
+{
+ va_list args;
+ va_start(args, fmt);
+
+ printf("warning:%s:%d:%d: ", os·basename(x.path), x.line, x.col);
+ vprintf(fmt, args);
+ printf("\n");
+
+ va_end(args);
+}
+
+// -----------------------------------------------------------------------
+// main point of entry
+
+void
+init(void)
+{
+ int i;
+
+ for (i = 0; i < arrlen(keywords); i++)
+ intern(&keywords[i]);
+
+ for (i = 0; i < arrlen(directives); i++)
+ intern(&directives[i]);
+
+ C.heap = mem·makearena(mem·sys, nil);
+
+ /* compiler definitions */
+ C.def.len = 0;
+ C.def.cap = 100;
+ C.def.val = calloc(C.def.cap, sizeof(*C.def.val));
+
+ /* compiler include paths */
+ C.inc.len = 0;
+ C.inc.cap = 100;
+ C.inc.dir = calloc(C.inc.cap, sizeof(*C.inc.dir));
+ C.inc.dir[C.inc.len++] = ".";
+
+ C.outfile = nil;
+
+ /* type info */
+ C.type.len = arrlen(basetypes);
+ C.type.cap = 100 + arrlen(basetypes);
+ C.type.info = calloc(C.type.cap, sizeof(*C.type.info));
+
+ memcpy(C.type.info, basetypes, C.type.len * sizeof(*C.type.info));
+
+ /* builtins */
+ C.builtin.vargs = (Decl) {
+ .pos = (Range) {
+ .beg = {
+ .col = 0,
+ .line = 0,
+ .path = "<builtin>",
+ },
+ .end = {
+ .col = 0,
+ .line = 0,
+ .path = "<builtin>",
+ },
+ },
+ .kind = Dtype,
+ .spec = Mtype,
+ .type = 1,
+ .name = "__builtin_va_list",
+ };
+
+ intern(&C.builtin.vargs.name);
+}
+
+void
+initlx(Lexer *lx)
+{
+ int i;
+
+ memset(lx, 0, sizeof(*lx));
+ lx->b = lx->buf;
+
+ /* predefine macros */
+ dodefine(lx, "__LINE__");
+ dodefine(lx, "__FILE__");
+ lx->macline = (uintptr)lookup(&lx->sym, "__LINE__");
+ lx->macfile = (uintptr)lookup(&lx->sym, "__FILE__");
+
+ for (i = 0; i < C.def.len; i++)
+ dodefine(lx, C.def.val[i]);
+
+ lx->omit.len = 0;
+ lx->omit.cap = 100;
+ lx->omit.path = calloc(lx->omit.cap, sizeof(*C.inc.dir));
+
+ lx->new = lx->iostk;
+ lx->new->link = nil;
+ memset(lx->iostk, 0, sizeof(lx->iostk));
+
+ lx->sym = (SymTab){ 0 };
+}
+
+void
+freelx(Lexer *lx)
+{
+ free(lx->omit.path);
+}
+
+void
+initp(Parser *p)
+{
+ /* initialize temporary buffers */
+ memset(p->spstk, 0, sizeof(p->spstk));
+ memset(p->nmstk, 0, sizeof(p->nmstk));
+ memset(p->dtstk, 0, sizeof(p->dtstk));
+ memset(p->ptstk, 0, sizeof(p->ptstk));
+
+ p->sp = p->spstk;
+ p->nm = p->nmstk;
+ p->dt = p->dtstk;
+ p->pt = p->ptstk;
+
+ /* initialize ast */
+ p->ast.cap = 0;
+ p->ast.len = 0;
+ p->ast.decls = nil;
+}
+
+error
+compile(byte *path)
+{
+ Lexer lx;
+ Parser p;
+ error err;
+ byte *sep, out[400];
+
+ intern(&path);
+ strcpy(out, path);
+
+ sep = utf8·findrrune(out, '/');
+ if (sep)
+ *sep++ = '\0';
+ else
+ sep = out;
+
+ if (!C.outfile) {
+ C.outfile = sep;
+ if (C.outfile) {
+ if ((sep = utf8·findrrune(C.outfile, '.'))) {
+ sep[0] = '.';
+ sep[1] = 'o';
+ sep[2] = '\0';
+ }
+ } else {
+ C.outfile = "/dev/null";
+ }
+ }
+
+ initlx(&lx);
+ initp(&p);
+
+ lx.io = openio(&lx, path);
+ lx.pos = (Pos){
+ .path = path,
+ .line = 1,
+ .col = 1,
+ };
+
+ err = parse(&p, &lx);
+ freelx(&lx);
+ return err;
+}
+
+error
+main(int argc, byte *argv[])
+{
+ byte *a, *src;
+ int err;
+
+ init();
+
+ ARGBEGIN {
+ case 'o':
+ C.outfile = ARGF();
+ break;
+
+ case 'D':
+ a = ARGF();
+ if (a) {
+ intern(&a);
+ if (C.def.len >= C.def.cap) {
+ C.def.cap += 20;
+ C.def.val = realloc(C.def.val, C.def.cap * sizeof(*C.def.val));
+ }
+ C.def.val[C.def.len++] = a;
+ }
+ break;
+
+ case 'I':
+ a = ARGF();
+ if (a)
+ pushinclude(a);
+ break;
+ } ARGEND
+
+ if (argc < 1 && C.outfile == nil) {
+ printf("usage: cc [-options] files\n");
+ exit(1);
+ }
+
+ // NOTE: This is just for my comfort during debugging.
+ pushinclude("/home/nolln/root/include");
+ pushinclude("/home/nolln/root/include/vendor/libc");
+
+ src = (argc == 0) ? "<stdin>" : argv[0];
+ intern(&src);
+
+ if ((err = compile(src)), err) {
+ exit(2);
+ }
+
+ exit(0);
+}
diff --git a/src/cmd/cc/cc.h b/src/cmd/cc/cc.h
new file mode 100644
index 0000000..8fc5f73
--- /dev/null
+++ b/src/cmd/cc/cc.h
@@ -0,0 +1,806 @@
+#pragma once
+
+#include <u.h>
+#include <libn.h>
+
+#define iota(x) 1 << (x)
+
+/* core types */
+typedef struct Io Io;
+typedef struct Pos Pos;
+typedef struct Range Range;
+typedef struct Token Token;
+
+typedef struct Lexer Lexer;
+
+typedef struct Sym Sym;
+typedef struct Type Type;
+typedef struct Scope Scope;
+
+typedef struct Parser Parser;
+
+typedef struct Ptr Ptr;
+typedef struct Name Name;
+typedef struct Dtor Dtor;
+typedef struct Field Field;
+
+typedef struct Node Node;
+typedef struct Decl Decl;
+typedef struct Stmt Stmt;
+typedef struct Expr Expr;
+
+typedef struct SymTab SymTab;
+typedef struct StrTab StrTab;
+
+typedef struct Compiler Compiler;
+
+/* keywords of language */
+#define KEYWORDS \
+ KEYWORD(Kauto,"auto") \
+ KEYWORD(Kregister,"register") \
+ KEYWORD(Kstatic,"static") \
+ KEYWORD(Kextern,"extern") \
+ KEYWORD(Ktls,"thread_local") \
+ KEYWORD(Ktypedef,"typedef") \
+ KEYWORD(Kinline,"inline") \
+ KEYWORD(Knoret,"_Noreturn") \
+ KEYWORD(Kconst,"const") \
+ KEYWORD(Kvolatile,"volatile") \
+ KEYWORD(Krestrict,"restrict") \
+ KEYWORD(Katomic,"_Atomic") \
+ KEYWORD(Ksigned,"signed") \
+ KEYWORD(Kunsigned,"unsigned") \
+ KEYWORD(Kvoid,"void") \
+ KEYWORD(Kbool,"_Bool") \
+ KEYWORD(Kchar,"char") \
+ KEYWORD(Kfloat,"float") \
+ KEYWORD(Kdouble,"double") \
+ KEYWORD(Kcomplex,"complex") \
+ KEYWORD(Kimaginary,"imaginary") \
+ KEYWORD(Kint,"int") \
+ KEYWORD(Kshort,"short") \
+ KEYWORD(Klong,"long") \
+ KEYWORD(Kstruct,"struct") \
+ KEYWORD(Kunion,"union") \
+ KEYWORD(Kenum,"enum") \
+ KEYWORD(Kfor,"for") \
+ KEYWORD(Kdo,"do") \
+ KEYWORD(Kwhile,"while") \
+ KEYWORD(Kcontinue,"continue") \
+ KEYWORD(Kif,"if") \
+ KEYWORD(Kelse,"else") \
+ KEYWORD(Kswitch,"switch") \
+ KEYWORD(Kcase,"case") \
+ KEYWORD(Kdefault,"default") \
+ KEYWORD(Kbreak,"break") \
+ KEYWORD(Kgoto,"goto") \
+ KEYWORD(Kreturn,"return") \
+ KEYWORD(Ksizeof,"sizeof") \
+ KEYWORD(Kalignof,"alignof") \
+ KEYWORD(Kalignas,"alignas")
+
+#define KEYWORD(a, b) a,
+enum { KEYWORDS NUM_KEYWORDS };
+#undef KEYWORD
+
+extern byte *keywords[NUM_KEYWORDS];
+
+// -----------------------------------------------------------------------
+// lexing: byte stream -> tokens
+// pre-processor built in
+
+/* source position: error reporting */
+struct Pos
+{
+ int col;
+ int line;
+ string path;
+};
+
+
+struct Range
+{
+ Pos beg;
+ Pos end;
+};
+
+void errorat(Pos x, byte *fmt, ...);
+void warnat(Pos x, byte *fmt, ...);
+
+/* pre-processor */
+#define DIRECTIVES \
+ DIRECTIVE(Dpragma,"pragma", ppprag) \
+ DIRECTIVE(Dinclude,"include", ppinc) \
+ DIRECTIVE(Ddefine,"define", ppdef) \
+ DIRECTIVE(Dundef,"undef", ppund) \
+ DIRECTIVE(Dif,"if", ppif0) \
+ DIRECTIVE(Delif,"elif", ppif1) \
+ DIRECTIVE(Delse, "else", ppif1) \
+ DIRECTIVE(Difdef,"ifdef", ppif2) \
+ DIRECTIVE(Difndef,"ifndef", ppif3) \
+ DIRECTIVE(Dendif,"endif", ppend)
+
+#define DIRECTIVE(a, b, c) a,
+enum { DIRECTIVES NUM_DIRECTIVES };
+#undef DIRECTIVE
+
+extern byte *directives[NUM_DIRECTIVES];
+
+error domacro(Lexer*);
+error dodefine(Lexer *lx, string s);
+int expandmacro(Lexer *lx, Sym *s, byte *dst);
+
+extern error (*macros[NUM_DIRECTIVES])(Lexer*);
+
+/* tokenization of byte stream */
+#define TOKENS \
+ TOK(Anil,"nil") \
+ TOK(Aeof,"eof") \
+ TOK(Aeq, "==") \
+ TOK(Aneq, "!=") \
+ TOK(Anot, "!") \
+ TOK(Aneg, "~") \
+ TOK(Axor, "^") \
+ TOK(Aor, "|") \
+ TOK(Aand, "&") \
+ TOK(Aoror, "||") \
+ TOK(Aandand, "&&") \
+ TOK(Aadd,"+") \
+ TOK(Asub,"-") \
+ TOK(Astar,"*") \
+ TOK(Adiv,"/") \
+ TOK(Amod,"%") \
+ TOK(Agt,">") \
+ TOK(Alt,"<") \
+ TOK(Agteq,">=") \
+ TOK(Alteq,"<=") \
+ TOK(Alsft,"<<") \
+ TOK(Arsft,">>") \
+ TOK(Ainc,"++") \
+ TOK(Adec,"--") \
+ TOK(Aasn,"=") \
+ TOK(Aorasn,"|=") \
+ TOK(Axorasn,"^=") \
+ TOK(Aandasn,"&=") \
+ TOK(Aaddasn,"+=") \
+ TOK(Asubasn,"-=") \
+ TOK(Amulasn,"*=") \
+ TOK(Adivasn,"/=") \
+ TOK(Amodasn,"%=") \
+ TOK(Alsftasn,"<<=") \
+ TOK(Arsftasn,">>=") \
+ TOK(Acomma,",") \
+ TOK(Acolon,":") \
+ TOK(Asemi,";") \
+ TOK(Alparen,"(") \
+ TOK(Arparen,")") \
+ TOK(Albrace,"{") \
+ TOK(Arbrace,"}") \
+ TOK(Albrakt,"[") \
+ TOK(Arbrakt,"]") \
+ TOK(Adot,".") \
+ TOK(Aarrow,"->") \
+ TOK(Aqmark,"?") \
+ TOK(Aellip,"...") \
+ TOK(Alit,"<literal>") \
+ TOK(Aident,"<identifier>") \
+ TOK(Akeywd,"<keyword>") \
+
+#define TOK(a, b) a,
+enum
+{
+ TOKENS
+ NUM_TOKENS,
+
+ Vchar = iota(8),
+ Vrune = iota(9),
+ Vint = iota(10),
+ Vlong = iota(11),
+ Vvlong = iota(12),
+ Vun = iota(13),
+ Vfloat = iota(14),
+ Vstr = iota(15),
+ Vwstr = iota(16),
+
+ Vmask = Vchar - 1,
+};
+#undef TOK
+
+extern byte *tokens[NUM_TOKENS];
+
+/* TODO: store literals in a big val */
+union Val
+{
+ byte *s;
+ double f;
+ vlong i;
+ uvlong ui;
+ int32 c;
+ uint32 uc;
+ rune r;
+};
+
+struct Token
+{
+ uint32 kind;
+ Pos pos;
+ union Val val;
+};
+
+enum
+{
+ Svar = iota(1),
+ Sfunc = iota(2),
+ Stype = iota(3),
+ Stag = iota(4),
+ Senum = iota(5),
+ Slabl = iota(6),
+ Smacro = iota(7),
+};
+
+struct Sym
+{
+ uint32 kind;
+ string name;
+ union {
+ string macro;
+ Decl *obj;
+ int32 type;
+ Stmt *blk;
+ Expr *val;
+ };
+};
+
+struct SymTab
+{
+ int32 n_buckets;
+ int32 size;
+ int32 n_occupied;
+ int32 upper_bound;
+ int32 *flags;
+ string *keys;
+ Sym **vals;
+};
+
+Sym *define(SymTab *tab, string ident, uint32 kind);
+Sym *lookup(SymTab *tab, string ident);
+error forget(SymTab *tab, string ident);
+void forgetall(SymTab *tab);
+
+enum
+{
+ IOnil = iota(0),
+ IOfile = iota(1),
+ IObuff = iota(2),
+};
+
+struct Io
+{
+ io·Buffer rdr;
+ string path;
+ uint32 kind;
+ union {
+ Stream *f;
+ byte *b;
+ };
+
+ Pos store;
+ struct Io *link;
+};
+
+struct Lexer
+{
+ Pos pos;
+ SymTab sym;
+ byte *b;
+ byte buf[2*1024];
+
+ /* predefined dynamic macros */
+ uintptr macfile;
+ uintptr macline;
+
+ /* i/o data */
+ Io *io, *new;
+ Io iostk[100];
+ struct {
+ int cap;
+ int len;
+ string *path;
+ } omit;
+};
+
+/* lex.c functions */
+Token lex(Lexer *);
+
+int getbyte(Lexer *);
+int getnsbyte(Lexer *l);
+rune getrune(Lexer *);
+byte ungetbyte(Lexer *);
+rune ungetrune(Lexer *, rune r);
+
+Io* openio(Lexer *lx, byte *path);
+void pushio(Lexer *lx, Io *new);
+void popio(Lexer *lx);
+
+void puttok(Token);
+
+// -----------------------------------------------------------------------
+// parsing & type resolution
+// tokens -> ast
+
+/* parent data */
+struct Node
+{
+ Range pos;
+ uint32 kind;
+};
+
+/* ast types */
+enum
+{
+ Nbad,
+ /* labels */
+ Sempty, Slabel, Scase,
+ Sblock,
+ Sexpr, Sdecl,
+ Sselect,
+ /* loops */
+ Sfor, Swhile, Sdo,
+ /* jumps */
+ Sgoto, Scontin, Sbreak, Sreturn,
+ /* forks */
+ Sif, Sswitch,
+
+
+ /* assignments */
+ Xasn, Xmulasn, Xdivasn, Xmodasn, Xsubasn, Xaddasn,
+ Xlsftasn, Xrsftasn, Xandasn, Xxorasn, Xorasn,
+ /* conditional */
+ Xternary,
+ /* unary prefix ops */
+ Xref, Xstar, Xplus, Xminus, Xneg, Xnot, Xsizeof, Xalignof, Xpreinc, Xpredec,
+ Xcast,
+ /* unary postfix ops */
+ Xpostinc, Xpostdec, Xindex, Xcall, Xselp, Xself, Xinitlist,
+ /* binary ops */
+ Xoror, Xandand, Xor, Xxor, Xand, Xneq, Xeql, Xgt, Xlt, Xgteq, Xlteq, Xlsft, Xrsft,
+ Xadd, Xsub, Xmul, Xdiv, Xmod,
+ /* primary */
+ Xparen, Xident, Xlit,
+ /* lists */
+ Xcomma,
+
+
+ Dvar,
+ Dfunc,
+ Dtype,
+ Dlist = iota(20),
+ Dvars = Dvar | Dlist,
+ Dtypes = Dtype | Dlist,
+
+ /* names (don't interact w/ final AST) */
+ Nnil = 0,
+ Nident,
+ Nparen,
+ Nindex,
+ Ncall,
+};
+
+/* expressions */
+enum
+{
+ Keynil,
+ Keyidx,
+ Keysel,
+};
+
+struct Key
+{
+ uint kind : 2;
+ union {
+ Expr *x;
+ string s;
+ };
+};
+
+struct Expr
+{
+ struct Node;
+ uint32 qual;
+ uint32 type;
+ union {
+ string name;
+ struct {
+ uint64 kind;
+ union {
+ union Val;
+ union Val v;
+ };
+ } val;
+ struct {
+ int n;
+ struct Key *k;
+ Expr *v;
+ } init;
+ Expr *x;
+ struct {
+ Expr *l;
+ Expr *r;
+ } asn;
+ struct {
+ Expr *c;
+ Expr *t;
+ Expr *e;
+ } cond;
+ struct {
+ Expr *x;
+ union {
+ Expr *i;
+ string f;
+ };
+ } idx;
+ struct {
+ Expr *fn;
+ int n;
+ Expr **arg;
+ } call;
+ union {
+ Expr *pre;
+ Expr *post;
+ } unary;
+ struct {
+ int type : 1;
+ union {
+ struct {
+ uint32 qual;
+ uint32 type;
+ } of;
+ Expr *x;
+ };
+ } info;
+ struct {
+ struct {
+ uint32 qual;
+ uint32 type;
+ } to;
+ Expr *x;
+ } cast;
+ struct {
+ Expr *l;
+ Expr *r;
+ } binary;
+ struct {
+ Expr *x[2];
+ } comma;
+ };
+};
+
+
+/* statements */
+struct Stmt
+{
+ struct Node;
+ union {
+ struct {
+ union {
+ string ident;
+ Expr *x;
+ };
+ Node *stmt;
+ } lbl;
+ struct {
+ long n;
+ struct Node **item;
+ } blk;
+ Expr *x;
+ struct {
+ Node *init;
+ Expr *cond;
+ Expr *step;
+ Node *body;
+ } loop;
+ union{
+ string lbl;
+ Expr *x;
+ } jmp;
+ struct {
+ Expr *cond;
+ Node *body;
+ Node *orelse;
+ } br;
+ };
+};
+
+/* declarations */
+
+/*
+ * specifiers
+ * the design is the following:
+ * type info is held w/in a 64 bit integer.
+ * the bottom 32 bits are associated to specializations
+ * the top 32 bits index into a type-info array held by the compiler.
+ */
+enum
+{
+ /* memory */
+ Mauto = iota(Kauto),
+ Mstatic = iota(Kstatic),
+ Mreg = iota(Kregister),
+ Mtls = iota(Ktls),
+ Mtype = iota(Ktypedef),
+ Mextern = iota(Kextern),
+
+ MaskMem = Mauto | Mstatic | Mreg | Mtls | Mtype | Mextern,
+
+ /* qualifiers */
+ Qconst = iota(Kconst),
+ Qrestr = iota(Krestrict),
+ Qvoltl = iota(Kvolatile),
+ Qatom = iota(Katomic),
+
+ MaskQul = Qconst | Qrestr | Qvoltl | Qatom,
+
+ Finlne = iota(Kinline),
+ Fnoret = iota(Knoret),
+
+ MaskFcn = Finlne | Fnoret,
+
+ /* types */
+ Tsign = iota(Ksigned),
+ Tunsign = iota(Kunsigned),
+
+ MaskSgn = Tsign | Tunsign,
+
+ Tvoid = iota(Kvoid),
+ Tfloat = iota(Kfloat),
+ Tdouble = iota(Kdouble),
+ Tcmplx = iota(Kcomplex),
+ Timag = iota(Kimaginary),
+
+ MaskFlt = Tfloat | Tdouble | Tcmplx | Timag,
+
+ Tchar = iota(Kchar),
+ Tbool = iota(Kbool),
+
+ Tshort = iota(Kshort),
+ Tint = iota(Kint),
+ Tlong = iota(Klong),
+ Tvlong = iota(Klong+1),
+
+ MaskInt = Tshort | Tint | Tlong | Tvlong,
+ MaskTyp = Tvoid | Tbool | Tchar | Tint | Tfloat | Timag | Tcmplx,
+ /*
+ * NOTE IMPORTANT: vlong takes over the struct bit place
+ * DON'T MOVE KEYWORDS WITHOUT REORGANIZING
+ */
+ Tstruct = iota(Kstruct+1),
+ Tunion = iota(Kunion+1),
+ Tenum = iota(Kenum+1),
+ Tname = iota(Kenum+2),
+
+ Sbad = -1,
+};
+
+/* intermediate nodes */
+struct Ptr
+{
+ uint64 kind;
+ Ptr *link;
+};
+
+struct Name
+{
+ uint32 kind;
+ union {
+ string ident;
+ struct Dtor *paren;
+ struct {
+ Name *name;
+ union {
+ struct {
+ uint32 q;
+ Expr *x;
+ } idx;
+ struct {
+ int n;
+ int dots : 1;
+ Field *arg;
+ } call;
+ };
+ } sfx;
+ };
+};
+
+struct Dtor
+{
+ Ptr ptr;
+ Name *name;
+};
+
+/* final ast node */
+
+struct Field
+{
+ uint32 qual;
+ uint32 type;
+ string name;
+};
+
+struct Decls
+{
+ string name;
+ uint32 type;
+ Expr *init;
+ struct Decls *link;
+};
+
+
+struct Decl
+{
+ struct Node;
+ uint32 spec;
+ union {
+ struct {
+ string name;
+ uint32 type;
+ union {
+ Stmt *body;
+ Expr *init;
+ };
+ };
+ struct Decls list;
+ };
+};
+
+enum
+{
+ Tbad,
+ Tbase,
+ Tdef,
+ Tptr,
+ Tarray,
+ Tfunc,
+};
+
+/* types */
+struct Type
+{
+ uint32 kind;
+ Sym *sym;
+ uintptr size;
+ uintptr max;
+ uint16 align : 8;
+ uint8 sign : 2;
+ union {
+ struct {
+ uint32 qual;
+ uint32 base;
+ } ptr;
+ struct {
+ int len;
+ uint32 qual;
+ uint32 *elt;
+ } arr;
+ struct {
+ int len;
+ Field *f;
+ Expr *x;
+ } aggr;
+ struct {
+ int len;
+ string *elt;
+ Expr *val;
+ } enm;
+ struct {
+ uint32 ret;
+ int n;
+ int dots : 1;
+ Field *arg;
+ } func;
+ };
+};
+
+/* platform specific */
+extern Type pointer;
+extern Type basetypes[24];
+/* mandated by C standard */
+extern uint64 validtypespec[38];
+extern int indextypespec[38];
+
+struct Scope
+{
+ SymTab tags;
+ SymTab objs;
+};
+
+struct Parser
+{
+ Token tok[2];
+ struct {
+ int cap;
+ int len;
+ Decl **decls;
+ } ast;
+
+ /* static buffers/stacks */
+ Scope *sp;
+ Scope spstk[40];
+
+ Name *nm;
+ Name nmstk[40];
+
+ Ptr *pt;
+ Ptr ptstk[10];
+
+ Dtor *dt;
+ Dtor dtstk[40];
+};
+
+/* ast.c functions */
+error parse(Parser *, Lexer *);
+
+// -----------------------------------------------------------------------
+// global compiler data
+
+struct StrTab
+{
+ int32 n_buckets;
+ int32 size;
+ int32 n_occupied;
+ int32 upper_bound;
+ int32 *flags;
+ string *keys;
+ int32 *vals;
+};
+
+#if 0
+struct TypeSet
+{
+ int32 n_buckets;
+ int32 size;
+ int32 n_occupied;
+ int32 upper_bound;
+ int32 *flags;
+ Type **keys;
+};
+#endif
+
+/* main data */
+struct Compiler
+{
+ mem·Arena *heap;
+ StrTab strs;
+ string outfile;
+
+ struct {
+ int cap;
+ int len;
+ string *val;
+ } def;
+
+ struct {
+ int cap;
+ int len;
+ string *dir;
+ } inc;
+
+ struct {
+ int cap;
+ int len;
+ Type *info;
+ } type;
+
+ /* TODO: make array */
+ struct {
+ Decl vargs;
+ } builtin;
+};
+
+extern Compiler C;
+
+/* cc.c functions */
+void init();
+int32 intern(byte **str);
+int32 type();
+
+#undef iota
diff --git a/src/cmd/cc/lex.c b/src/cmd/cc/lex.c
new file mode 100644
index 0000000..33fc5d0
--- /dev/null
+++ b/src/cmd/cc/lex.c
@@ -0,0 +1,873 @@
+#include "cc.h"
+#include <libn/macro/map.h>
+
+// -----------------------------------------------------------------------
+// printing functions
+
+void
+puttok(Token tok)
+{
+ if (tok.kind < Alit)
+ printf("%s", tokens[tok.kind]);
+ else if (tok.kind & Alit) {
+ if (tok.kind & Vchar)
+ if (tok.kind & Vint)
+ if (tok.kind & Vlong)
+ if (tok.kind & Vvlong)
+ printf("literal <%lld>", tok.val.i);
+ if (tok.kind & Vfloat)
+ printf("literal <%f>", tok.val.f);
+ printf("literal <%s>", tok.val.s);
+ } else
+ printf("ident <%s>", tok.val.s);
+}
+
+// -----------------------------------------------------------------------
+// io buffer management
+
+#define asrdr(x) (io·Reader){(int (*)(void *, int, int, void *))x}
+
+// path should be absolute
+Io*
+openio(Lexer *lx, byte *path)
+{
+ string *it, *end;
+
+ intern(&path);
+
+ // See if we have already opened file;
+ // If so, and it hasn't been flagged return it
+ for (it = lx->omit.path, end = it + lx->omit.len; it < end; ++it) {
+ if ((uintptr)(*it) == (uintptr)(path))
+ return nil;
+ }
+
+ // TODO: See if we have already loaded the file
+
+ if ((lx->new - lx->iostk) >= arrlen(lx->iostk)-1)
+ panicf("out of I/O space!");
+
+ lx->new->f = io·open(path, "r");
+ if (!lx->new->f)
+ panicf("file %s not found", path);
+
+ lx->new->kind = IOfile;
+ lx->new->path = path;
+ bufio·initreader(&lx->new->rdr, asrdr(io·read), lx->new->f);
+
+ return lx->new++;
+}
+
+static
+Io*
+makeio(Lexer *lx, byte *name)
+{
+ if ((lx->new - lx->iostk) >= arrlen(lx->iostk)-1)
+ panicf("out of I/O space!");
+
+ lx->new->path = name;
+ lx->new->rdr = (io·Buffer) {
+ .state = bufio·rdr | bufio·end,
+ .runesize = 0,
+ .h = nil,
+ .size = bufio·size,
+ .beg = lx->new->rdr.buf + bufio·ungets,
+ .pos = lx->new->rdr.buf + bufio·ungets,
+ .end = lx->new->rdr.buf + bufio·ungets,
+ };
+ lx->new->b = lx->new->rdr.beg;
+
+ return lx->new++;
+}
+#undef asrdr
+
+static
+void
+freeio(Lexer *lx, Io *io)
+{
+ if (io->kind & IOfile) {
+ io·close(io->f);
+ }
+
+ io->rdr.state = 0;
+ io->kind = 0;
+ io->link = nil;
+ io->path = nil;
+ io->store = (Pos){ 0 };
+ io->path = "<empty>";
+}
+
+void
+pushio(Lexer *lx, Io *new)
+{
+ new->link = lx->io;
+ lx->io->store = lx->pos;
+ lx->io = new;
+
+ lx->pos = (Pos){
+ .line = 1,
+ .col = 1,
+ .path = new->path,
+ };
+}
+
+void
+popio(Lexer *lx)
+{
+ Io *prev;
+
+ assert(lx->io == lx->new-1);
+ --lx->new;
+
+ prev = lx->io->link;
+ freeio(lx, lx->io);
+
+ lx->io = prev;
+ if (!prev) {
+ return;
+ }
+
+ lx->pos = prev->store;
+}
+
+// -----------------------------------------------------------------------
+// simple wrappers
+
+int
+getbyte(Lexer *lx)
+{
+ return bufio·getbyte(&lx->io->rdr);
+}
+
+int
+getnsbyte(Lexer *lx)
+{
+ int b;
+ b = getbyte(lx);
+ for (;;) {
+ if (b == EOF) {
+ if (lx->io->link) {
+ popio(lx);
+ assert(lx->io);
+ b = getbyte(lx);
+ continue;
+ } else
+ return b;
+ }
+ if (b >= RuneSelf || !isspace(b))
+ return b;
+ if (b == '\n')
+ return b;
+ b = getbyte(lx);
+ }
+ return b;
+}
+
+rune
+getrune(Lexer *lx)
+{
+ return bufio·getrune(&lx->io->rdr);
+}
+
+byte
+ungetbyte(Lexer *lx)
+{
+ byte b;
+ return bufio·ungetbyte(&lx->io->rdr, b);
+}
+
+rune
+ungetrune(Lexer *l, rune r)
+{
+ return bufio·ungetrune(&l->io->rdr, r);
+}
+
+// -----------------------------------------------------------------------
+// main lexer
+
+#define TOK(a, b) b,
+byte *tokens[NUM_TOKENS] = { TOKENS };
+#undef TOK
+
+static uint8 Atoi[256] =
+{
+ ['0'] = 0, ['1'] = 1, ['2'] = 2, ['3'] = 3, ['4'] = 4, ['5'] = 5,
+ ['6'] = 6, ['7'] = 7, ['8'] = 8, ['9'] = 9, ['a'] = 10, ['A'] = 10,
+ ['b'] = 11, ['B'] = 11, ['c'] = 12, ['C'] = 12, ['d'] = 13, ['D'] = 13,
+ ['e'] = 14, ['E'] = 14, ['f'] = 15, ['F'] = 15,
+};
+
+static
+error
+escapechar(Lexer *lx, int x, int islong, int esc, vlong *val)
+{
+ int i, u, c;
+ vlong l;
+
+ c = getrune(lx);
+
+ switch (c) {
+ case '\\':
+ break;
+ case EOF:
+ errorat(lx->pos, "EOF in string");
+ return 1;
+ case '\n':
+ errorat(lx->pos, "newline in string");
+ return 1;
+ default:
+ if (c == x)
+ return 1;
+ *val = c;
+ return 0;
+ }
+
+ u = 0;
+ c = getrune(lx);
+
+ switch(c) {
+ case 'x':
+ i = islong ? 4 : 2;
+ goto hex;
+
+ case 'u':
+ i = islong ? 8 : 4;
+ u = 1;
+ goto hex;
+
+ case 'U':
+ i = 8;
+ u = 1;
+ goto hex;
+
+ case '0': case '1': case '2': case '3':
+ case '4': case '5': case '6': case '7':
+ i = islong ? 4 : 2;
+ goto oct;
+
+ case 'a': c = '\a'; break;
+ case 'b': c = '\b'; break;
+ case 'f': c = '\f'; break;
+ case 'n': c = '\n'; break;
+ case 'r': c = '\r'; break;
+ case 't': c = '\t'; break;
+ case 'v': c = '\v'; break;
+ case '\\':c = '\\'; break;
+
+ default:
+ if(c != x) errorat(lx->pos, "unknown escape sequence: %c", c);
+ }
+ *val = c;
+ return 0;
+
+hex:
+ l = 0;
+ for(; i > 0; i--) {
+ c = getbyte(lx);
+ if (c >= '0' && c <= '9') {
+ l = l*16 + c-'0';
+ continue;
+ }
+ if (c >= 'a' && c <= 'f') {
+ l = l*16 + c-'a' + 10;
+ continue;
+ }
+ if (c >= 'A' && c <= 'F') {
+ l = l*16 + c-'A' + 10;
+ continue;
+ }
+ ungetbyte(lx);
+ break;
+ }
+ if (u && (l > RuneMax || (0xd800 <= l && l < 0xe000))) {
+ errorat(lx->pos, "invalid unicode code point in escape sequence: %#llx", l);
+ l = RuneErr;
+ }
+ *val = l;
+ if (esc)
+ *val |= RuneMask + 1;
+ return 0;
+
+oct:
+ l = c - '0';
+ for (; i > 0; i--) {
+ c = getbyte(lx);
+ if (c >= '0' && c <= '7') {
+ l = l*8 + c-'0';
+ continue;
+ }
+ ungetbyte(lx);
+ break;
+ }
+ if (l > 255) errorat(lx->pos, "octal escape value > 255: %d", l);
+
+ *val = l;
+ if (esc)
+ *val |= RuneMask + 1;
+ return 0;
+}
+
+#define CASE1(stmt1, kind1) \
+ case stmt1: \
+ tok.kind = kind1; \
+ goto Return
+
+#define CASE2(stmt1, kind1, b1, kind2) \
+ case stmt1: \
+ tok.kind = kind1; \
+ b = getbyte(lx); \
+ if (b == b1) \
+ tok.kind = kind2; \
+ else \
+ ungetbyte(lx); \
+ goto Return
+
+#define CASE3(stmt1, kind1, b1, kind2, b2, kind3) \
+ case stmt1: \
+ tok.kind = kind1; \
+ b = getbyte(lx); \
+ if (b == b1) \
+ tok.kind = kind2; \
+ else if (b == b2) \
+ tok.kind = kind3; \
+ else \
+ ungetbyte(lx); \
+ goto Return
+
+#define CASE4(stmt1, kind1, b1, kind2, b2, kind3, b3, type4) \
+ case stmt1: \
+ tok.kind = kind1; \
+ b = getbyte(lx); \
+ if (b == b1) \
+ tok.kind = kind2; \
+ else if (b == b2) \
+ tok.kind = kind3; \
+ else if (b == b3) \
+ tok.kind = type4; \
+ else \
+ ungetbyte(lx); \
+ goto Return
+
+
+Token
+lex(Lexer *lx)
+{
+ int b, n, f;
+ vlong v, _;
+ rune r;
+ string s;
+ double d;
+ byte *e;
+ Token tok;
+ Sym *sym;
+ Io *io;
+
+GetByte:
+ b = getbyte(lx);
+Dispatch:
+ tok.pos = lx->pos;
+
+ if ((b != EOF && b >= RuneSelf) || b == '_')
+ goto Talpha;
+ if (isalpha(b)) {
+ if (b != 'L')
+ goto Talpha;
+
+ n = b;
+ b = getbyte(lx);
+ if (b == '\'') {
+ if (escapechar(lx, '\'', 1, 0, &v))
+ b = '\'';
+ if (!escapechar(lx, '\'', 1, 0, &_)) {
+ errorat(lx->pos, "missing ' at end of character constant");
+ }
+ tok.kind = Alit | Vrune;
+ tok.val.r = v;
+ goto Return;
+ }
+ if (b == '"')
+ goto TLstr;
+ ungetbyte(lx);
+ b = n;
+
+ goto Talpha;
+ }
+ if (isdigit(b))
+ goto Tnum;
+
+ switch (b) {
+ case '\n':
+ lx->pos.line++;
+ case ' ': case '\r': case '\t': case '\v': case '\f':
+ while (b = getbyte(lx), isspace(b))
+ if (b == '\n')
+ lx->pos.line++;
+ goto Dispatch;
+
+ case '\\':
+ b = getbyte(lx);
+ if (b != '\n')
+ errorat(lx->pos, "'\\' without a trailing newline");
+ goto GetByte;
+
+ Tchar:
+ case '\'':
+ if (escapechar(lx, '\'', 0, 0, &v)) {
+ errorat(lx->pos, "empty literal or escaped ' in char literal");
+ v = '\'';
+ }
+ if (!escapechar(lx, '\'', 0, 0, &_)) {
+ errorat(lx->pos, "missing '");
+ ungetbyte(lx);
+ }
+
+ if (v > 0xff) {
+ errorat(lx->pos, "overflowed character literal");
+ v = 0;
+ }
+ tok.kind = Alit | Vchar;
+ tok.val.c = v;
+ goto Return;
+
+ case '"':
+ s = str·makecap("", 0, 8);
+ for (;;) {
+ if (escapechar(lx, '"', 0, 1, &v))
+ break;
+
+ if (v & (RuneMask + 1))
+ str·appendbyte(&s, v);
+ else {
+ r = v;
+ b = utf8·runelen(r);
+ utf8·runetobyte(lx->buf, &r);
+ str·appendlen(&s, b, lx->buf);
+ }
+ }
+ tok.kind = Alit | Vstr;
+ tok.val.s = s;
+ intern(&tok.val.s);
+
+ str·free(s);
+ goto Return;
+
+ TLstr:
+ s = str·makecap("", 0, 8);
+ // NOTE: this violates strict aliasing
+ for (;;) {
+ if (escapechar(lx, '"', 1, 0, &v))
+ break;
+ str·appendlen(&s, sizeof(wchar_t), (byte*)&v);
+ }
+ tok.kind = Alit | Vwstr;
+ tok.val.s = s;
+ intern(&tok.val.s);
+
+ str·free(s);
+ goto Return;
+
+ case '.':
+ tok.kind = Adot;
+ b = getbyte(lx);
+
+ if (isdigit(b)) {
+ // *lx->b++ = b;
+ goto Tflt;
+ } else if (b == '.') {
+ b = getbyte(lx);
+ if (b != '.') {
+ errorat(lx->pos, "invalid token '..'");
+ tok.kind = Aellip;
+ break;
+ }
+ }
+ ungetbyte(lx);
+ goto Return;
+
+ case '<':
+ tok.kind = Alt;
+ b = getbyte(lx);
+
+ if (b == '<') {
+ tok.kind = Alsft;
+ b = getbyte(lx);
+ if (b == '=')
+ tok.kind = Alsftasn;
+ else
+ ungetbyte(lx);
+ } else if (b == '=')
+ tok.kind = Alteq;
+ else
+ ungetbyte(lx);
+ goto Return;
+
+ case '>':
+ tok.kind = Agt;
+ b = getbyte(lx);
+
+ if (b == '>') {
+ tok.kind = Arsft;
+ b = getbyte(lx);
+ if (b == '=')
+ tok.kind = Arsftasn;
+ else
+ ungetbyte(lx);
+ } else if (b == '=')
+ tok.kind = Agteq;
+ else
+ ungetbyte(lx);
+ goto Return;
+
+ case '/':
+ tok.kind = Adiv;
+ b = getbyte(lx);
+
+ if (b == '=')
+ tok.kind = Adivasn;
+ else if (b == '/') {
+ while (b != EOF && b != '\n')
+ b = getbyte(lx);
+ goto Dispatch;
+ } else if (b == '*') {
+ int level = 1;
+ b = getbyte(lx);
+ while (b != EOF && level > 0) {
+ if (b == '/') {
+ b = getbyte(lx);
+ if (b == '*')
+ level++;
+ } else if (b == '*') {
+ b = getbyte(lx);
+ if (b == '/')
+ level--;
+ }
+ if (b == '\n')
+ lx->pos.line++;
+ b = getbyte(lx);
+ }
+ goto Dispatch;
+ } else
+ ungetbyte(lx);
+ goto Return;
+
+ case '#':
+ if (domacro(lx)) {
+ tok.kind = Anil;
+ errorat(lx->pos, "failed to perform preprocessor directive");
+ return tok;
+ }
+ goto GetByte;
+
+ case EOF:
+ popio(lx);
+ if (lx->io)
+ goto GetByte;
+ tok.kind = Aeof;
+ goto Return;
+
+ CASE1('(', Alparen);
+ CASE1(')', Arparen);
+ CASE1('{', Albrace);
+ CASE1('}', Arbrace);
+ CASE1('[', Albrakt);
+ CASE1(']', Arbrakt);
+ CASE1(',', Acomma);
+ CASE1('?', Aqmark);
+ CASE1(';', Asemi);
+ CASE1('~', Aneg);
+ CASE1(':', Acolon);
+ CASE2('^', Axor, '=', Axorasn);
+ CASE2('!', Anot, '=', Aneq);
+ CASE2('*', Astar,'=', Amulasn);
+ CASE2('=', Aasn, '=', Aeq);
+ CASE2('%', Amod, '=', Amodasn);
+ CASE3('+', Aadd, '=', Aaddasn, '+', Ainc);
+ CASE3('&', Aand, '=', Aandasn, '&', Aandand);
+ CASE3('|', Aor, '=', Aorasn, '|', Aoror);
+ CASE4('-', Asub, '=', Asubasn, '-', Adec, '>', Aarrow);
+
+ Tnum:
+ e = lx->buf + arrlen(lx->buf);
+ do {
+ if (lx->b >= e) {
+ errorat(lx->pos, "number overflows lexer buffer");
+ goto Nospace;
+ }
+ *lx->b++ = b;
+ } while (b = getbyte(lx), isdigit(b) || b == '_');
+
+ if (b == '.' || tolower(b) == 'e')
+ goto Tflt;
+ Tint:
+ n = 10;
+ s = lx->buf;
+ if (*s == '0') {
+ switch (b) {
+ case 'x': n = 16; break;
+ case 'b': n = 2; break;
+ case 'o': n = 8; break;
+ default: goto Rint;
+ }
+ lx->b = s;
+ /* reparse number, now with base info */
+ while (b = getbyte(lx), (isdigit(b) ||
+ ('a' <= b && b <= 'f') ||
+ ('A' <= b && b <= 'F') ||
+ b == '_'))
+ *lx->b++ = b;
+ }
+ Rint:
+ v = 0;
+ r = b;
+ for (; s != lx->b ; s++) {
+ b = *s;
+ if (b == '_') continue;
+
+ f = Atoi[b];
+ if (f == 0 && b != '0')
+ break;
+
+ if (f >= n) {
+ errorat(lx->pos, "digit '%c' out of range for base %d", b, n);
+ f = 0;
+ }
+
+ if (v > (UINT64_MAX - f) / n) {
+ errorat(lx->pos, "integer literal overflow");
+ v = 0;
+ break;
+ }
+
+ v = v * n + f;
+ }
+
+ b = r;
+ tok.kind = Alit;
+ tok.val.i = v;
+
+ if (b == 'u' || b == 'U') {
+ tok.kind |= Vun;
+ b = getbyte(lx);
+ }
+ if (b == 'l' || b == 'L') {
+ r = getbyte(lx);
+ if (r == 'l' || r == 'L') {
+ if (r != b)
+ errorat(lx->pos, "mismatched case on long long integer suffix");
+ tok.kind |= Vvlong;
+ r = getbyte(lx);
+ } else
+ tok.kind |= Vlong;
+
+ if (r == 'u' || r == 'U') {
+ if (tok.kind & Vun)
+ errorat(lx->pos, "multiple unsigned designators on integer suffix");
+ tok.kind |= Vun;
+ goto Return;
+ }
+
+ ungetbyte(lx);
+ goto Return;
+ }
+
+ tok.kind |= Vint;
+ ungetbyte(lx);
+ goto Return;
+
+ Tflt:
+ if (b == '.') {
+ *lx->b++ = b;
+ b = getbyte(lx);
+ }
+
+ while (isdigit(b)) {
+ *lx->b++ = b;
+
+ if (lx->b >= e) {
+ errorat(lx->pos, "number overflows lexer buffer");
+ goto Nospace;
+ }
+ }
+
+ if (tolower(b) == 'e') {
+ b = getbyte(lx);
+ if (b == '-' || b == '+')
+ b = getbyte(lx);
+
+ if (!isdigit(b))
+ errorat(lx->pos, "expected number after exponent, found %c", b);
+
+ do {
+ *lx->b++ = b;
+ } while (b = getbyte(lx), isdigit(b));
+ }
+ *lx->b = '\0';
+ d = strtod(lx->buf, nil);
+ ungetbyte(lx);
+
+ tok.kind = Alit | Vfloat;
+ tok.val.f = d;
+
+ goto Return;
+
+ Talpha:
+ s = lx->buf;
+ e = lx->buf + arrlen(lx->buf);
+ for (;;) {
+ if (s >= e) {
+ errorat(lx->pos, "identifier too long for buffer: %s", s);
+ goto Nospace;
+ }
+ if (b != EOF && b >= RuneSelf) {
+ ungetbyte(lx);
+ r = getrune(lx);
+ if (!utf8·isletter(r) && !utf8·isdigit(r) && r != 0xb7) {
+ errorat(lx->pos, "invalid identifier character %d", r);
+ }
+ s += utf8·runetobyte(s, &r);
+ } else if (!isalnum(b) && b != '_')
+ break;
+ else
+ *s++ = b;
+ b = getbyte(lx);
+ }
+ *s = '\0';
+ ungetbyte(lx);
+
+ tok.kind = Aident;
+ tok.val.s = lx->buf;
+
+ n = intern(&tok.val.s);
+ if (n < arrlen(keywords)) {
+ tok.kind = Akeywd;
+ tok.val.i = n;
+ goto Return;
+ }
+
+ sym = lookup(&lx->sym, tok.val.s);
+ if (sym && ((uintptr)sym->name != (uintptr)lx->io->path)) {
+ if ((uintptr)sym == lx->macline) {
+ tok.kind = Alit | Vint;
+ tok.val.i = lx->pos.line;
+ goto Return;
+ }
+ if ((uintptr)sym == lx->macfile) {
+ tok.kind = Alit | Vstr;
+ tok.val.s = lx->pos.path;
+ goto Return;
+ }
+ io = makeio(lx, sym->name);
+ io->rdr.end += expandmacro(lx, sym, io->b);
+ printf("EXPANDED %s: %s\n", sym->name, io->rdr.beg);
+ *io->rdr.end++ = EOF;
+ pushio(lx, io);
+ goto GetByte;
+ }
+ goto Return;
+
+ default:
+ tok.kind = Anil;
+ errorat(lx->pos, "invalid token, crashing");
+ abort();
+ }
+
+Return:
+ lx->b = lx->buf;
+ return tok;
+
+Nospace:
+ panicf("aborting compilation");
+ exit(1);
+}
+
+#undef CASE4
+#undef CASE3
+#undef CASE2
+#undef CASE1
+
+// -----------------------------------------------------------------------
+// symbol tables
+
+#define PTR_HASH(p) (uintptr)(p)
+#define PTR_EQUAL(p1, p2) ((uintptr)(p1) == (uintptr)(p2))
+
+static
+void
+·free(void* _, void* ptr) {
+ return free(ptr);
+}
+
+static
+void *
+·alloc(void* _, uint n, ulong size) {
+ return malloc(n*size);
+}
+
+static
+void *
+·calloc(void* _, uint n, ulong size) {
+ return calloc(n, size);
+}
+
+static
+int
+moresymtab(SymTab *tab, int n)
+{
+ MAP_GROW(tab, string, Sym*, n, PTR_HASH, sys·Memory, nil);
+}
+
+static
+int
+putsym(SymTab *tab, Sym *sym, error *err)
+{
+ MAP_PUT(tab, sym->name, sym, PTR_HASH, PTR_EQUAL, moresymtab, err);
+}
+
+Sym*
+define(SymTab *tab, string name, uint32 kind)
+{
+ int i;
+ Sym *sym;
+ error err;
+
+ sym = mem·arenaalloc(C.heap, 1, sizeof(*sym));
+ sym->name = name;
+ sym->kind = kind;
+
+ i = putsym(tab, sym, &err);
+ tab->vals[i] = sym;
+
+ return sym;
+}
+
+Sym*
+lookup(SymTab *tab, string ident)
+{
+ int idx;
+ MAP_GET(idx, tab, ident, PTR_HASH, PTR_EQUAL);
+
+ if (idx < tab->n_buckets)
+ return tab->vals[idx];
+
+ return nil;
+}
+
+
+error
+forget(SymTab *tab, string ident)
+{
+ int idx;
+ MAP_GET(idx, tab, ident, PTR_HASH, PTR_EQUAL);
+
+ if (idx < tab->n_buckets) {
+ MAP_DEL(tab, idx);
+ return 0;
+ }
+ return 1;
+}
+
+void
+forgetall(SymTab *tab)
+{
+ MAP_RESET(tab);
+}
diff --git a/src/cmd/cc/pp.c b/src/cmd/cc/pp.c
new file mode 100644
index 0000000..57c3501
--- /dev/null
+++ b/src/cmd/cc/pp.c
@@ -0,0 +1,1125 @@
+#include "cc.h"
+
+// -----------------------------------------------------------------------
+// helper functions
+
+static
+void
+pushomit(Lexer *lx, string omit)
+{
+ if (lx->omit.len == lx->omit.cap) {
+ lx->omit.cap += 20;
+ lx->omit.path = realloc(lx->omit.path, lx->omit.cap*sizeof(*lx->omit.path));
+ }
+ lx->omit.path[lx->omit.len++] = omit;
+}
+
+// NOTE: The iterator of lexer lx->b IS NOT reset.
+// Its the caller's responsibility.
+static
+string
+ident(Lexer *lx)
+{
+ int b;
+ byte *s;
+
+ b = getnsbyte(lx);
+ if (!isalpha(b) && b != '_' && b < RuneSelf) {
+ ungetbyte(lx);
+ return "";
+ }
+
+ s = lx->b;
+ for (;;) {
+ *lx->b++ = b;
+ b = getbyte(lx);
+ if (isalnum(b) || b == '_' || b >= RuneSelf)
+ continue;
+ ungetbyte(lx);
+ break;
+ }
+ *lx->b++ = '\0';
+
+ return s;
+}
+
+static
+string
+identdots(Lexer *lx, int *dots)
+{
+ int c;
+ byte *s;
+
+ s = ident(lx);
+ if (*s != '\0')
+ return s;
+
+ c = getnsbyte(lx);
+ if (c != '.') {
+ ungetbyte(lx);
+ return s;
+ }
+
+ if (getbyte(lx) != '.' || getbyte(lx) != '.')
+ errorat(lx->pos, "incorrect '...' token in macro");
+
+ *dots = 1;
+ // TODO: should only run intern once...
+ s = "__VA_ARGS__";
+ intern(&s);
+ return s;
+}
+
+static
+Sym*
+defmacro(Lexer *lx, string name, string macro)
+{
+ Sym *mac;
+
+ // printf("DEFINING MACRO %s ON LINE %d, file %s\n", name, lx->pos.line, os·basename(lx->pos.path));
+ mac = define(&lx->sym, name, Smacro);
+ mac->macro = macro;
+
+ return mac;
+}
+
+static vlong evalmacro(Lexer *lx, byte prec);
+
+static
+vlong
+opand(Lexer *lx)
+{
+ int b;
+ vlong v;
+ string s;
+ Token tok;
+ Sym *sym;
+
+ b = getnsbyte(lx);
+ if (b == '\n') {
+ errorat(lx->pos, "new line in macro expression");
+ return 0;
+ }
+ ungetbyte(lx);
+
+ tok = lex(lx);
+
+ switch (tok.kind & Vmask) {
+ case Aneg:
+ return ~opand(lx);
+
+ case Anot:
+ return !opand(lx);
+
+ case Alparen:
+ v = evalmacro(lx, 1);
+ tok = lex(lx);
+ if (!(tok.kind & Arparen)) {
+ errorat(lx->pos, "unbalanced parenthesis in macro expression");
+ return 0;
+ }
+ return v;
+
+ case Alit:
+ switch (tok.kind & ~Vmask) {
+ case Vint: case Vlong: case Vvlong:
+ return tok.val.i;
+ case Vun|Vint : case Vun|Vlong : case Vun|Vvlong:
+ return tok.val.ui;
+ case Vrune:
+ return tok.val.r;
+ case Vchar:
+ return tok.val.c;
+ default:
+ errorat(lx->pos, "invalid literal of type '%s' in conditional macro", tokens[tok.kind & ~Vmask]);
+ return 0;
+ }
+
+ case Aident:
+ sym = lookup(&lx->sym, tok.val.s);
+ if (!sym) {
+ /* calling lex directly would expand the operand here
+ * manually lex the result
+ */
+ if (strcmp(tok.val.s, "defined") == 0) {
+ b = getnsbyte(lx);
+ if (b == '\n') {
+ errorat(lx->pos, "new line in defined operand");
+ return 0;
+ }
+ s = lx->buf;
+ if (b == '(') {
+ b = getnsbyte(lx);
+ while (b != ')') {
+ if (b == '\n') {
+ errorat(lx->pos, "new line inside defined operand");
+ return 0;
+ }
+ if (b == '(') {
+ errorat(lx->pos, "nested parens not allowed inside defined operator");
+ return 0;
+ }
+ if (!isspace(b))
+ *s++ = b;
+ b = getbyte(lx);
+ }
+ } else {
+ while (!isspace(b)) {
+ *s++ = b;
+ b = getbyte(lx);
+
+ if (b == '\n') {
+ errorat(lx->pos, "new line inside defined operand");
+ return 0;
+ }
+ }
+ }
+ *s = '\0';
+ s = lx->buf;
+ intern(&s);
+ return lookup(&lx->sym, s) != nil;
+ }
+ return 0;
+ }
+ panicf("unreachable");
+ return 1;
+
+ default:
+ errorat(lx->pos, "opand: invalid token found in macro conditional: '%s'", tokens[tok.kind & Vmask]);
+ return 0;
+ }
+}
+
+// recursively evaluates a macro
+// reduced set of operators allowed here
+static
+vlong
+evalmacro(Lexer *lx, byte prec)
+{
+ int b;
+ vlong l, r;
+ Token tok;
+
+ l = opand(lx);
+ for (;;) {
+ b = getnsbyte(lx);
+ // NOTE: Either this or we pass in what are stopping byte is
+ // New line should always stop us...
+ // Is there any case where we SHOULDN'T STOP ON ')'?
+ if (b == '\n' || b == ')') {
+ ungetbyte(lx);
+ break;
+ }
+ ungetbyte(lx);
+
+ tok = lex(lx);
+ // simplified jump table of precedence
+ // unpacked to evaluate inline
+ switch (tok.kind & Vmask) {
+ case Astar:
+ if (prec > 10) {
+ ungetbyte(lx);
+ return l;
+ }
+ r = evalmacro(lx, 10 + 1);
+ l = l * r;
+ continue;
+
+ case Adiv:
+ if (prec > 10) {
+ ungetbyte(lx);
+ return l;
+ }
+ r = evalmacro(lx, 10 + 1);
+ l = l / r;
+ continue;
+
+ case Amod:
+ if (prec > 10) {
+ ungetbyte(lx);
+ return l;
+ }
+ r = evalmacro(lx, 10 + 1);
+ l = l % r;
+ continue;
+
+ case Aadd:
+ if (prec > 9) {
+ ungetbyte(lx);
+ return l;
+ }
+ r = evalmacro(lx, 9 + 1);
+ l = l + r;
+ continue;
+
+ case Asub:
+ if (prec > 9) {
+ ungetbyte(lx);
+ return l;
+ }
+ r = evalmacro(lx, 9 + 1);
+ l = l - r;
+ continue;
+
+ case Alsft:
+ if (prec > 8) {
+ ungetbyte(lx);
+ ungetbyte(lx);
+ return l;
+ }
+ r = evalmacro(lx, 8 + 1);
+ l = l << r;
+ continue;
+
+ case Arsft:
+ if (prec > 8) {
+ ungetbyte(lx);
+ ungetbyte(lx);
+ return l;
+ }
+ r = evalmacro(lx, 8 + 1);
+ l = l >> r;
+ continue;
+
+ case Alt:
+ if (prec > 7) {
+ ungetbyte(lx);
+ return l;
+ }
+ r = evalmacro(lx, 7 + 1);
+ l = l < r;
+ continue;
+
+ case Agt:
+ if (prec > 7) {
+ ungetbyte(lx);
+ return l;
+ }
+ r = evalmacro(lx, 7 + 1);
+ l = l > r;
+ continue;
+
+ case Agteq:
+ if (prec > 7) {
+ ungetbyte(lx);
+ ungetbyte(lx);
+ return l;
+ }
+ r = evalmacro(lx, 7 + 1);
+ l = l >= r;
+ continue;
+
+ case Alteq:
+ if (prec > 7) {
+ ungetbyte(lx);
+ ungetbyte(lx);
+ return l;
+ }
+ r = evalmacro(lx, 7 + 1);
+ l = l >= r;
+ continue;
+
+ case Aeq:
+ if (prec > 6) {
+ ungetbyte(lx);
+ ungetbyte(lx);
+ return l;
+ }
+ r = evalmacro(lx, 6 + 1);
+ l = l == r;
+ continue;
+
+ case Aneq:
+ if (prec > 6) {
+ ungetbyte(lx);
+ ungetbyte(lx);
+ return l;
+ }
+ r = evalmacro(lx, 6 + 1);
+ l = l != r;
+ continue;
+
+ case Aand:
+ if (prec > 5) {
+ ungetbyte(lx);
+ return l;
+ }
+ r = evalmacro(lx, 5 + 1);
+ l = l & r;
+ continue;
+
+ case Axor:
+ if (prec > 4) {
+ ungetbyte(lx);
+ return l;
+ }
+ r = evalmacro(lx, 4 + 1);
+ l = l ^ r;
+ continue;
+
+ case Aor:
+ if (prec > 3) {
+ ungetbyte(lx);
+ return l;
+ }
+ r = evalmacro(lx, 3 + 1);
+ l = l | r;
+ continue;
+
+ case Aandand:
+ if (prec > 2) {
+ ungetbyte(lx);
+ ungetbyte(lx);
+ return l;
+ }
+ r = evalmacro(lx, 2 + 1);
+ l = l && r;
+ continue;
+
+ case Aoror:
+ if (prec > 1) {
+ ungetbyte(lx);
+ ungetbyte(lx);
+ return l;
+ }
+ r = evalmacro(lx, 1 + 1);
+ l = l || r;
+ continue;
+
+ default:
+ errorat(lx->pos, "eval: invalid token found in macro conditional '%s'", tokens[tok.kind & Vmask]);
+ abort();
+ return 0;
+ }
+ }
+
+ return l;
+}
+
+// -----------------------------------------------------------------------
+// preprocessor magic numbers
+
+enum
+{
+ PPbeg = 0x02,
+ PParg = 0x03,
+ PPcat = 0x04,
+ PPstr = 0x05,
+
+ PPnarg = 30,
+};
+
+#define PPvar 0x80u
+
+// -----------------------------------------------------------------------
+// preprocessor functions
+
+/* #endif */
+static
+error
+ppend(Lexer *lx)
+{
+ int b;
+ do {
+ b = getnsbyte(lx);
+ } while (b > 0 && b != '\n');
+
+ if (b == '\n')
+ lx->pos.line++;
+
+ return 0;
+}
+
+
+/* #undef */
+static
+error
+ppund(Lexer *lx)
+{
+ string s;
+ error err;
+
+ s = ident(lx);
+ intern(&s);
+ lx->b = lx->buf;
+
+ err = forget(&lx->sym, s);
+ if (err)
+ warnat(lx->pos, "attempting to undefine unrecognized symbol '%s'", s);
+
+ ppend(lx);
+ return 0;
+}
+
+/* #define */
+static
+error
+ppdef(Lexer *lx)
+{
+ int b;
+ Sym *sym;
+ int i, j, n, dot;
+ string s, a, base, end, buf, args[PPnarg];
+
+ s = ident(lx);
+ if (!s) {
+ errorat(lx->pos, "failed to parse defined identifer");
+ goto Bad;
+ }
+ intern(&s);
+ printf("DEFINING %s\n", s);
+ lx->b = lx->buf;
+
+ sym = lookup(&lx->sym, s);
+ if (sym)
+ warnat(lx->pos, "macro redefined: '%s'", sym->name);
+
+ n = 0;
+ dot = 0;
+ b = getbyte(lx);
+ if (b == '(') {
+ b = getnsbyte(lx);
+ if (b != ')') {
+ ungetbyte(lx);
+ for (;;) {
+ // NOTE: This is a pointer into the lx->buffer.
+ // Can't reset lx->b while we hold the args!
+ a = identdots(lx, &dot);
+ if (a == nil) {
+ errorat(lx->pos, "macro syntax error: improper argument");
+ goto Bad;
+ }
+ if (n >= PPnarg) {
+ errorat(lx->pos, "macro syntax error: too many arguments: %d > %d", n, PPnarg);
+ goto Bad;
+ }
+
+ args[n++] = a;
+ b = getnsbyte(lx);
+
+ if (b == ')')
+ break;
+ if (b != ',') {
+ errorat(lx->pos, "macro syntax error: bad token in argument '%b'", b);
+ goto Bad;
+ }
+ }
+ }
+ b = getbyte(lx);
+ }
+
+ if (isspace(b))
+ if (b != '\n')
+ b = getnsbyte(lx);
+
+ base = lx->b;
+ end = lx->buf + arrlen(lx->buf);
+ if (base >= end) {
+ errorat(lx->pos, "out of macro buffer space!");
+ goto Bad;
+ }
+ buf = str·makef("%c%c", n, PPbeg);
+ for (;;) {
+ if (isalpha(b) || b == '_') {
+ lx->b = base;
+ *lx->b++ = b;
+
+ b = getbyte(lx);
+ while (isalnum(b) || b == '_') {
+ *lx->b++ = b;
+ if (lx->b >= end) {
+ errorat(lx->pos, "out of macro buffer space!");
+ goto Bad;
+ }
+ b = getbyte(lx);
+ }
+ *lx->b++ = '\0';
+
+ for (i = 0; i < n; i++) {
+ if (strcmp(base, args[i]) == 0) {
+ goto Arg;
+ }
+ }
+ str·appendlen(&buf, (lx->b - base - 1), base);
+ continue;
+ Arg:
+ str·appendbyte(&buf, PParg);
+ str·appendbyte(&buf, 'a' + i);
+ continue;
+ }
+
+ if (b == '/') {
+ b = getbyte(lx);
+ if (b == '/') {
+ while (b = getbyte(lx), b != '\n');
+ continue;
+ }
+ if (b == '*') {
+ b = getbyte(lx);
+ for (;;) {
+ if (b == '*') {
+ b = getbyte(lx);
+ if (b != '/')
+ continue;
+ b = getbyte(lx);
+ break;
+ }
+ if (b == '\n') {
+ errorat(lx->pos, "comment and newline found in define statement of %s", s);
+ break;
+ }
+ b = getbyte(lx);
+ }
+ continue;
+ }
+ str·appendbyte(&buf, '/');
+ continue;
+ }
+
+ if (b == '\\') {
+ b = getbyte(lx);
+ /* unix */
+ if (b == '\n') {
+ lx->pos.line++;
+ b = getbyte(lx);
+ continue;
+ }
+ /* windows */
+ if (b == '\r') {
+ b = getbyte(lx);
+ if (b == '\n') {
+ lx->pos.line++;
+ b = getbyte(lx);
+ continue;
+ }
+ }
+ str·appendbyte(&buf, '\\');
+ }
+ if (b == '\n') {
+ lx->pos.line++;
+ break;
+ }
+
+ if (b == '#') {
+ b = getnsbyte(lx);
+ if (b == '#') {
+ str·appendbyte(&buf, PPcat);
+ b = getbyte(lx);
+ continue;
+ }
+
+ lx->b = base;
+ while (isalnum(b) || b == '_') {
+ *lx->b++ = b;
+ b = getbyte(lx);
+ }
+ *lx->b = '\0';
+
+ for (i = 0; i < n; i++) {
+ if (strcmp(base, args[i]) == 0)
+ goto Str;
+ }
+ errorat(lx->pos, "macro operator '#' must be followed by a valid variable identifier");
+ goto Bad;
+ Str:
+ str·appendbyte(&buf, PPstr);
+ str·appendbyte(&buf, 'a' + i);
+ continue;
+ }
+
+ str·appendbyte(&buf, b);
+ b = getbyte(lx);
+ if (b == EOF) {
+ errorat(lx->pos, "eof found in macro '%s'", s);
+ goto Bad;
+ }
+ }
+ if (dot)
+ *buf |= PPvar;
+
+ lx->b = lx->buf;
+ sym = defmacro(lx, s, buf);
+ return 0;
+Bad:
+ errorat(lx->pos, "failed parse of #define macro '%s'", s);
+ lx->b = lx->buf;
+ ppend(lx);
+ return 1;
+}
+
+/* macro expansion */
+int
+expandmacro(Lexer *lx, Sym *s, byte *dst)
+{
+ int n, lv, nargs, dots;
+ byte b, *it, *e, *arg[PPnarg];
+
+ /* not a function macro */
+ if (s->macro[0] == '\0') {
+ if (s->macro[1] != PPbeg) {
+ errorat(lx->pos, "malformed macro");
+ goto Bad;
+ }
+ strcpy(dst, s->macro + 2);
+ return str·len(s->macro)-2;
+ }
+ dots = (ubyte)s->macro[0] & PPvar;
+ nargs = (ubyte)s->macro[0] & (~PPvar);
+
+ b = getnsbyte(lx);
+ if (b != '(') {
+ errorat(lx->pos, "macro function not given arguments");
+ goto Bad;
+ }
+
+ n = 0;
+ b = getbyte(lx);
+ if (b != ')') {
+ ungetbyte(lx);
+ lv = 0;
+ lx->b = lx->buf;
+ e = lx->buf + arrlen(lx->buf) - 4;
+ arg[n++] = lx->buf;
+ for (;;) {
+ if (lx->b >= e)
+ goto Nospace;
+ b = getbyte(lx);
+ if (b == '"')
+ for (;;) {
+ if (lx->b >= e)
+ goto Nospace;
+ *lx->b++ = b;
+ b = getbyte(lx);
+ if (b == '\\') {
+ *lx->b++ = b;
+ b = getbyte(lx);
+ continue;
+ }
+ if (b == '\n') {
+ errorat(lx->pos, "newline found in arguments: macro '%s'", s->name);
+ goto Bad;
+ }
+ if (b == '"')
+ break;
+ }
+ if (b == '\'')
+ for (;;) {
+ if (lx->b >= e)
+ goto Nospace;
+ *lx->b++ = b;
+ b = getbyte(lx);
+ if (b == '\\') {
+ *lx->b++ = b;
+ b = getbyte(lx);
+ continue;
+ }
+ if (b == '\n') {
+ errorat(lx->pos, "newline found in arguments: macro '%s'", s->name);
+ goto Bad;
+ }
+ if (b == '"')
+ break;
+ }
+ if (b == '/') {
+ b = getbyte(lx);
+ switch(b) {
+ case '*':
+ for (;;) {
+ b = getbyte(lx);
+ if (b == '*') {
+ b = getbyte(lx);
+ if (b == '/')
+ break;
+ }
+ }
+ *lx->b++ = ' ';
+ continue;
+ case '/':
+ while ((b = getbyte(lx)) != '\n')
+ ;
+ break;
+
+ default:
+ ungetbyte(lx);
+ b = '/';
+ }
+ }
+ if (lv == 0) {
+ if (b == ',') {
+ if (n == nargs && dots) {
+ *lx->b++ = ',';
+ continue;
+ }
+ *lx->b++ = '\0';
+ arg[n++] = lx->b;
+ if (n > nargs)
+ break;
+ continue;
+ }
+ if (b == ')')
+ break;
+ }
+ if (b == '\n')
+ b = ' ';
+ *lx->b++ = b;
+ if (b == '(')
+ lv++;
+ if (b == ')')
+ lv--;
+ }
+ *lx->b = '\0';
+ }
+
+ if (n != nargs) {
+ errorat(lx->pos, "number of arguments don't match macro definition: %s", s->name);
+ *dst = '\0';
+ goto Bad;
+ }
+
+ if (s->macro[1] != PPbeg) {
+ errorat(lx->pos, "corrupted macro buffer: %s", s->name);
+ *dst = '\0';
+ goto Bad;
+ }
+
+ it = s->macro+2;
+ e = dst;
+ for (;;) {
+ b = *it++;
+ if (b == '\n')
+ b = ' ';
+ switch (b) {
+ case PParg:
+ b = *it++;
+ b -= 'a';
+ if (b < 0 && b > n) {
+ errorat(lx->pos, "malformed macro index: %s", s->name);
+ goto Bad;
+ }
+ strcpy(dst, arg[b]);
+ dst += strlen(arg[b]);
+
+ break;
+
+ case PPstr:
+ b = *it++;
+ b -= 'a';
+ if (b < 0 && b > n) {
+ errorat(lx->pos, "malformed macro index: %s", s->name);
+ goto Bad;
+ }
+ *dst++ = '"';
+ strcpy(dst, arg[b]);
+ *dst++ = '"';
+
+ break;
+
+ case PPcat:
+ continue;
+
+ case '\0':
+ goto End;
+
+ default:
+ *dst++ = b;
+ continue;
+ }
+ }
+End:
+ *dst = '\0';
+ return dst - e;
+Nospace:
+ errorat(lx->pos, "out of memory during macro expansion %s", s->name);
+Bad:
+ ppend(lx);
+ lx->b = lx->buf;
+ errorat(lx->pos, "failed to expand macro %s", s->name);
+ return -1;
+}
+
+/* #include */
+static
+error
+ppinc(Lexer *lx)
+{
+ int i;
+ byte b, end;
+ string s;
+
+ Stream *f;
+ Io *io;
+
+ b = getnsbyte(lx);
+ if (b != '"') {
+ end = b;
+ if (b != '<') {
+ errorat(lx->pos, "unrecognized token '%c' in include directive", b);
+ goto Bad;
+ }
+ end = '>';
+ } else
+ end = '"';
+
+ lx->b = lx->buf;
+ for (;;) {
+ b = getbyte(lx);
+ if (b == end)
+ break;
+ if (b == '\n') {
+ errorat(lx->pos, "hit end of line before include directive completed");
+ goto Bad;
+ }
+ *lx->b++ = b;
+ }
+ *lx->b = '\0';
+ s = lx->buf;
+ intern(&s); // NOTE: we could use this to see if we already have the file
+
+ lx->b = lx->buf;
+ for (i = 0; i < C.inc.len; i++) {
+ if (i == 0 && end == '>')
+ continue;
+
+ strcpy(lx->buf, C.inc.dir[i]);
+ strcat(lx->buf, "/");
+
+ if (strcmp(lx->buf, "./") == 0)
+ lx->buf[0] = '\0';
+ strcat(lx->buf, s);
+
+ if (os·exists(lx->buf, ReadOK)) {
+ break;
+ }
+ }
+ if (i == C.inc.len) {
+ errorat(lx->pos, "could not find file '%s' on standard include search path", s);
+ goto Bad;
+ }
+
+ io = openio(lx, lx->buf);
+ if (io != nil) {
+ pushio(lx, io);
+ }
+
+ return 0;
+
+Bad:
+ ungetbyte(lx);
+ lx->b = lx->buf;
+ errorat(lx->pos, "failed include");
+ ppend(lx);
+ return 1;
+}
+
+/* #pragma */
+static
+error
+ppprag(Lexer *lx)
+{
+ string s;
+
+ s = ident(lx);
+ if (s == nil) {
+ errorat(lx->pos, "failed to parse pragma identifier");
+ goto Bad;
+ }
+ lx->b = lx->buf;
+ if (strcmp(s, "once") == 0) {
+ pushomit(lx, lx->io->path);
+ return 0;
+ }
+Bad:
+ lx->b = lx->buf;
+ errorat(lx->pos, "unrecognized pragma '%s'", s);
+ ppend(lx);
+ return 1;
+}
+
+/* all #if statements */
+static
+error
+ppif(Lexer *lx, int f)
+{
+ Sym *sym;
+ string s;
+ int c, l, b;
+
+Eval:
+ if (f == 0) {
+ b = evalmacro(lx, 1);
+ if (b) {
+ ppend(lx);
+ return 0;
+ }
+ goto Skip;
+ }
+
+ if (f == 1)
+ goto Skip;
+
+ s = ident(lx);
+ if (s == nil) {
+ errorat(lx->pos, "failed to parse preprocessor identifier");
+ goto Bad;
+ }
+ intern(&s);
+ lx->b = lx->buf;
+
+ sym = lookup(&lx->sym, s);
+ if ((!sym && (f == 3)) || (sym && (f == 2)))
+ return 0;
+
+Skip:
+ b = 1;
+ l = 0;
+ for (;;) {
+ c = getbyte(lx);
+ if (c != '#') {
+ if (!isspace(c))
+ b = 0;
+ if (c == '\n') {
+ lx->pos.line++;
+ b = 1;
+ }
+ if (c == EOF) {
+ errorat(lx->pos, "EOF hit while skipping if block. Missing endif");
+ goto Bad;
+ }
+ continue;
+ }
+ if (!b)
+ continue;
+ s = ident(lx);
+ lx->b = lx->buf;
+ if (!s)
+ continue;
+
+ if (l == 0 && (strcmp(s, "elif") == 0)) {
+ f = 0;
+ goto Eval;
+ }
+
+ if (strcmp(s, "endif") == 0) {
+ if (l) {
+ l--;
+ continue;
+ }
+ ppend(lx);
+ return 0;
+ }
+ if (strcmp(s, "if") == 0 ||
+ strcmp(s, "ifdef") == 0 ||
+ strcmp(s, "ifndef") == 0) {
+ l++;
+ continue;
+ }
+
+ if (l == 0 && f != 1 && strcmp(s, "else") == 0) {
+ return 0;
+ }
+ }
+
+Bad:
+ lx->b = lx->buf;
+ errorat(lx->pos, "bad syntax in preprocessor conditional directive");
+ ppend(lx);
+ return 1;
+}
+
+/* #if */
+static
+error
+ppif0(Lexer *lx)
+{
+ return ppif(lx, 0);
+}
+
+/* #else */
+static
+error
+ppif1(Lexer *lx)
+{
+ return ppif(lx, 1);
+}
+
+/* #ifdef */
+static
+error
+ppif2(Lexer *lx)
+{
+ return ppif(lx, 2);
+}
+
+/* #ifndef */
+static
+error
+ppif3(Lexer *lx)
+{
+ return ppif(lx, 3);
+}
+
+// -----------------------------------------------------------------------
+// dispatch function
+
+#define DIRECTIVE(a, b, c) c,
+error (*macros[NUM_DIRECTIVES])(Lexer*) = { DIRECTIVES };
+#undef DIRECTIVE
+
+/* reads an identifier into the lexer's buffer */
+/* caller must intern */
+
+error
+domacro(Lexer *lx)
+{
+ int n;
+ error err;
+ string s;
+
+ s = ident(lx);
+ intern(&s);
+ lx->b = lx->buf;
+ for (n = 0; n < NUM_DIRECTIVES; n++) {
+ if ((uintptr)s == (uintptr)directives[n]) {
+ goto Do;
+ }
+ }
+ errorat(lx->pos, "unrecognized directive name '%s'", s);
+ return 1;
+Do:
+ err = macros[n](lx);
+ return err;
+}
+
+error
+dodefine(Lexer *lx, string s)
+{
+ int n;
+ byte *c, *def;
+ Sym *sym;
+
+ strcpy(lx->buf, s);
+ c = strchr(lx->buf, '=');
+ if (c) {
+ *c++ = '\0';
+ sym = lookup(&lx->sym, lx->buf);
+ if (sym) {
+ errorf("redefinition of symbol '%s'", sym->name);
+ return 1;
+ }
+ sym = define(&lx->sym, lx->buf, Smacro);
+ n = strlen(c) + 2;
+ sym->macro = str·makelen("", n);
+ str·appendbyte(&sym->macro, '\0');
+ str·append(&sym->macro, c);
+ } else {
+ sym = lookup(&lx->sym, lx->buf);
+ if (sym) {
+ errorf("redefinition of symbol '%s'", sym->name);
+ return 1;
+ }
+ sym = define(&lx->sym, s, Smacro);
+ sym->macro = "\00\02";
+ }
+
+ return 0;
+}
diff --git a/src/cmd/cc/rules.mk b/src/cmd/cc/rules.mk
new file mode 100644
index 0000000..b7b4688
--- /dev/null
+++ b/src/cmd/cc/rules.mk
@@ -0,0 +1,21 @@
+include share/push.mk
+
+# local sources
+SRCS_$(d):=\
+ $(d)/pp.c\
+ $(d)/lex.c\
+ $(d)/ast.c\
+ $(d)/bits.c\
+ $(d)/cc.c
+TEST_$(d) :=
+
+# outputs
+BINS_$(d) := $(d)/cc
+
+include share/paths.mk
+
+# Local rules
+$(BINS_$(d)): $(OBJS_$(d)) $(OBJ_DIR)/libn/libn.a
+ $(COMPLINK)
+
+include share/pop.mk
diff --git a/src/cmd/cc/scratch.c b/src/cmd/cc/scratch.c
new file mode 100644
index 0000000..b37d9a5
--- /dev/null
+++ b/src/cmd/cc/scratch.c
@@ -0,0 +1,7 @@
+#define XXX(a, b, c) a ## b ## c
+
+int
+main()
+{
+ XXX(d, e, f);
+}
diff --git a/src/cmd/cc/util.c b/src/cmd/cc/util.c
new file mode 100644
index 0000000..cca16f2
--- /dev/null
+++ b/src/cmd/cc/util.c
@@ -0,0 +1,21 @@
+#include "cc.h"
+
+void
+·free(void* _, void* ptr) {
+ return free(ptr);
+}
+
+void *
+·alloc(void* _, uint n, ulong size) {
+ return malloc(n*size);
+}
+
+void *
+·calloc(void* _, uint n, ulong size) {
+ return calloc(n, size);
+}
+
+void *
+·realloc(void* _, void *ptr, uint n, ulong size) {
+ return realloc(ptr, n*size);
+}