From ce05175372a9ddca1a225db0765ace1127a39293 Mon Sep 17 00:00:00 2001 From: Nicholas Date: Fri, 12 Nov 2021 09:22:01 -0800 Subject: chore: simplified organizational structure --- src/cmd/cc/ast.c | 2139 ++++++++++++++++++++++++++++++++++++++++++++++++++ src/cmd/cc/bits.c | 114 +++ src/cmd/cc/cc.c | 409 ++++++++++ src/cmd/cc/cc.h | 806 +++++++++++++++++++ src/cmd/cc/lex.c | 873 ++++++++++++++++++++ src/cmd/cc/pp.c | 1125 ++++++++++++++++++++++++++ src/cmd/cc/rules.mk | 21 + src/cmd/cc/scratch.c | 7 + src/cmd/cc/util.c | 21 + 9 files changed, 5515 insertions(+) create mode 100644 src/cmd/cc/ast.c create mode 100644 src/cmd/cc/bits.c create mode 100644 src/cmd/cc/cc.c create mode 100644 src/cmd/cc/cc.h create mode 100644 src/cmd/cc/lex.c create mode 100644 src/cmd/cc/pp.c create mode 100644 src/cmd/cc/rules.mk create mode 100644 src/cmd/cc/scratch.c create mode 100644 src/cmd/cc/util.c (limited to 'src/cmd/cc') diff --git a/src/cmd/cc/ast.c b/src/cmd/cc/ast.c new file mode 100644 index 0000000..4330bcc --- /dev/null +++ b/src/cmd/cc/ast.c @@ -0,0 +1,2139 @@ +#include "cc.h" + +// ----------------------------------------------------------------------- +// helper macros + +#define alloc(ptr) (ptr) = mem·arenaalloc(C.heap, 1, sizeof *(ptr)) +#define copyarray(dst, arr, n) (dst) = mem·arenaalloc(C.heap, (n), sizeof *(arr)), memcpy((dst), (arr), n * sizeof *(arr)) +#define movearray(dst, arr, n) copyarray(dst,arr,n), free(arr) + +#define attop(prs) ((uintptr)prs->sp == (uintptr)prs->spstk) +#define peek(p, i) (p->tok[i]) +#define iskw(t, k) (((t).kind == Akeywd) && (t).val.i == (k)) +#define advance(p, l) (p->tok[0] = p->tok[1], p->tok[1] = lex(l), p->tok[0]) + +#define Bit(i) (1 << (i)) + +// ----------------------------------------------------------------------- +// helper functions + +static +string +nameof(Name *n) +{ + switch (n->kind) { + /* 0 corresponds to no state - i.e. an abstract name */ + case Nnil: + return nil; + case Nident: + return n->ident; + case Nparen: + return nameof(n->paren->name); + case Nindex: + case Ncall: + return nameof(n->sfx.name); + } + panicf("unreachable"); + return nil; +} + +static +void +openscope(Parser *p) +{ + if (++p->sp >= arrend(p->spstk)) + panicf("scope stack overflow"); +} + +/* + * TODO: save the symbol table with the ast node + * write a "copy(move)scope" + */ + +static +void +closescope(Parser *p) +{ + if (p->sp <= p->spstk) + panicf("scope stack underflow"); + + forgetall(&p->sp->objs); + forgetall(&p->sp->tags); + p->sp--; +} + +/* temporary stack helpers */ +static +Name* +getname(Parser *p) +{ + if (p->nm >= arrend(p->nmstk)) + panicf("name stack overflow"); + return p->nm++; +} + +static void putdtor(Parser *p, Dtor *dt); + +static +void +putname(Parser *p, Name *n) +{ + if (p->nm <= p->nmstk) + panicf("name stack underflow"); + + switch (n->kind) { + case Nnil: + case Nident: + break; + case Nparen: + putdtor(p, n->paren); + break; + case Nindex: + case Ncall: + putname(p, n->sfx.name); + break; + default: + panicf("unrecognized name kind"); + } + *p->nm-- = (Name){0}; +} + +static +Ptr* +getptr(Parser *p) +{ + if (p->pt >= arrend(p->ptstk)) + panicf("pointer stack overflow"); + + return p->pt++; +} + +static +void +putptr(Parser *p, Ptr *ptr) +{ + if (p->pt <= p->ptstk) + panicf("pointer stack underflow"); + + while ((ptr = ptr->link)) + putptr(p, ptr); + + *p->pt-- = (Ptr){0}; +} + + +static +Dtor* +getdtor(Parser *p) +{ + if (p->dt >= arrend(p->dtstk)) + panicf("dtor stack overflow"); + + p->dt->name = getname(p); + return p->dt++; +} + +static +void +putdtor(Parser *p, Dtor *dt) +{ + if (p->dt <= p->dtstk) + panicf("dtor stack underflow"); + + /* release the pointer overflow if we had to use it */ + if (p->dt->ptr.link) + putptr(p, p->dt->ptr.link); + + /* the dtor could encompass multiple names hierarchically */ + putname(p, dt->name); + *p->dt-- = (Dtor){0}; +} + +/* TODO: This will fail for forward declarations */ +static +void +declareobj(Parser *p, Decl *d) +{ + Sym *sym; + string ident; + uint32 kind; + struct Decls *link; + + switch (d->kind) { + case Dfunc: + case Dvar: + kind = Svar; + goto one; + case Dtype: + kind = Stype; + one: + ident = d->name; + break; + + case Dvars: + kind = Svar; + goto many; + case Dtypes: + kind = Stype; + many: + while (link = &d->list, link != nil) { + ident = link->name; + sym = lookup(&p->sp->objs, ident); + if (sym) { + errorat(peek(p, 0).pos, "redeclaration of name '%s' in object space", ident); + return; + } + sym = define(&p->sp->objs, ident, kind); + if (kind == Svar) + sym->obj = d; + else + sym->type = d->type; + } + break; + + default: + panicf("unrecognized node kind %d. expected declaration", d->kind); + } + sym = lookup(&p->sp->objs, ident); + if (sym) { + errorat(peek(p, 0).pos, "redeclaration of name '%s' in object space", ident); + return; + } + sym = define(&p->sp->objs, ident, kind); + if (kind == Svar) + sym->obj = d; + else + sym->type = d->type; +} + +/* enters the object identifier space */ +static +void +declareenum(Parser *p, int n, string *elts, Expr *vals) +{ + int i; + Sym *s; + + for (i = 0; i < n; i++) { + s = lookup(&p->sp->objs, elts[i]); + if (s) { + errorat(peek(p, 0).pos, "redeclaration of name %s in object space", elts[i]); + continue; + } + s = define(&p->sp->objs, elts[i], Senum); + s->val = vals + i; + } +} + +static +void +declaretag(Parser *p, uint32 t, string name) +{ + Sym *sym; + sym = lookup(&p->sp->tags, name); + if (sym) { + errorat(peek(p, 0).pos, "redeclaration of name '%s' in tag space", name); + return; + } + + sym = define(&p->sp->tags, name, Stype); + sym->type = t; +} + +static +Sym * +lookupobj(Parser *p, string ident) +{ + Sym *sym; + Scope *it; + + it = p->sp; + do { + sym = lookup(&it->objs, ident); + } while (sym == nil && --it >= p->spstk); + + return sym; +} + +static +Sym * +lookuptag(Parser *p, string ident) +{ + Sym *sym; + Scope *it; + + it = p->sp; + do { + sym = lookup(&it->tags, ident); + } while (sym == nil && --it >= p->spstk); + + return sym; +} + +static +int +nomatch(Token t, vlong kind) +{ + if (t.kind == kind) + return 0; + + if (t.kind == Akeywd) + errorat(t.pos, "expected token '%s', instead found keyword '%s'", tokens[kind], keywords[t.val.i]); + else + errorat(t.pos, "expected token '%s', instead found '%s'", tokens[kind], tokens[t.kind]); + return 1; +} + +// ----------------------------------------------------------------------- +// needed forward declarations + +static error spec(Parser *, Lexer *, uint64 *); +static uint32 basetype(Parser *, Lexer *, uint64 *s); +static string namedecl(Parser *, Lexer *, uint32 *, int); +static uint32 typename(Parser *, Lexer *, uint32 *); + +static error dtor(Parser *p, Lexer *lx, Dtor *d, int ab); +static uint32 typeofdtor(Dtor *, uint32); + + +static Decl *decl(Parser *, Lexer *); + +static Expr *ternary(Parser *, Lexer *); +static Expr *expr(Parser *, Lexer *); + +static error blkstmt(Parser *, Lexer *, Stmt **); + + +// ----------------------------------------------------------------------- +// expressions + +#define MAKEX(x, state) alloc((x)), (x)->kind = X##state + +static +Expr* +primary(Parser *p, Lexer *lx) +{ + int k; + Expr *x; + Token t; + Pos b; + + t = peek(p, 0); + b = t.pos; + switch (k = (t.kind & Vmask)) { + case Aident: + MAKEX(x, ident); + x->pos.beg = b; + x->pos.end = lx->pos; + x->name = t.val.s; + break; + + case Alit: + MAKEX(x, lit); + x->pos.beg = b; + x->pos.end = lx->pos; + x->val.kind = t.kind & ~Vmask; + x->val.v = t.val; + break; + + case Alparen: + advance(p, lx); + x = expr(p, lx); + t = peek(p, 0); + if (nomatch(t, Arparen)) { + errorat(lx->pos, "unterminated paren expression"); + goto Bad; + } + break; + + default: + panicf("unreachable"); + } + + advance(p, lx); + return x; +Bad: + errorat(lx->pos, "unable to parse operand expression"); + return nil; +} + +static +int +istypename(Parser *p, Token t) +{ + Sym *sym; + + if (t.kind == Akeywd && (Kconst <= t.val.i && t.val.i <= Kenum)) + return 1; + if (t.kind == Aident) { + sym = lookupobj(p, t.val.s); + return (sym != nil) && sym->kind == Stype; + } + + return 0; +} + +static Expr* initx(Parser *p, Lexer *lx); + +static +Expr* +initlist(Parser *p, Lexer *lx) +{ + Token t; + int c, n; + Expr *x, **a; + struct Key *k; + + MAKEX(x, initlist); + x->pos.beg = lx->pos; + x->init.n = 0; + if (t.kind == Arbrace) { + x->init.k = nil; + x->init.v = nil; + return x; + } + + c = 0; + n = 0; + a = nil; + k = nil; +Key0: + if (n >= c) { + c += 20; + k = realloc(k, c * sizeof(*k)); + a = realloc(a, c * sizeof(*a)); + } +Key1: + switch (t.kind) { + case Adot: + t = advance(p, lx); + if (t.kind != Aident) { + errorat(t.pos, "dot designator must be followed by identifier"); + goto Bad; + } + k[n++] = (struct Key) { + .kind = (uint32)x->init.n, + .s = t.val.s, + }; + t = advance(p, lx); + goto Key0; + + case Albrakt: + t = advance(p, lx); + k[n++] = (struct Key) { + .kind = (uint32)x->init.n | (1ULL << 32), + .x = expr(p, lx), + }; + t = peek(p, 0); + goto Key0; + + case Aeq: + t = advance(p, lx); + /* fallthrough */ + default: + a[x->init.n++] = initx(p, lx); + + t = peek(p, 0); + switch (t.kind) { + case Arbrace: + break; + case Acomma: + advance(p, lx); + /* fallthrough */ + default: + goto Key0; + } + break; + + case Acomma: + t = advance(p, lx); + break; + } + movearray(x->init.k, k, n); + movearray(x->init.v, a, x->init.n); + return x; +Bad: + errorat(t.pos, "could not parse initializer list"); + return nil; +} + +static +Expr* +initx(Parser *p, Lexer *lx) +{ + Expr *x; + Token t; + + t = peek(p, 0); + if (t.kind != Albrace) + return ternary(p, lx); + + advance(p, lx); + x = initlist(p, lx); + t = peek(p, 0); + if (nomatch(t, Arbrace)) { + errorat(t.pos, "unmatched brace in initializer list, found %s instead", tokens[t.kind]); + advance(p, lx); + } + + return x; +} + +static +Expr* +postfix(Parser *p, Lexer *lx) +{ + Pos b; + Token t; + int c, n; + uint32 type, qual; + Expr *x, *y, **a; + + t = peek(p, 0); + if (t.kind == Alparen) + if (istypename(p, peek(p, 1))) { + t = advance(p, lx); + type = typename(p, lx, &qual); + t = peek(p, 0); + if (nomatch(t, Arparen)) { + errorat(lx->pos, "unmatched paren: found %s instead", tokens[t.kind]); + goto Bad; + } + t = advance(p, lx); + if (nomatch(t, Albrace)) { + errorat(lx->pos, "bad initializer list: found %s", tokens[t.kind]); + goto Bad; + } + + x = initlist(p, lx); + + t = peek(p, 0); + if (nomatch(t, Arbrace)) { + errorat(lx->pos, "unmatched brace: found %s instead", tokens[t.kind]); + goto Bad; + } + + x->type = type; + x->qual = qual; + return x; + } + + x = primary(p, lx); + t = peek(p, 0); + for (;;) { + b = x->pos.beg; + switch (t.kind) { + case Ainc: + MAKEX(y, postinc); + goto Postfix; + case Adec: + MAKEX(y, postdec); + Postfix: + y->pos.beg = b; + y->pos.end = lx->pos; + y->unary.post = x; + x = y, y = nil; + break; + + case Adot: + MAKEX(y, self); + goto Select; + case Aarrow: + MAKEX(y, selp); + Select: + t = advance(p, lx); + if (t.kind != Aident) { + errorat(t.pos, "invalid operand of selector expression"); + goto Bad; + } + y->pos.beg = b; + y->pos.end = lx->pos; + + y->idx.f = t.val.s; + y->idx.x = x; + x = y, y = nil; + break; + + case Albrakt: + t = advance(p, lx); + if (t.kind == Arbrakt) { + errorat(t.pos, "empty index expression"); + goto Bad; + } + MAKEX(y, index); + y->idx.x = x; + y->idx.i = expr(p, lx); + + t = peek(p, 0); + if (t.kind != Albrakt) { + errorat(t.pos, "malformed index expression"); + goto Bad; + } + + x = y, y = nil; + break; + + case Alparen: + t = advance(p, lx); + MAKEX(y, call); + y->call.fn = x; + y->pos.beg = b; + y->call.n = 0; + if (t.kind == Arparen) { + y->call.arg = nil; + goto Endfunc; + } + c = 0; + a = nil; + Arg: + if (y->call.n >= c) { + c += 20; + a = realloc(a, c * sizeof(*a)); + } + a[y->call.n++] = expr(p, lx); + t = peek(p, 0); + if (t.kind == Acomma) { + advance(p, lx); + goto Arg; + } + if (t.kind != Arparen) { + errorat(t.pos, "invalid token '%s' found in call argument"); + goto Bad; + } + movearray(y->call.arg, a, y->call.n); + Endfunc: + y->pos.end = lx->pos; + x = y, y = nil; + break; + + default: + return x; + } + t = advance(p, lx); + } + return x; +Bad: + errorat(lx->pos, "failed to parse primary expression"); + return nil; +} + +static +uint32 +typename(Parser *p, Lexer *lx, uint32 *spec) +{ + uint32 base; + uint64 s; + + base = basetype(p, lx, &s); + if (!base) { + errorat(lx->pos, "failed to parse type name specifiers"); + return 0; + } + *spec = (uint32)s; + namedecl(p, lx, &base, 1); + + return base; +} + +static Expr* cast(Parser *p, Lexer *lx); + +static +Expr* +unary(Parser *p, Lexer *lx) +{ + Expr *x; + Token t; + + t = peek(p, 0); + switch (t.kind) { + case Ainc: MAKEX(x, preinc); goto Prefix; + case Adec: MAKEX(x, predec); /* fallthrough */ + Prefix: + advance(p, lx); + x->pos.beg = t.pos; + x->unary.pre = unary(p, lx); + x->pos.end = x->unary.pre->pos.end; + return x; + + case Aneg: MAKEX(x, neg); goto Unary; + case Aand: MAKEX(x, ref); goto Unary; + case Anot: MAKEX(x, not); goto Unary; + case Astar: MAKEX(x, star); goto Unary; + case Aadd: MAKEX(x, plus); goto Unary; + case Asub: MAKEX(x, minus); /* fallthrough */ + Unary: + advance(p, lx); + x->pos.beg = t.pos; + x->unary.pre = cast(p, lx); + x->pos.end = x->unary.pre->pos.end; + return x; + + case Akeywd: + switch (t.val.i) { + case Ksizeof: + MAKEX(x, sizeof); + goto Key; + case Kalignof: + MAKEX(x, alignof); + /* fallthrough */ + Key: + t = advance(p, lx); + if (t.kind == Alparen) + if (istypename(p, peek(p, 1))) { + t = advance(p, lx); + x->info.type = 0; + x->info.of.type = typename(p, lx, &x->info.of.qual); + + t = peek(p, 0); + if (nomatch(t, Arparen)) { + errorat(t.pos, "missing paren for size/alignof statement"); + goto Bad; + } + advance(p, lx); + return x; + } + + x->info.type = 1; + x->info.x = unary(p, lx); + return x; + + default: + ; + } + /* fallthrough */ + default: + return postfix(p, lx); + } +Bad: + return nil; +} + +static +Expr* +cast(Parser *p, Lexer *lx) +{ + Expr *x; + Token t; + + t = peek(p, 0); + if (t.kind == Alparen && istypename(p, peek(p,1))) { + t = advance(p, lx); + MAKEX(x, cast); + + x->pos.beg = t.pos; + x->cast.to.type = typename(p, lx, &x->cast.to.qual); + if (!x->cast.to.type) { + errorat(lx->pos, "invalid type operand of cast"); + goto Bad; + } + + t = peek(p, 0); + if (nomatch(t, Arparen)) { + errorat(lx->pos, "missing closing paren after cast expression"); + goto Bad; + } + advance(p, lx); + + x->cast.x = cast(p, lx); + x->pos.beg = lx->pos; + return x; + } + return unary(p, lx); + +Bad: + errorat(lx->pos, "failed to parse cast expression"); + return nil; +} + +/* static data for binary operators */ +#define OPERATORS \ + OPERATOR(Astar, 10, Xmul) \ + OPERATOR(Adiv, 10, Xdiv) \ + OPERATOR(Amod, 10, Xmod) \ + OPERATOR(Aadd, 9, Xadd) \ + OPERATOR(Asub, 9, Xsub) \ + OPERATOR(Alsft, 8, Xlsft) \ + OPERATOR(Arsft, 8, Xrsft) \ + OPERATOR(Agteq, 7, Xgteq) \ + OPERATOR(Alteq, 7, Xlteq) \ + OPERATOR(Alt, 7, Xlt) \ + OPERATOR(Agt, 7, Xgt) \ + OPERATOR(Aeq, 6, Xeql) \ + OPERATOR(Aneq, 6, Xneq) \ + OPERATOR(Aand, 5, Xand) \ + OPERATOR(Axor, 4, Xxor) \ + OPERATOR(Aor, 3, Xor) \ + OPERATOR(Aandand, 2, Xandand) \ + OPERATOR(Aoror, 1, Xoror) + +static int prectab[NUM_TOKENS] = +{ +#define OPERATOR(a, b, c) [a] = b, + OPERATORS +#undef OPERATOR +}; + +static int optab[NUM_TOKENS] = +{ +#define OPERATOR(a, b, c) [a] = c, + OPERATORS +#undef OPERATOR +}; +#undef OPERATORS + +static +Expr* +binary(Parser *p, Lexer *lx, int prec) +{ + Token t; + int k, np; + Expr *l, *x; + + l = cast(p, lx); + for (;;) { + t = peek(p, 0); + k = t.kind; + np = prectab[k]; + if (np < prec) + return l; + + alloc(x); + t = advance(p, lx); + + x->pos.beg = l->pos.beg; + x->kind = optab[k]; + x->binary.l = l; + x->binary.r = binary(p, lx, np + 1); + x->pos.end = x->binary.r->pos.end; + + l = x; + } + return l; +Bad: + errorat(t.pos, "failed to parse expression"); + return nil; +} + +static +Expr* +ternary(Parser *p, Lexer *lx) +{ + Pos b; + Token t; + Expr *x, *y; + + x = binary(p, lx, 1); + t = peek(p, 0); + b = t.pos; + + switch (t.kind) { + case Aqmark: + t = advance(p, lx); + y = x; + MAKEX(x, ternary); + x->pos.beg = b; + x->kind = Xternary; + x->cond.c = y; + x->cond.t = expr(p, lx); + + t = peek(p, 0); + if (nomatch(t, Acolon)) { + errorat(t.pos, "ternary expression missing ':'"); + goto Bad; + } + t = advance(p, lx); + x->cond.e = expr(p, lx); + x->pos.end = lx->pos; + break; + + case Aasn: MAKEX(y, asn); goto Assign; + case Aorasn: MAKEX(y, orasn); goto Assign; + case Axorasn: MAKEX(y, xorasn); goto Assign; + case Aandasn: MAKEX(y, andasn); goto Assign; + case Asubasn: MAKEX(y, subasn); goto Assign; + case Amulasn: MAKEX(y, mulasn); goto Assign; + case Adivasn: MAKEX(y, divasn); goto Assign; + case Amodasn: MAKEX(y, modasn); goto Assign; + case Alsftasn: MAKEX(y, lsftasn); goto Assign; + case Arsftasn: MAKEX(y, rsftasn); goto Assign; + Assign: + advance(p, lx); + + y->asn.l = x; + y->asn.r = ternary(p, lx); + x = y; + x->pos.beg = b; + x->pos.end = lx->pos; + break; + default: + ; + } + + return x; +Bad: + errorat(lx->pos, "failing expression parse"); + return nil; +} + +static +Expr* +expr(Parser *p, Lexer *lx) +{ + Pos b; + Token t; + Expr *x, *y; + + x = ternary(p, lx); + while (t = peek(p, 0), t.kind == Acomma) { + advance(p, lx); + y = x; + MAKEX(x, comma); + x->pos.beg = y->pos.beg; + x->comma.x[0] = y; + x->comma.x[1] = ternary(p, lx); + x->pos.end = lx->pos; + y = nil; + } + + return x; +} + +// ----------------------------------------------------------------------- +// statements + +static +struct Node* +stmt(Parser *p, Lexer *lx) +{ + int k; + Stmt *s; + Sym *sym; + Token t; + + t = peek(p, 0); + k = t.kind; + + /* intercept decl before allocating a statement */ + if (k == Aident) { + if (peek(p, 1).kind == Acolon) + goto Tlabel; + sym = lookupobj(p, t.val.s); + if (!sym) { + errorat(lx->pos, "unrecognized type identifier '%s'", t.val.s); + goto Bad; + } + + if (sym->kind == Stype) + goto Tdecl; + if (sym->kind == Svar) { + alloc(s); + s->pos.beg = lx->pos; + goto Texpr; + } + + errorat(lx->pos, "bad symbol type used as type identifier"); + goto Bad; + } + + if (k == Akeywd) { + if ((Kauto <= t.val.i && t.val.i <= Ktypedef) || (Kconst <= t.val.i && t.val.i <= Kenum)) { + Tdecl: + return (Node *)decl(p, lx); + } + } + + alloc(s); + s->pos.beg = lx->pos; + + switch (k) { + case Akeywd: + switch (k = t.val.i) { + case Kif: + t = advance(p, lx); + s->kind = Sif; + + if (nomatch(t, Alparen)) { + errorat(lx->pos, "missing opening paren before if conditional"); + goto Bad; + } + s->br.cond = expr(p, lx); + if (nomatch(t, Arparen)) { + errorat(lx->pos, "missing closing paren after if conditional"); + goto Bad; + } + s->br.body = stmt(p, lx); + + t = peek(p, 0); + if (iskw(t, Kelse)) + s->br.orelse = stmt(p, lx); + else + s->br.orelse = nil; + + break; + + case Kswitch: + t = advance(p, lx); + s->kind = Sswitch; + + if (nomatch(t, Alparen)) { + errorat(lx->pos, "missing opening paren before switch conditional"); + goto Bad; + } + s->br.cond = expr(p, lx); + if (nomatch(t, Arparen)) { + errorat(lx->pos, "missing closing paren after switch conditional"); + goto Bad; + } + s->br.body = stmt(p, lx); + s->br.orelse = nil; + + break; + + case Kfor: + t = advance(p, lx); + s->kind = Sfor; + + if (nomatch(t, Alparen)) { + errorat(lx->pos, "missing opening paren before for loop preamble"); + goto Bad; + } + + if (t.kind == Asemi) + s->loop.init = nil; + else { + // TODO: test for declaration + s->loop.init = (Node *)expr(p, lx); + } + + if (nomatch(t, Asemi)) { + errorat(lx->pos, "missing semicolon"); + goto Bad; + } + + if (t.kind == Asemi) + s->loop.cond = nil; + else + s->loop.cond = expr(p, lx); + + if (nomatch(t, Asemi)) { + errorat(lx->pos, "missing semicolon"); + goto Bad; + } + + if (t.kind == Asemi) + s->loop.step = nil; + else + s->loop.step = expr(p, lx); + + if (nomatch(t, Alparen)) { + errorat(lx->pos, "missing closing paren after for loop preamble"); + goto Bad; + } + s->loop.body = stmt(p, lx); + break; + + case Kwhile: + t = advance(p, lx); + s->kind = Swhile; + if (nomatch(t, Alparen)) { + errorat(lx->pos, "missing opening paren before while loop conditional"); + goto Bad; + } + s->loop.cond = expr(p, lx); + if (nomatch(t, Arparen)) { + errorat(t.pos, "missing closing paren after while loop conditional"); + goto Bad; + } + + s->loop.init = nil; + s->loop.step = nil; + s->loop.body = stmt(p, lx); + break; + + case Kdo: + t = advance(p, lx); + s->kind = Sdo; + s->loop.body = stmt(p, lx); + + if (!iskw(t, Kwhile)) { + errorat(t.pos, "missing while statement conditional after do body"); + goto Bad; + } + t = advance(p, lx); + if (nomatch(t, Alparen)) { + errorat(t.pos, "missing open paren after while conditional"); + goto Bad; + } + + s->loop.init = nil; + s->loop.step = nil; + s->loop.cond = expr(p, lx); + break; + + case Kgoto: + t = advance(p, lx); + s->kind = Sgoto; + if (t.kind != Aident) { + errorat(t.pos, "invalid argument to goto"); + goto Bad; + } + s->jmp.lbl = t.val.s; + t = advance(p, lx); + if (nomatch(t, Asemi)) { + errorat(t.pos, "missing semicolon after goto"); + goto Bad; + } + advance(p, lx); + break; + + case Kcontinue: + t = advance(p, lx); + s->kind = Scontin; + s->jmp.lbl = nil; + s->jmp.x = nil; + if (nomatch(t, Asemi)) { + errorat(t.pos, "missing semicolon after continue"); + goto Bad; + } + advance(p, lx); + break; + + case Kbreak: + t = advance(p, lx); + s->kind = Sbreak; + s->jmp.lbl = nil; + s->jmp.x = nil; + if (nomatch(t, Asemi)) { + errorat(t.pos, "missing semicolon after break"); + goto Bad; + } + advance(p, lx); + break; + + case Kreturn: + t = advance(p, lx); + s->kind = Sreturn; + + s->jmp.lbl = nil; + s->jmp.x = (t.kind == Asemi) ? nil : expr(p, lx); + + t = peek(p, 0); + if (nomatch(t, Asemi)) { + errorat(t.pos, "missing semicolon after return statement"); + goto Bad; + } + advance(p, lx); + break; + + case Kcase: + t = advance(p, lx); + s->kind = Scase; + s->lbl.x = expr(p, lx); + if (nomatch(t, Acolon)) { + errorat(t.pos, "missing colon after default label"); + goto Bad; + } + t = advance(p, lx); + s->lbl.stmt = stmt(p, lx); + break; + + case Kdefault: + t = advance(p, lx); + s->kind = Scase; + s->lbl.x = nil; + if (nomatch(t, Acolon)) { + errorat(t.pos, "missing colon after default label"); + goto Bad; + } + t = advance(p, lx); + s->lbl.stmt = stmt(p, lx); + break; + + default: + panicf("unexpected statement keyword %s", keywords[k]); + } + break; + case Albrace: + s->kind = Sblock; + openscope(p); + if (blkstmt(p, lx, &s)) { + errorat(lx->pos, "failed to parse block statement"); + goto Bad; + } + closescope(p); + break; + + case Asemi: + t = advance(p, lx); + s->kind = Sempty; + break; + + case Aident: + Tlabel: + t = advance(p, lx); + s->kind = Slabel; + if (nomatch(t, Acolon)) { + errorat(t.pos, "missing colon after labelled block"); + goto Bad; + } + t = advance(p, lx); + s->lbl.stmt = stmt(p, lx); + break; + + default: + Texpr: + s->kind = Sexpr; + s->x = expr(p, lx); + + t = peek(p, 0); + if (nomatch(t, Asemi)) { + errorat(t.pos, "missing semicolon after statement expression"); + goto Bad; + } + advance(p, lx); + } + + s->pos.end = lx->pos; + return (Node *)s; +Bad: + errorat(lx->pos, "failed to parse statement"); + return nil; +} + +static +error +blkstmt(Parser *p, Lexer *lx, Stmt **s) +{ + Token t; + int len; + int cap; + Node **ns; + + alloc(*s); + (*s)->kind = Sblock; + (*s)->pos.beg = lx->pos; + + t = peek(p, 0); + if (nomatch(t, Albrace)) + goto Bad; + t = advance(p, lx); + + len = 0, cap = 20; + ns = malloc(cap*sizeof(*ns)); + while (t.kind != Arbrace) { + if (cap == len) { + cap += 20; + ns = realloc(ns, cap*sizeof(*ns)); + } + ns[len++] = stmt(p, lx); + t = peek(p, 0); + } + advance(p, lx); + + (*s)->pos.end = lx->pos; + (*s)->blk.n = len; + movearray((*s)->blk.item, ns, len); + return 0; +Bad: + errorat(lx->pos, "failed to parse block statement"); + free(ns); + return 1; +} + +// ----------------------------------------------------------------------- +// types + +uint32 +ptrtype(uint32 base, uint32 qual) +{ + uint32 i; + Type *t; + + i = type(); + t = C.type.info + i; + t->kind = Tptr; + t->ptr.base = base; + t->ptr.qual = qual; + t->size = pointer.size; + t->align = pointer.align; + t->sign = pointer.sign; + + return i; +} + +uint32 +arraytype(uint32 base, uint32 qual, Expr *ix) +{ + int i, n; + Type *t; + + /* TODO: evaluate the length */ + n = 10; + i = type(); + t = C.type.info + i; + t->kind = Tarray; + t->ptr.base = base; + t->size = n * C.type.info[base].size; + t->align = C.type.info[base].align; + t->sign = 0; + + return i; +} + +uint32 +functype(uint32 ret, int n, Field *args, int dots) +{ + uint32 i; + Type *t; + + i = type(); + t = C.type.info + i; + t->kind = Tfunc; + t->size = pointer.size; + t->align = pointer.align; + t->sign = pointer.sign; + + t->func.ret = ret; + t->func.n = n; + t->func.arg = args; + t->func.dots = dots; + + return i; +} + +#define ALIGN_DOWN(n, a) ((n) & ~((a)-1)) +#define ALIGN_UP(n, a) ALIGN_DOWN((n) + (a)-1, (a)) +uint32 +structtype(int n, Field *field, Expr *bits) +{ + uint32 i; + Type *t; + Field *f, *e; + + i = type(); + t = C.type.info + i; + t->kind = Tstruct; + t->size = 0; + t->align = 0; + for (f = field, e = field+n; f != e; ++f) { + t->size += C.type.info[f->type].size + ALIGN_UP(t->size, C.type.info[f->type].align); + t->align = MAX(t->align, C.type.info[f->type].align); + } + t->aggr.len = n; + t->aggr.f = field; + t->aggr.x = bits; + + return i; +} + +uint32 +uniontype(int n, Field *field, Expr *bits) +{ + uint32 i; + Type *t; + Field *f, *e; + + i = type(); + t = C.type.info + i; + t->kind = Tstruct; + t->size = 0; + t->align = 0; + for (f = field, e = field+n; f != e; ++f) { + t->size = MAX(t->size, C.type.info[f->type].size); + t->align = MAX(t->align, C.type.info[f->type].align); + } + t->aggr.len = n; + t->aggr.f = field; + t->aggr.x = bits; + + return i; +} + +uint32 +enumtype(int n, string *elts, Expr *vals) +{ + uint32 i; + Type *t; + Field *f, *e; + + i = type(); + t = C.type.info + i; + t->kind = Tenum; + /* TODO: dont hardcode int32 */ + t->size = 4; + t->align = 4; + t->enm.len = n; + t->enm.elt = elts; + t->enm.val = vals; + + return i; +} +#undef ALIGN_UP +#undef ALIGN_DOWN + +/* unpacking C declarations into sensible types */ +static +uint32 +typeofname(Name *name, uint32 base) +{ + switch (name->kind) { + /* Nnil corresponds to an abstract declarator (i.e. no identifier) */ + case Nnil: + case Nident: + return base; + case Nparen: + return typeofdtor(name->paren, base); + case Nindex: + return typeofname(name->sfx.name, arraytype(base, name->sfx.idx.q, name->sfx.idx.x)); + case Ncall: + return typeofname(name->sfx.name, functype(base, name->sfx.call.n, name->sfx.call.arg, name->sfx.call.dots)); + default: + panicf("unreachable"); + } + return 0; +} + +static +uint32 +typeofdtor(Dtor *decl, uint32 base) +{ + int n; + Ptr *p; + uint64 b, tmp; + + n = 0; + p = &decl->ptr; + b = p->kind; + while (b & 1) { + base = ptrtype(base, b >> 1); + if (++n >= 8) { + p = p->link; + b = p->kind; + } else { + b >>= 6; + } + } + + return typeofname(decl->name, base); +} + +static +uint32 +basetype(Parser *p, Lexer *lx, uint64 *s) +{ + int n; + uint64 m; + + if (spec(p, lx, s)) { + errorat(lx->pos, "failed to parse type specifier"); + return 0; + } + + m = (((*s<<32)>>32) & ~(MaskQul|MaskMem|MaskFcn)); + for (n = 0; n < arrlen(validtypespec); n++) { + if (validtypespec[n] == m) { + if (indextypespec[n] < 0) { + m = *s >> 32; + if (!m) { + errorat(lx->pos, "not a valid type identifier"); + return 0; + } + return m; + } + return indextypespec[n]; + } + } + + errorat(lx->pos, "invalid type specifier"); + return 0; +} + +static +string +namedecl(Parser *p, Lexer *lx, uint32 *base, int noname) +{ + Dtor *dt; + string name; + Type *t; + + dt = getdtor(p); + name = nil; + if (dtor(p, lx, dt, noname)) { + errorat(lx->pos, "invalid declarator"); + goto End; + } + if (!noname || noname == 2 && dt->name->kind) + name = nameof(dt->name); + + *base = typeofdtor(dt, *base); + putdtor(p, dt); + return name; +End: + putdtor(p, dt); + return nil; +} + +// ----------------------------------------------------------------------- +// declarations + +static +uint32 +enumerate(Parser *p, Lexer *lx, string name, int kind) +{ + int i, n; + uint64 s; + uint32 t; + Token tk; + /* TODO: think of a better soln */ + string nm[1024], *elts; + Expr *cx[1024], *vals; + + for (n = 0; tk.kind != Arbrace && n < arrlen(nm); n++) { + if (tk.kind != Aident) { + errorat(tk.pos, "invalid token %s in enum declaration", tokens[tk.kind]); + goto Bad; + } + nm[n] = tk.val.s; + cx[n] = nil; + + tk = advance(p, lx); + switch(tk.kind) { + case Aeq: + advance(p, lx); + cx[n] = expr(p, lx); + tk = peek(p, 0); + if (tk.kind != Acomma) + continue; + /* fallthrough */ + case Acomma: + tk = advance(p, lx); + } + } + copyarray(elts, nm, n); + copyarray(vals, cx, n); + + t = enumtype(n, elts, vals); + declareenum(p, n, elts, vals); + return t; +Bad: + errorat(tk.pos, "failed to parse enum declaration"); + return 0; +} + +static +uint32 +aggregate(Parser *p, Lexer *lx, string name, int kind) +{ + int n; + uint64 s; + Token tk; + /* TODO: think of a better soln */ + static Field fs[1024]; + Field *f; + static Expr *cx[1024]; + Expr *x; + + for (n = 0, tk = peek(p, 0); tk.kind != Arbrace && n < arrlen(fs); n++) { + fs[n].type = basetype(p, lx, &s); + fs[n].qual = (uint32)(s & ~(MaskTyp|MaskInt|MaskFlt)); + Field: + fs[n].name = namedecl(p, lx, &fs[n].type, 0); + tk = peek(p, 0); + switch (tk.kind) { + case Acolon: + advance(p, lx); + cx[n] = expr(p, lx); + tk = peek(p, 0); + if (tk.kind == Asemi) { + tk = advance(p, lx); + continue; + } + if (tk.kind != Acomma) { + errorat(tk.pos, "unrecognized token %s in struct field declaration", tokens[tk.kind]); + goto Bad; + } + /* fallthrough */ + case Acomma: + advance(p, lx); + n++; + goto Field; + + case Asemi: + tk = advance(p, lx); + continue; + + default: + errorat(tk.pos, "unrecognized token %s in struct field declaration", tokens[tk.kind]); + goto Bad; + } + } + copyarray(f, fs, n); + copyarray(x, cx, n); + return (kind == Tstruct) ? structtype(n, f, x) : uniontype(n, f, x); +Bad: + errorat(tk.pos, "failed to parse aggregate declaration"); + return 0; +} + +static +error +spec(Parser *p, Lexer *lx, uint64 *spec) +{ + Token t; + int n, i; + Sym *typ; + string name; + uint32 tag; + uint64 s, sm; + static uint32 (*aggrfunc[2])(Parser *, Lexer *, string , int) = {aggregate, enumerate}; + + s = 0; + while (t = peek(p, 0), t.kind >= Aident) { + /* typename */ + if (t.kind == Aident) { + typ = lookupobj(p, t.val.s); + if (!typ || (typ && typ->kind != Stype)) + break; + + sm = typ->type; + s |= (sm << 32 | Tname); + advance(p, lx); + continue; + } + + /* keyword */ + switch (n = t.val.i) { + case Kauto: case Kregister: case Kstatic: case Kextern: case Ktypedef: case Ktls: + if (s & MaskMem) { + errorat(lx->pos, "multiple storage class specifiers: second was %s", keywords[n]); + goto Bad; + } + break; + + case Kinline: case Knoret: + if (s & Bit(n)) + warnat(lx->pos, "duplicate %s function specifier", keywords[n]); + break; + + case Kconst: case Kvolatile: + if (s & Bit(n)) + warnat(lx->pos, "duplicate %s specifier found in declaration", keywords[n]); + break; + + case Ksigned: case Kunsigned: + if (s & MaskSgn) { + if (s & Bit(n)) { + warnat(lx->pos, "duplicated storage class specifier: second was %s", keywords[n]); + break; + } + errorat(lx->pos, "multiple storage class specifiers"); + goto Bad; + } + break; + + case Kshort: + if (s & Tshort) { + warnat(lx->pos, "duplicated short specifier"); + break; + } + break; + + case Klong: + if ((s >> Klong) & 2) { + errorat(lx->pos, "cannot chain three or more long specifiers"); + goto Bad; + } + s += Bit(n); + t = advance(p, lx); + continue; + + case Kvoid: case Kchar: case Kint: case Kfloat: case Kdouble: + if (s & MaskTyp) { + errorat(lx->pos, "more than one base type specified"); + goto Bad; + } + break; + + case Kstruct: case Kunion: + i = 0; + goto Aggr; + case Kenum: + i = 1; + Aggr: + if (s & (Tstruct | Tunion | Tenum)) { + errorat(lx->pos, "more than one aggregate/enum type specified"); + goto Bad; + } + t = advance(p, lx); + if (t.kind != Aident && t.kind != Albrace) { + errorat(t.pos, "enum specifier missing valid declaration"); + goto Bad; + } + + /* NOTE: This offset is needed to correctly obtain Tstruct */ + n++; + name = nil; + tag = 0; + if (t.kind == Aident) { + name = t.val.s; + t = advance(p, lx); + } + if (t.kind == Albrace) { + /* TODO: we need check if the name exists. */ + t = advance(p, lx); + /* NOTE: This depends on the enum order. KEEP IN SYNC */ + tag = aggrfunc[i](p, lx, name, Bit(n)); + if (t = peek(p, 0), nomatch(t, Arbrace)) { + errorat(t.pos, "invalid token %s in aggregate/enum declaration", tokens[t.kind]); + goto Bad; + } + /* high bits encode the type index */ + s |= (uint64)tag << 32; + } + /* TODO: if name does not exist, enter in an incomplete type! */ + if (name) + declaretag(p, tag, name); + + break; + + default: + errorat(t.pos, "invalid keyword '%s' found in declaration specifier", keywords[n]); + } + + s |= Bit(n); + advance(p, lx); + } + + *spec = s; + return 0; + +Bad: + /* TODO: serialize bitflags to string for nice error message */ + errorat(lx->pos, "ignoring specifier"); + *spec = Sbad; + return 1; +} + +/* + * name declaration + * see dtor for valid values of ab + */ +static +error +name(Parser *p, Lexer *lx, Name **nmp, int ab) +{ + Token t; + int n, k; + uint64 s; + Sym *sym; + Name *nm, *tmp; + + /* max args = 100 */ + struct Field args[100]; + + nm = *nmp; + t = peek(p, 0); + switch (k = t.kind) { + case Aident: + if (ab == 1) { + errorat(t.pos, "identifier not allowed in abstract declarator"); + goto Bad; + } + nm->kind = Nident; + nm->ident = t.val.s; + break; + + case Alparen: + advance(p, lx); + nm->kind = Nparen; + nm->paren = getdtor(p); + if (dtor(p, lx, nm->paren, ab)) { + putdtor(p, nm->paren); + nm->paren = nil; + errorat(lx->pos, "invalid declarator in parenthesis"); + goto Bad; + } + + t = peek(p, 0); + if (nomatch(t, Arparen)) { + putdtor(p, nm->paren); + nm->paren = nil; + errorat(lx->pos, "missing closing paren in declarator"); + goto Bad; + } + break; + + case Albrakt: + if (ab) + goto Sfx; + errorat(lx->pos, "missing identifier in non-abstract declarator"); + /* fallthrough */ + default: + if (ab) + goto Sfx; + errorat(lx->pos, "invalid token '%s' in name declaration", tokens[k]); + goto Bad; + } + + t = advance(p, lx); +Sfx: + for (;;) { + switch (k = t.kind) { + case Albrakt: + tmp = getname(p); + tmp->kind = Nindex; + tmp->sfx.name = nm; + + nm = tmp, tmp = nil; + + t = advance(p, lx); + if (t.kind == Arbrakt) { + nm->sfx.idx.q = 0; + Iend: + nm->sfx.idx.x = nil; + t = advance(p, lx); + break; + } + if (t.kind == Astar) { + nm->sfx.idx.q = -1; + IStar: + nm->sfx.idx.x = nil; + t = advance(p, lx); + if (t.kind != Arbrakt) { + errorat(t.pos, "invalid '*' syntax in index expression"); + goto Bad; + } + t = advance(p, lx); + break; + } + + if (spec(p, lx, &s)) { + errorat(lx->pos, "invalid type qualifier list in index expression"); + goto Bad; + } + + nm->sfx.idx.q = (uint32)s; + t = peek(p, 0); + + if (t.kind == Astar) + goto IStar; + + if (t.kind == Arbrakt) + goto Iend; + + nm->sfx.idx.x = expr(p, lx); + + t = peek(p, 0); + if (nomatch(t, Arbrakt)) { + errorat(t.pos, "unterminated index expression"); + goto Bad; + } + + t = advance(p, lx); + continue; + + case Alparen: + tmp = getname(p); + tmp->kind = Ncall; + tmp->sfx.name = nm; + + nm = tmp, tmp = nil; + + t = advance(p, lx); + nm->sfx.call.n = 0; + switch (t.kind) { + case Arparen: + nm->sfx.call.arg = nil; + break; + + case Aident: + sym = lookupobj(p, t.val.s); + if (!sym || (sym && sym->kind != Stype)) { + while (t.kind == Aident) { + if (nm->sfx.call.n >= arrlen(args)) + panicf("ident stack overflow"); + args[nm->sfx.call.n++] = (struct Field) { + .qual = 0, + .type = 0, + .name = t.val.s, + }; + t = advance(p, lx); + } + if (nomatch(t, Arparen)) { + errorat(t.pos, "token '%s' found in function parameter identifier list"); + goto Bad; + } + copyarray(nm->sfx.call.arg, args, nm->sfx.call.n); + break; + } + goto ParamLoop; + + case Akeywd: + if (t.val.i < Kconst || t.val.i > Kenum) { + errorat(t.pos, "invalid keyword %s inside function signature"); + goto Bad; + } + + ParamLoop: + if (nm->sfx.call.n >= arrlen(args)-1) + panicf("out of argument buffer"); + + args[nm->sfx.call.n].type = basetype(p, lx, &s); + if (!args[nm->sfx.call.n].type) { + errorat(lx->pos, "could not parse base type in function call"); + goto Bad; + } + + args[nm->sfx.call.n].qual = (uint32)s & ~(MaskTyp|MaskInt|MaskFlt); + args[nm->sfx.call.n].name = namedecl(p, lx, &args[nm->sfx.call.n].type, 2); + + nm->sfx.call.n++; + if ((t = peek(p, 0)).kind == Acomma) { + advance(p, lx); + goto ParamLoop; + } + + if (t.kind == Aellip) { + nm->sfx.call.dots = 1; + t = advance(p, lx); + } + + if (nomatch(t, Arparen)) { + errorat(t.pos, "token '%s' found in function parameter list"); + goto Bad; + } + copyarray(nm->sfx.call.arg, args, nm->sfx.call.n); + break; + + default: + errorat(t.pos, "invalid token %s inside function call signature", tokens[t.kind]); + goto Bad; + } + + t = advance(p, lx); + continue; + + default: + break; + } + break; + } + + *nmp = nm; + return 0; +Bad: + return 1; +} + +/* pointer kind is partitioned into 8x6 regions + * ab => abstract + * @ 0: must have identifier + * @ 1: must not have identifier + * @ 2: don't care + * else: undefined + */ +static +error +dtor(Parser *p, Lexer *lx, Dtor *d, int ab) +{ + int n, k; + error err; + Token t; + Dtor *link; + Ptr *ptr, *x; + + err = 1; + + ptr = &d->ptr; + ptr->kind = 0; + ptr->link = nil; + + t = peek(p, 0); + if (t.kind != Astar) { + if (ab || t.kind == Aident || t.kind == Arparen) + goto Name; + goto Bad; + } + n = 0; +Ptr: + ptr->kind |= Bit(n); + advance(p, lx); +Key: + t = peek(p, 0); + switch (k = t.kind) { + case Akeywd: + if (Kconst <= t.val.i && t.val.i <= Katomic) + ptr->kind |= Bit(6*n + (t.val.i - Kconst + 1)); + else { + errorat(lx->pos, "invalid keyword '%s' modifies pointer", keywords[t.val.i]); + goto Bad; + } + advance(p, lx); + goto Key; + + case Astar: + if (++n >= 8) { + x = getptr(p); + x->kind = 0; + x->link = nil; + ptr->link = x; + ptr = x; + n = 0; + } + goto Ptr; + + case Aident: + case Alparen: + goto Name; + + default: + if (ab) + goto Name; + errorat(lx->pos, "invalid token '%s' modifies pointer specification", tokens[t.kind]); + goto Bad; + } +Name: + return name(p, lx, &d->name, ab); +Bad: + return err; +} + +static +Decl * +decl(Parser *p, Lexer *lx) +{ + uint64 s; + Token t; + Decl *d; + Expr *x; + string name; + struct Decls *ds; + uint32 base, type; + + alloc(d); + + d->kind = 0; + d->pos.beg = lx->pos; + + base = basetype(p, lx, &s); + if (!base) { + errorat(lx->pos, "could not parse type declaration"); + goto Bad; + } + + x = nil; + d->spec = (uint32)s & ~(MaskInt|MaskFlt|MaskTyp); + d->type = base; + d->name = namedecl(p, lx, &d->type, 0); + /* TODO: think about functions (both decls and defs) */ + d->kind = (s & Mtype) ? Dtype : Dvar; + + switch (t = peek(p, 0), t.kind) { + case Aeq: + if (s & Mtype) { + errorat(d->pos.beg, "initialization of type not allowed"); + goto Bad; + } + t = advance(p, lx); + x = initx(p, lx); + d->kind = Dvar; + if (t.kind != Acomma) { + d->init = x; + goto Semi; + } + /* fallthrough */ + case Acomma: + d->kind |= Dlist; + d->list.init = x; + /* move singleton data over */ + name = d->name; + type = d->type; + d->list.name = name; + d->list.type = type; + ds = &d->list; + /* iterate until we hit end of list */ + while (t.kind == Acomma) { + t = advance(p, lx); + + alloc(ds->link); + ds = ds->link; + ds->type = base; + ds->name = namedecl(p, lx, &ds->type, 0); + + t = peek(p, 0); + if (t.kind == Aeq) { + t = advance(p, lx); + ds->init = initx(p, lx); + } else + ds->init = nil; + } + goto Semi; + + case Albrace: + d->kind = Dfunc; + alloc(d->body); + + if (!attop(p)) { + errorat(lx->pos, "nested function declarations are illegal"); + goto Bad; + } + + if (C.type.info[d->type].kind != Tfunc) { + errorat(lx->pos, "attempted to define function body for non function type"); + goto Bad; + } + + openscope(p); + if (blkstmt(p, lx, &d->body)) { + errorat(lx->pos, "failed to parse function body"); + goto Bad; + } + closescope(p); + break; + + default: + Semi: + if (nomatch(t, Asemi)) { + errorat(t.pos, "no semicolon after declaration"); + goto Bad; + } + t = advance(p, lx); + } + + d->pos.end = lx->pos; + declareobj(p, d); + return d; +Bad: + errorat(lx->pos, "failed to parse top level declaration"); + return nil; +} + +// ----------------------------------------------------------------------- +// top level api + +void +setup(Parser *p, Lexer *lx) +{ + advance(p,lx); + advance(p,lx); + + /* define all builtin typedefs */ + declareobj(p, &C.builtin.vargs); +} + +error +parse(Parser *p, Lexer *lx) +{ + Token tok; + + setup(p, lx); + while ((tok = peek(p, 0)), tok.kind > Aeof) { + if (p->ast.len >= p->ast.cap) { + p->ast.cap += 20; + p->ast.decls = realloc(p->ast.decls, p->ast.cap*sizeof(*p->ast.decls)); + } + p->ast.decls[p->ast.len++] = decl(p, lx); + } + + return 0; +} diff --git a/src/cmd/cc/bits.c b/src/cmd/cc/bits.c new file mode 100644 index 0000000..4b405dc --- /dev/null +++ b/src/cmd/cc/bits.c @@ -0,0 +1,114 @@ +#include "cc.h" + +// ----------------------------------------------------------------------- +// Architecture + +enum +{ + archx64, + numarch, +}; + +// ----------------------------------------------------------------------- +// Types + +/* + * enumerated type specifers + * see https://en.wikipedia.org/wiki/C_data_types + */ +#define VOID X(Tvoid, 2) + +#define BOOL X(Tbool, 3) +#define CHAR X(Tchar, 4) +#define SCHAR X(Tsign|Tchar, 5) +#define UCHAR X(Tunsign|Tchar, 6) + +#define SHORT X(Tshort, 7), X(Tshort|Tint, 7) +#define SSHORT X(Tsign|Tshort, 8), X(Tsign|Tshort|Tint, 8) +#define USHORT X(Tunsign|Tshort, 9), X(Tunsign|Tshort|Tint, 9) + +#define INT X(0, 10), X(Tint, 10) +#define SINT X(Tsign, 11), X(Tsign|Tint, 11) +#define UINT X(Tunsign, 12), X(Tunsign|Tint, 12) + +#define LONG X(Tlong, 13), X(Tlong|Tint, 13) +#define SLONG X(Tsign|Tlong, 14), X(Tsign|Tlong|Tint, 14) +#define ULONG X(Tunsign|Tlong, 15), X(Tunsign|Tlong|Tint, 15) + +#define VLONG X(Tvlong, 16), X(Tvlong|Tint, 16) +#define SVLONG X(Tsign|Tvlong, 17), X(Tsign|Tvlong|Tint, 17) +#define UVLONG X(Tunsign|Tvlong, 18), X(Tunsign|Tvlong|Tint, 18) + +#define FLOAT X(Tfloat, 19) +#define DOUBLE X(Tdouble, 20) +#define LONGDB X(Tlong|Tdouble, 21) +#define COMPLEX X(Tcmplx, 22) +#define IMAGINARY X(Timag, 23) + +/* fixed width definitions */ +#define DEF(sz, aln, mx, sgn) {.size=sz, .align=aln, .max=mx, .sign=sgn } + +#define INT8 DEF(1, 1, 0x7fff, 0) +#define UINT8 DEF(1, 1, 0xffff, 1) + +#define INT16 DEF(2, 2, 0x7fff, 0) +#define UINT16 DEF(2, 2, 0xffff, 1) + +#define INT32 DEF(4, 4, 0x7fffffff, 0) +#define UINT32 DEF(4, 4, 0xffffffff, 1) + +#define INT64 DEF(8, 8, 0x7fffffffffffffff, 0) +#define UINT64 DEF(8, 8, 0xffffffffffffffff, 1) + +/* architecture specific definitions */ +// TODO: max value should be able to take floats +#define TYPES \ + TYPE(DEF(0, 0, 0, 0), VOID) \ + TYPE(INT8, BOOL) \ + TYPE(UINT8, CHAR) \ + TYPE(INT8, SCHAR) \ + TYPE(UINT8, UCHAR) \ + TYPE(INT16, SHORT) \ + TYPE(INT16, SSHORT) \ + TYPE(UINT16, USHORT) \ + TYPE(INT32, INT) \ + TYPE(INT32, SINT) \ + TYPE(UINT32, UINT) \ + TYPE(INT64, LONG) \ + TYPE(INT64, SLONG) \ + TYPE(UINT64, ULONG) \ + TYPE(INT64, VLONG) \ + TYPE(INT64, SVLONG) \ + TYPE(UINT64, UVLONG) \ + TYPE(DEF(4, 4, 0, 0), FLOAT) \ + TYPE(DEF(8, 8, 0, 0), DOUBLE) \ + TYPE(DEF(16, 16, 0, 0), LONGDB) \ + TYPE(DEF(8, 8, 0, 0), COMPLEX) \ + TYPE(DEF(4, 4, 0, 0), IMAGINARY) \ + +Type pointer = {.size=8, .align=8, .max=0xffffffffffffffff, .sign=0}; + +/* pack architecture specific definitions into exported arrays */ +#define TYPE(a, ...) a, +Type basetypes[] = { + { 0 }, /* sentinel value for bad types */ + { 0 }, /* sentinel value for variadic args */ + TYPES +}; +#undef TYPE + +#define TYPE(a, ...) __VA_ARGS__, +#define X(a, b) a +uint64 validtypespec[38] = { + TYPES + Tstruct, Tunion, Tenum, Tname, +}; +#undef X + +#define X(a, b) b +int indextypespec[38] = { + TYPES + -1, -1, -1, -1, +}; +#undef X +#undef TYPE diff --git a/src/cmd/cc/cc.c b/src/cmd/cc/cc.c new file mode 100644 index 0000000..8ad0022 --- /dev/null +++ b/src/cmd/cc/cc.c @@ -0,0 +1,409 @@ +#include "cc.h" +#include + +// ----------------------------------------------------------------------- +// string interning + +/* jenkins' one at a time hash */ +static +int32 +hash_string(byte* s) +{ + int32 h; + + h = 0; + if (s != nil) { + for (; *s; ++s) { + h += *s; + h = (h << 10); + h = (h >> 6); + } + } + + h += (h << 3); + h ^= (h >> 11); + h += (h >> 11); + + return h; +} + +static +int +streq(byte *s, byte *t) +{ + if (s == nil) { + if (t == nil) + return 1; + else + return 0; + } + + return (t == nil) ? 0 : strcmp(s, t) == 0; +} + +#define HASH(s) hash_string(s) +#define EQUAL(s, t) (streq(s, t)) +static +int +getstr(string key, int *ok) +{ + int idx; + MAP_GET(idx, (&C.strs), key, HASH, EQUAL); + + *ok = idx < C.strs.n_buckets; + return idx; +} + +static +void +·free(void* _, void* ptr) { + return free(ptr); +} + +static +void * +·alloc(void* _, uint n, ulong size) { + return malloc(n*size); +} + +static +void * +·calloc(void* _, uint n, ulong size) { + return calloc(n, size); +} + +static +int +morestrtab(StrTab *tab, int n) +{ + MAP_GROW(tab, string, int32, n, HASH, ·calloc, ·free, nil); +} + +static +int +putstr(byte *s, error *err) +{ + int sz; + sz = C.strs.size; + MAP_PUT((&C.strs), s, sz, HASH, EQUAL, morestrtab, err); +} +#undef HASH +#undef EQUAL + +int32 +intern(byte **s) +{ + int i, ok; + + i = getstr(*s, &ok); + if (ok) { + *s = C.strs.keys[i]; + goto END; + } + + *s = str·make(*s); + i = putstr(*s, &ok); + C.strs.vals[i] = C.strs.size - 1; + +END: + return C.strs.vals[i]; +} + +// ----------------------------------------------------------------------- +// type interning + +/* TODO: intern types for memory savings */ +int +type() +{ + if (C.type.len >= C.type.cap) { + C.type.cap += 100; + C.type.info = realloc(C.type.info, C.type.cap * sizeof(*C.type.info)); + } + + return C.type.len++; +} + +// ----------------------------------------------------------------------- +// universal compiler builtins + +#define KEYWORD(a, b) b, +byte *keywords[NUM_KEYWORDS] = { KEYWORDS }; +#undef KEYWORD + +#define DIRECTIVE(a, b, c) b, +byte *directives[NUM_DIRECTIVES] = { DIRECTIVES }; +#undef DIRECTIVE + +struct Compiler C = { 0 }; + +// ----------------------------------------------------------------------- +// cli flag handlers + +void +pushinclude(byte *dirs) +{ + string d, s, *it, *end; + + while (*dirs != '\0') { + d = strchr(dirs, ' '); + if (d != nil) + *d = '\0'; + + s = dirs; + intern(&s); + for (it = C.inc.dir, end = it + C.inc.len; it != end; ++it) { + if ((uintptr)s == (uintptr)(*it)) + goto Nextdir; + } + + if (C.inc.len == C.inc.cap) { + C.inc.cap += 20; + C.inc.dir = realloc(C.inc.dir, C.inc.cap*sizeof(*C.inc.dir)); + } + C.inc.dir[C.inc.len++] = s; +Nextdir: + if (d == nil) + break; + dirs = d + 1; + } +} + +// ----------------------------------------------------------------------- +// error reporting + +void +errorat(Pos x, byte *fmt, ...) +{ + va_list args; + va_start(args, fmt); + + printf("error:%s:%d:%d: ", os·basename(x.path), x.line, x.col); + vprintf(fmt, args); + printf("\n"); + + va_end(args); + assert(0); +} + +void +warnat(Pos x, byte *fmt, ...) +{ + va_list args; + va_start(args, fmt); + + printf("warning:%s:%d:%d: ", os·basename(x.path), x.line, x.col); + vprintf(fmt, args); + printf("\n"); + + va_end(args); +} + +// ----------------------------------------------------------------------- +// main point of entry + +void +init(void) +{ + int i; + + for (i = 0; i < arrlen(keywords); i++) + intern(&keywords[i]); + + for (i = 0; i < arrlen(directives); i++) + intern(&directives[i]); + + C.heap = mem·makearena(mem·sys, nil); + + /* compiler definitions */ + C.def.len = 0; + C.def.cap = 100; + C.def.val = calloc(C.def.cap, sizeof(*C.def.val)); + + /* compiler include paths */ + C.inc.len = 0; + C.inc.cap = 100; + C.inc.dir = calloc(C.inc.cap, sizeof(*C.inc.dir)); + C.inc.dir[C.inc.len++] = "."; + + C.outfile = nil; + + /* type info */ + C.type.len = arrlen(basetypes); + C.type.cap = 100 + arrlen(basetypes); + C.type.info = calloc(C.type.cap, sizeof(*C.type.info)); + + memcpy(C.type.info, basetypes, C.type.len * sizeof(*C.type.info)); + + /* builtins */ + C.builtin.vargs = (Decl) { + .pos = (Range) { + .beg = { + .col = 0, + .line = 0, + .path = "", + }, + .end = { + .col = 0, + .line = 0, + .path = "", + }, + }, + .kind = Dtype, + .spec = Mtype, + .type = 1, + .name = "__builtin_va_list", + }; + + intern(&C.builtin.vargs.name); +} + +void +initlx(Lexer *lx) +{ + int i; + + memset(lx, 0, sizeof(*lx)); + lx->b = lx->buf; + + /* predefine macros */ + dodefine(lx, "__LINE__"); + dodefine(lx, "__FILE__"); + lx->macline = (uintptr)lookup(&lx->sym, "__LINE__"); + lx->macfile = (uintptr)lookup(&lx->sym, "__FILE__"); + + for (i = 0; i < C.def.len; i++) + dodefine(lx, C.def.val[i]); + + lx->omit.len = 0; + lx->omit.cap = 100; + lx->omit.path = calloc(lx->omit.cap, sizeof(*C.inc.dir)); + + lx->new = lx->iostk; + lx->new->link = nil; + memset(lx->iostk, 0, sizeof(lx->iostk)); + + lx->sym = (SymTab){ 0 }; +} + +void +freelx(Lexer *lx) +{ + free(lx->omit.path); +} + +void +initp(Parser *p) +{ + /* initialize temporary buffers */ + memset(p->spstk, 0, sizeof(p->spstk)); + memset(p->nmstk, 0, sizeof(p->nmstk)); + memset(p->dtstk, 0, sizeof(p->dtstk)); + memset(p->ptstk, 0, sizeof(p->ptstk)); + + p->sp = p->spstk; + p->nm = p->nmstk; + p->dt = p->dtstk; + p->pt = p->ptstk; + + /* initialize ast */ + p->ast.cap = 0; + p->ast.len = 0; + p->ast.decls = nil; +} + +error +compile(byte *path) +{ + Lexer lx; + Parser p; + error err; + byte *sep, out[400]; + + intern(&path); + strcpy(out, path); + + sep = utf8·findrrune(out, '/'); + if (sep) + *sep++ = '\0'; + else + sep = out; + + if (!C.outfile) { + C.outfile = sep; + if (C.outfile) { + if ((sep = utf8·findrrune(C.outfile, '.'))) { + sep[0] = '.'; + sep[1] = 'o'; + sep[2] = '\0'; + } + } else { + C.outfile = "/dev/null"; + } + } + + initlx(&lx); + initp(&p); + + lx.io = openio(&lx, path); + lx.pos = (Pos){ + .path = path, + .line = 1, + .col = 1, + }; + + err = parse(&p, &lx); + freelx(&lx); + return err; +} + +error +main(int argc, byte *argv[]) +{ + byte *a, *src; + int err; + + init(); + + ARGBEGIN { + case 'o': + C.outfile = ARGF(); + break; + + case 'D': + a = ARGF(); + if (a) { + intern(&a); + if (C.def.len >= C.def.cap) { + C.def.cap += 20; + C.def.val = realloc(C.def.val, C.def.cap * sizeof(*C.def.val)); + } + C.def.val[C.def.len++] = a; + } + break; + + case 'I': + a = ARGF(); + if (a) + pushinclude(a); + break; + } ARGEND + + if (argc < 1 && C.outfile == nil) { + printf("usage: cc [-options] files\n"); + exit(1); + } + + // NOTE: This is just for my comfort during debugging. + pushinclude("/home/nolln/root/include"); + pushinclude("/home/nolln/root/include/vendor/libc"); + + src = (argc == 0) ? "" : argv[0]; + intern(&src); + + if ((err = compile(src)), err) { + exit(2); + } + + exit(0); +} diff --git a/src/cmd/cc/cc.h b/src/cmd/cc/cc.h new file mode 100644 index 0000000..8fc5f73 --- /dev/null +++ b/src/cmd/cc/cc.h @@ -0,0 +1,806 @@ +#pragma once + +#include +#include + +#define iota(x) 1 << (x) + +/* core types */ +typedef struct Io Io; +typedef struct Pos Pos; +typedef struct Range Range; +typedef struct Token Token; + +typedef struct Lexer Lexer; + +typedef struct Sym Sym; +typedef struct Type Type; +typedef struct Scope Scope; + +typedef struct Parser Parser; + +typedef struct Ptr Ptr; +typedef struct Name Name; +typedef struct Dtor Dtor; +typedef struct Field Field; + +typedef struct Node Node; +typedef struct Decl Decl; +typedef struct Stmt Stmt; +typedef struct Expr Expr; + +typedef struct SymTab SymTab; +typedef struct StrTab StrTab; + +typedef struct Compiler Compiler; + +/* keywords of language */ +#define KEYWORDS \ + KEYWORD(Kauto,"auto") \ + KEYWORD(Kregister,"register") \ + KEYWORD(Kstatic,"static") \ + KEYWORD(Kextern,"extern") \ + KEYWORD(Ktls,"thread_local") \ + KEYWORD(Ktypedef,"typedef") \ + KEYWORD(Kinline,"inline") \ + KEYWORD(Knoret,"_Noreturn") \ + KEYWORD(Kconst,"const") \ + KEYWORD(Kvolatile,"volatile") \ + KEYWORD(Krestrict,"restrict") \ + KEYWORD(Katomic,"_Atomic") \ + KEYWORD(Ksigned,"signed") \ + KEYWORD(Kunsigned,"unsigned") \ + KEYWORD(Kvoid,"void") \ + KEYWORD(Kbool,"_Bool") \ + KEYWORD(Kchar,"char") \ + KEYWORD(Kfloat,"float") \ + KEYWORD(Kdouble,"double") \ + KEYWORD(Kcomplex,"complex") \ + KEYWORD(Kimaginary,"imaginary") \ + KEYWORD(Kint,"int") \ + KEYWORD(Kshort,"short") \ + KEYWORD(Klong,"long") \ + KEYWORD(Kstruct,"struct") \ + KEYWORD(Kunion,"union") \ + KEYWORD(Kenum,"enum") \ + KEYWORD(Kfor,"for") \ + KEYWORD(Kdo,"do") \ + KEYWORD(Kwhile,"while") \ + KEYWORD(Kcontinue,"continue") \ + KEYWORD(Kif,"if") \ + KEYWORD(Kelse,"else") \ + KEYWORD(Kswitch,"switch") \ + KEYWORD(Kcase,"case") \ + KEYWORD(Kdefault,"default") \ + KEYWORD(Kbreak,"break") \ + KEYWORD(Kgoto,"goto") \ + KEYWORD(Kreturn,"return") \ + KEYWORD(Ksizeof,"sizeof") \ + KEYWORD(Kalignof,"alignof") \ + KEYWORD(Kalignas,"alignas") + +#define KEYWORD(a, b) a, +enum { KEYWORDS NUM_KEYWORDS }; +#undef KEYWORD + +extern byte *keywords[NUM_KEYWORDS]; + +// ----------------------------------------------------------------------- +// lexing: byte stream -> tokens +// pre-processor built in + +/* source position: error reporting */ +struct Pos +{ + int col; + int line; + string path; +}; + + +struct Range +{ + Pos beg; + Pos end; +}; + +void errorat(Pos x, byte *fmt, ...); +void warnat(Pos x, byte *fmt, ...); + +/* pre-processor */ +#define DIRECTIVES \ + DIRECTIVE(Dpragma,"pragma", ppprag) \ + DIRECTIVE(Dinclude,"include", ppinc) \ + DIRECTIVE(Ddefine,"define", ppdef) \ + DIRECTIVE(Dundef,"undef", ppund) \ + DIRECTIVE(Dif,"if", ppif0) \ + DIRECTIVE(Delif,"elif", ppif1) \ + DIRECTIVE(Delse, "else", ppif1) \ + DIRECTIVE(Difdef,"ifdef", ppif2) \ + DIRECTIVE(Difndef,"ifndef", ppif3) \ + DIRECTIVE(Dendif,"endif", ppend) + +#define DIRECTIVE(a, b, c) a, +enum { DIRECTIVES NUM_DIRECTIVES }; +#undef DIRECTIVE + +extern byte *directives[NUM_DIRECTIVES]; + +error domacro(Lexer*); +error dodefine(Lexer *lx, string s); +int expandmacro(Lexer *lx, Sym *s, byte *dst); + +extern error (*macros[NUM_DIRECTIVES])(Lexer*); + +/* tokenization of byte stream */ +#define TOKENS \ + TOK(Anil,"nil") \ + TOK(Aeof,"eof") \ + TOK(Aeq, "==") \ + TOK(Aneq, "!=") \ + TOK(Anot, "!") \ + TOK(Aneg, "~") \ + TOK(Axor, "^") \ + TOK(Aor, "|") \ + TOK(Aand, "&") \ + TOK(Aoror, "||") \ + TOK(Aandand, "&&") \ + TOK(Aadd,"+") \ + TOK(Asub,"-") \ + TOK(Astar,"*") \ + TOK(Adiv,"/") \ + TOK(Amod,"%") \ + TOK(Agt,">") \ + TOK(Alt,"<") \ + TOK(Agteq,">=") \ + TOK(Alteq,"<=") \ + TOK(Alsft,"<<") \ + TOK(Arsft,">>") \ + TOK(Ainc,"++") \ + TOK(Adec,"--") \ + TOK(Aasn,"=") \ + TOK(Aorasn,"|=") \ + TOK(Axorasn,"^=") \ + TOK(Aandasn,"&=") \ + TOK(Aaddasn,"+=") \ + TOK(Asubasn,"-=") \ + TOK(Amulasn,"*=") \ + TOK(Adivasn,"/=") \ + TOK(Amodasn,"%=") \ + TOK(Alsftasn,"<<=") \ + TOK(Arsftasn,">>=") \ + TOK(Acomma,",") \ + TOK(Acolon,":") \ + TOK(Asemi,";") \ + TOK(Alparen,"(") \ + TOK(Arparen,")") \ + TOK(Albrace,"{") \ + TOK(Arbrace,"}") \ + TOK(Albrakt,"[") \ + TOK(Arbrakt,"]") \ + TOK(Adot,".") \ + TOK(Aarrow,"->") \ + TOK(Aqmark,"?") \ + TOK(Aellip,"...") \ + TOK(Alit,"") \ + TOK(Aident,"") \ + TOK(Akeywd,"") \ + +#define TOK(a, b) a, +enum +{ + TOKENS + NUM_TOKENS, + + Vchar = iota(8), + Vrune = iota(9), + Vint = iota(10), + Vlong = iota(11), + Vvlong = iota(12), + Vun = iota(13), + Vfloat = iota(14), + Vstr = iota(15), + Vwstr = iota(16), + + Vmask = Vchar - 1, +}; +#undef TOK + +extern byte *tokens[NUM_TOKENS]; + +/* TODO: store literals in a big val */ +union Val +{ + byte *s; + double f; + vlong i; + uvlong ui; + int32 c; + uint32 uc; + rune r; +}; + +struct Token +{ + uint32 kind; + Pos pos; + union Val val; +}; + +enum +{ + Svar = iota(1), + Sfunc = iota(2), + Stype = iota(3), + Stag = iota(4), + Senum = iota(5), + Slabl = iota(6), + Smacro = iota(7), +}; + +struct Sym +{ + uint32 kind; + string name; + union { + string macro; + Decl *obj; + int32 type; + Stmt *blk; + Expr *val; + }; +}; + +struct SymTab +{ + int32 n_buckets; + int32 size; + int32 n_occupied; + int32 upper_bound; + int32 *flags; + string *keys; + Sym **vals; +}; + +Sym *define(SymTab *tab, string ident, uint32 kind); +Sym *lookup(SymTab *tab, string ident); +error forget(SymTab *tab, string ident); +void forgetall(SymTab *tab); + +enum +{ + IOnil = iota(0), + IOfile = iota(1), + IObuff = iota(2), +}; + +struct Io +{ + io·Buffer rdr; + string path; + uint32 kind; + union { + Stream *f; + byte *b; + }; + + Pos store; + struct Io *link; +}; + +struct Lexer +{ + Pos pos; + SymTab sym; + byte *b; + byte buf[2*1024]; + + /* predefined dynamic macros */ + uintptr macfile; + uintptr macline; + + /* i/o data */ + Io *io, *new; + Io iostk[100]; + struct { + int cap; + int len; + string *path; + } omit; +}; + +/* lex.c functions */ +Token lex(Lexer *); + +int getbyte(Lexer *); +int getnsbyte(Lexer *l); +rune getrune(Lexer *); +byte ungetbyte(Lexer *); +rune ungetrune(Lexer *, rune r); + +Io* openio(Lexer *lx, byte *path); +void pushio(Lexer *lx, Io *new); +void popio(Lexer *lx); + +void puttok(Token); + +// ----------------------------------------------------------------------- +// parsing & type resolution +// tokens -> ast + +/* parent data */ +struct Node +{ + Range pos; + uint32 kind; +}; + +/* ast types */ +enum +{ + Nbad, + /* labels */ + Sempty, Slabel, Scase, + Sblock, + Sexpr, Sdecl, + Sselect, + /* loops */ + Sfor, Swhile, Sdo, + /* jumps */ + Sgoto, Scontin, Sbreak, Sreturn, + /* forks */ + Sif, Sswitch, + + + /* assignments */ + Xasn, Xmulasn, Xdivasn, Xmodasn, Xsubasn, Xaddasn, + Xlsftasn, Xrsftasn, Xandasn, Xxorasn, Xorasn, + /* conditional */ + Xternary, + /* unary prefix ops */ + Xref, Xstar, Xplus, Xminus, Xneg, Xnot, Xsizeof, Xalignof, Xpreinc, Xpredec, + Xcast, + /* unary postfix ops */ + Xpostinc, Xpostdec, Xindex, Xcall, Xselp, Xself, Xinitlist, + /* binary ops */ + Xoror, Xandand, Xor, Xxor, Xand, Xneq, Xeql, Xgt, Xlt, Xgteq, Xlteq, Xlsft, Xrsft, + Xadd, Xsub, Xmul, Xdiv, Xmod, + /* primary */ + Xparen, Xident, Xlit, + /* lists */ + Xcomma, + + + Dvar, + Dfunc, + Dtype, + Dlist = iota(20), + Dvars = Dvar | Dlist, + Dtypes = Dtype | Dlist, + + /* names (don't interact w/ final AST) */ + Nnil = 0, + Nident, + Nparen, + Nindex, + Ncall, +}; + +/* expressions */ +enum +{ + Keynil, + Keyidx, + Keysel, +}; + +struct Key +{ + uint kind : 2; + union { + Expr *x; + string s; + }; +}; + +struct Expr +{ + struct Node; + uint32 qual; + uint32 type; + union { + string name; + struct { + uint64 kind; + union { + union Val; + union Val v; + }; + } val; + struct { + int n; + struct Key *k; + Expr *v; + } init; + Expr *x; + struct { + Expr *l; + Expr *r; + } asn; + struct { + Expr *c; + Expr *t; + Expr *e; + } cond; + struct { + Expr *x; + union { + Expr *i; + string f; + }; + } idx; + struct { + Expr *fn; + int n; + Expr **arg; + } call; + union { + Expr *pre; + Expr *post; + } unary; + struct { + int type : 1; + union { + struct { + uint32 qual; + uint32 type; + } of; + Expr *x; + }; + } info; + struct { + struct { + uint32 qual; + uint32 type; + } to; + Expr *x; + } cast; + struct { + Expr *l; + Expr *r; + } binary; + struct { + Expr *x[2]; + } comma; + }; +}; + + +/* statements */ +struct Stmt +{ + struct Node; + union { + struct { + union { + string ident; + Expr *x; + }; + Node *stmt; + } lbl; + struct { + long n; + struct Node **item; + } blk; + Expr *x; + struct { + Node *init; + Expr *cond; + Expr *step; + Node *body; + } loop; + union{ + string lbl; + Expr *x; + } jmp; + struct { + Expr *cond; + Node *body; + Node *orelse; + } br; + }; +}; + +/* declarations */ + +/* + * specifiers + * the design is the following: + * type info is held w/in a 64 bit integer. + * the bottom 32 bits are associated to specializations + * the top 32 bits index into a type-info array held by the compiler. + */ +enum +{ + /* memory */ + Mauto = iota(Kauto), + Mstatic = iota(Kstatic), + Mreg = iota(Kregister), + Mtls = iota(Ktls), + Mtype = iota(Ktypedef), + Mextern = iota(Kextern), + + MaskMem = Mauto | Mstatic | Mreg | Mtls | Mtype | Mextern, + + /* qualifiers */ + Qconst = iota(Kconst), + Qrestr = iota(Krestrict), + Qvoltl = iota(Kvolatile), + Qatom = iota(Katomic), + + MaskQul = Qconst | Qrestr | Qvoltl | Qatom, + + Finlne = iota(Kinline), + Fnoret = iota(Knoret), + + MaskFcn = Finlne | Fnoret, + + /* types */ + Tsign = iota(Ksigned), + Tunsign = iota(Kunsigned), + + MaskSgn = Tsign | Tunsign, + + Tvoid = iota(Kvoid), + Tfloat = iota(Kfloat), + Tdouble = iota(Kdouble), + Tcmplx = iota(Kcomplex), + Timag = iota(Kimaginary), + + MaskFlt = Tfloat | Tdouble | Tcmplx | Timag, + + Tchar = iota(Kchar), + Tbool = iota(Kbool), + + Tshort = iota(Kshort), + Tint = iota(Kint), + Tlong = iota(Klong), + Tvlong = iota(Klong+1), + + MaskInt = Tshort | Tint | Tlong | Tvlong, + MaskTyp = Tvoid | Tbool | Tchar | Tint | Tfloat | Timag | Tcmplx, + /* + * NOTE IMPORTANT: vlong takes over the struct bit place + * DON'T MOVE KEYWORDS WITHOUT REORGANIZING + */ + Tstruct = iota(Kstruct+1), + Tunion = iota(Kunion+1), + Tenum = iota(Kenum+1), + Tname = iota(Kenum+2), + + Sbad = -1, +}; + +/* intermediate nodes */ +struct Ptr +{ + uint64 kind; + Ptr *link; +}; + +struct Name +{ + uint32 kind; + union { + string ident; + struct Dtor *paren; + struct { + Name *name; + union { + struct { + uint32 q; + Expr *x; + } idx; + struct { + int n; + int dots : 1; + Field *arg; + } call; + }; + } sfx; + }; +}; + +struct Dtor +{ + Ptr ptr; + Name *name; +}; + +/* final ast node */ + +struct Field +{ + uint32 qual; + uint32 type; + string name; +}; + +struct Decls +{ + string name; + uint32 type; + Expr *init; + struct Decls *link; +}; + + +struct Decl +{ + struct Node; + uint32 spec; + union { + struct { + string name; + uint32 type; + union { + Stmt *body; + Expr *init; + }; + }; + struct Decls list; + }; +}; + +enum +{ + Tbad, + Tbase, + Tdef, + Tptr, + Tarray, + Tfunc, +}; + +/* types */ +struct Type +{ + uint32 kind; + Sym *sym; + uintptr size; + uintptr max; + uint16 align : 8; + uint8 sign : 2; + union { + struct { + uint32 qual; + uint32 base; + } ptr; + struct { + int len; + uint32 qual; + uint32 *elt; + } arr; + struct { + int len; + Field *f; + Expr *x; + } aggr; + struct { + int len; + string *elt; + Expr *val; + } enm; + struct { + uint32 ret; + int n; + int dots : 1; + Field *arg; + } func; + }; +}; + +/* platform specific */ +extern Type pointer; +extern Type basetypes[24]; +/* mandated by C standard */ +extern uint64 validtypespec[38]; +extern int indextypespec[38]; + +struct Scope +{ + SymTab tags; + SymTab objs; +}; + +struct Parser +{ + Token tok[2]; + struct { + int cap; + int len; + Decl **decls; + } ast; + + /* static buffers/stacks */ + Scope *sp; + Scope spstk[40]; + + Name *nm; + Name nmstk[40]; + + Ptr *pt; + Ptr ptstk[10]; + + Dtor *dt; + Dtor dtstk[40]; +}; + +/* ast.c functions */ +error parse(Parser *, Lexer *); + +// ----------------------------------------------------------------------- +// global compiler data + +struct StrTab +{ + int32 n_buckets; + int32 size; + int32 n_occupied; + int32 upper_bound; + int32 *flags; + string *keys; + int32 *vals; +}; + +#if 0 +struct TypeSet +{ + int32 n_buckets; + int32 size; + int32 n_occupied; + int32 upper_bound; + int32 *flags; + Type **keys; +}; +#endif + +/* main data */ +struct Compiler +{ + mem·Arena *heap; + StrTab strs; + string outfile; + + struct { + int cap; + int len; + string *val; + } def; + + struct { + int cap; + int len; + string *dir; + } inc; + + struct { + int cap; + int len; + Type *info; + } type; + + /* TODO: make array */ + struct { + Decl vargs; + } builtin; +}; + +extern Compiler C; + +/* cc.c functions */ +void init(); +int32 intern(byte **str); +int32 type(); + +#undef iota diff --git a/src/cmd/cc/lex.c b/src/cmd/cc/lex.c new file mode 100644 index 0000000..33fc5d0 --- /dev/null +++ b/src/cmd/cc/lex.c @@ -0,0 +1,873 @@ +#include "cc.h" +#include + +// ----------------------------------------------------------------------- +// printing functions + +void +puttok(Token tok) +{ + if (tok.kind < Alit) + printf("%s", tokens[tok.kind]); + else if (tok.kind & Alit) { + if (tok.kind & Vchar) + if (tok.kind & Vint) + if (tok.kind & Vlong) + if (tok.kind & Vvlong) + printf("literal <%lld>", tok.val.i); + if (tok.kind & Vfloat) + printf("literal <%f>", tok.val.f); + printf("literal <%s>", tok.val.s); + } else + printf("ident <%s>", tok.val.s); +} + +// ----------------------------------------------------------------------- +// io buffer management + +#define asrdr(x) (io·Reader){(int (*)(void *, int, int, void *))x} + +// path should be absolute +Io* +openio(Lexer *lx, byte *path) +{ + string *it, *end; + + intern(&path); + + // See if we have already opened file; + // If so, and it hasn't been flagged return it + for (it = lx->omit.path, end = it + lx->omit.len; it < end; ++it) { + if ((uintptr)(*it) == (uintptr)(path)) + return nil; + } + + // TODO: See if we have already loaded the file + + if ((lx->new - lx->iostk) >= arrlen(lx->iostk)-1) + panicf("out of I/O space!"); + + lx->new->f = io·open(path, "r"); + if (!lx->new->f) + panicf("file %s not found", path); + + lx->new->kind = IOfile; + lx->new->path = path; + bufio·initreader(&lx->new->rdr, asrdr(io·read), lx->new->f); + + return lx->new++; +} + +static +Io* +makeio(Lexer *lx, byte *name) +{ + if ((lx->new - lx->iostk) >= arrlen(lx->iostk)-1) + panicf("out of I/O space!"); + + lx->new->path = name; + lx->new->rdr = (io·Buffer) { + .state = bufio·rdr | bufio·end, + .runesize = 0, + .h = nil, + .size = bufio·size, + .beg = lx->new->rdr.buf + bufio·ungets, + .pos = lx->new->rdr.buf + bufio·ungets, + .end = lx->new->rdr.buf + bufio·ungets, + }; + lx->new->b = lx->new->rdr.beg; + + return lx->new++; +} +#undef asrdr + +static +void +freeio(Lexer *lx, Io *io) +{ + if (io->kind & IOfile) { + io·close(io->f); + } + + io->rdr.state = 0; + io->kind = 0; + io->link = nil; + io->path = nil; + io->store = (Pos){ 0 }; + io->path = ""; +} + +void +pushio(Lexer *lx, Io *new) +{ + new->link = lx->io; + lx->io->store = lx->pos; + lx->io = new; + + lx->pos = (Pos){ + .line = 1, + .col = 1, + .path = new->path, + }; +} + +void +popio(Lexer *lx) +{ + Io *prev; + + assert(lx->io == lx->new-1); + --lx->new; + + prev = lx->io->link; + freeio(lx, lx->io); + + lx->io = prev; + if (!prev) { + return; + } + + lx->pos = prev->store; +} + +// ----------------------------------------------------------------------- +// simple wrappers + +int +getbyte(Lexer *lx) +{ + return bufio·getbyte(&lx->io->rdr); +} + +int +getnsbyte(Lexer *lx) +{ + int b; + b = getbyte(lx); + for (;;) { + if (b == EOF) { + if (lx->io->link) { + popio(lx); + assert(lx->io); + b = getbyte(lx); + continue; + } else + return b; + } + if (b >= RuneSelf || !isspace(b)) + return b; + if (b == '\n') + return b; + b = getbyte(lx); + } + return b; +} + +rune +getrune(Lexer *lx) +{ + return bufio·getrune(&lx->io->rdr); +} + +byte +ungetbyte(Lexer *lx) +{ + byte b; + return bufio·ungetbyte(&lx->io->rdr, b); +} + +rune +ungetrune(Lexer *l, rune r) +{ + return bufio·ungetrune(&l->io->rdr, r); +} + +// ----------------------------------------------------------------------- +// main lexer + +#define TOK(a, b) b, +byte *tokens[NUM_TOKENS] = { TOKENS }; +#undef TOK + +static uint8 Atoi[256] = +{ + ['0'] = 0, ['1'] = 1, ['2'] = 2, ['3'] = 3, ['4'] = 4, ['5'] = 5, + ['6'] = 6, ['7'] = 7, ['8'] = 8, ['9'] = 9, ['a'] = 10, ['A'] = 10, + ['b'] = 11, ['B'] = 11, ['c'] = 12, ['C'] = 12, ['d'] = 13, ['D'] = 13, + ['e'] = 14, ['E'] = 14, ['f'] = 15, ['F'] = 15, +}; + +static +error +escapechar(Lexer *lx, int x, int islong, int esc, vlong *val) +{ + int i, u, c; + vlong l; + + c = getrune(lx); + + switch (c) { + case '\\': + break; + case EOF: + errorat(lx->pos, "EOF in string"); + return 1; + case '\n': + errorat(lx->pos, "newline in string"); + return 1; + default: + if (c == x) + return 1; + *val = c; + return 0; + } + + u = 0; + c = getrune(lx); + + switch(c) { + case 'x': + i = islong ? 4 : 2; + goto hex; + + case 'u': + i = islong ? 8 : 4; + u = 1; + goto hex; + + case 'U': + i = 8; + u = 1; + goto hex; + + case '0': case '1': case '2': case '3': + case '4': case '5': case '6': case '7': + i = islong ? 4 : 2; + goto oct; + + case 'a': c = '\a'; break; + case 'b': c = '\b'; break; + case 'f': c = '\f'; break; + case 'n': c = '\n'; break; + case 'r': c = '\r'; break; + case 't': c = '\t'; break; + case 'v': c = '\v'; break; + case '\\':c = '\\'; break; + + default: + if(c != x) errorat(lx->pos, "unknown escape sequence: %c", c); + } + *val = c; + return 0; + +hex: + l = 0; + for(; i > 0; i--) { + c = getbyte(lx); + if (c >= '0' && c <= '9') { + l = l*16 + c-'0'; + continue; + } + if (c >= 'a' && c <= 'f') { + l = l*16 + c-'a' + 10; + continue; + } + if (c >= 'A' && c <= 'F') { + l = l*16 + c-'A' + 10; + continue; + } + ungetbyte(lx); + break; + } + if (u && (l > RuneMax || (0xd800 <= l && l < 0xe000))) { + errorat(lx->pos, "invalid unicode code point in escape sequence: %#llx", l); + l = RuneErr; + } + *val = l; + if (esc) + *val |= RuneMask + 1; + return 0; + +oct: + l = c - '0'; + for (; i > 0; i--) { + c = getbyte(lx); + if (c >= '0' && c <= '7') { + l = l*8 + c-'0'; + continue; + } + ungetbyte(lx); + break; + } + if (l > 255) errorat(lx->pos, "octal escape value > 255: %d", l); + + *val = l; + if (esc) + *val |= RuneMask + 1; + return 0; +} + +#define CASE1(stmt1, kind1) \ + case stmt1: \ + tok.kind = kind1; \ + goto Return + +#define CASE2(stmt1, kind1, b1, kind2) \ + case stmt1: \ + tok.kind = kind1; \ + b = getbyte(lx); \ + if (b == b1) \ + tok.kind = kind2; \ + else \ + ungetbyte(lx); \ + goto Return + +#define CASE3(stmt1, kind1, b1, kind2, b2, kind3) \ + case stmt1: \ + tok.kind = kind1; \ + b = getbyte(lx); \ + if (b == b1) \ + tok.kind = kind2; \ + else if (b == b2) \ + tok.kind = kind3; \ + else \ + ungetbyte(lx); \ + goto Return + +#define CASE4(stmt1, kind1, b1, kind2, b2, kind3, b3, type4) \ + case stmt1: \ + tok.kind = kind1; \ + b = getbyte(lx); \ + if (b == b1) \ + tok.kind = kind2; \ + else if (b == b2) \ + tok.kind = kind3; \ + else if (b == b3) \ + tok.kind = type4; \ + else \ + ungetbyte(lx); \ + goto Return + + +Token +lex(Lexer *lx) +{ + int b, n, f; + vlong v, _; + rune r; + string s; + double d; + byte *e; + Token tok; + Sym *sym; + Io *io; + +GetByte: + b = getbyte(lx); +Dispatch: + tok.pos = lx->pos; + + if ((b != EOF && b >= RuneSelf) || b == '_') + goto Talpha; + if (isalpha(b)) { + if (b != 'L') + goto Talpha; + + n = b; + b = getbyte(lx); + if (b == '\'') { + if (escapechar(lx, '\'', 1, 0, &v)) + b = '\''; + if (!escapechar(lx, '\'', 1, 0, &_)) { + errorat(lx->pos, "missing ' at end of character constant"); + } + tok.kind = Alit | Vrune; + tok.val.r = v; + goto Return; + } + if (b == '"') + goto TLstr; + ungetbyte(lx); + b = n; + + goto Talpha; + } + if (isdigit(b)) + goto Tnum; + + switch (b) { + case '\n': + lx->pos.line++; + case ' ': case '\r': case '\t': case '\v': case '\f': + while (b = getbyte(lx), isspace(b)) + if (b == '\n') + lx->pos.line++; + goto Dispatch; + + case '\\': + b = getbyte(lx); + if (b != '\n') + errorat(lx->pos, "'\\' without a trailing newline"); + goto GetByte; + + Tchar: + case '\'': + if (escapechar(lx, '\'', 0, 0, &v)) { + errorat(lx->pos, "empty literal or escaped ' in char literal"); + v = '\''; + } + if (!escapechar(lx, '\'', 0, 0, &_)) { + errorat(lx->pos, "missing '"); + ungetbyte(lx); + } + + if (v > 0xff) { + errorat(lx->pos, "overflowed character literal"); + v = 0; + } + tok.kind = Alit | Vchar; + tok.val.c = v; + goto Return; + + case '"': + s = str·makecap("", 0, 8); + for (;;) { + if (escapechar(lx, '"', 0, 1, &v)) + break; + + if (v & (RuneMask + 1)) + str·appendbyte(&s, v); + else { + r = v; + b = utf8·runelen(r); + utf8·runetobyte(lx->buf, &r); + str·appendlen(&s, b, lx->buf); + } + } + tok.kind = Alit | Vstr; + tok.val.s = s; + intern(&tok.val.s); + + str·free(s); + goto Return; + + TLstr: + s = str·makecap("", 0, 8); + // NOTE: this violates strict aliasing + for (;;) { + if (escapechar(lx, '"', 1, 0, &v)) + break; + str·appendlen(&s, sizeof(wchar_t), (byte*)&v); + } + tok.kind = Alit | Vwstr; + tok.val.s = s; + intern(&tok.val.s); + + str·free(s); + goto Return; + + case '.': + tok.kind = Adot; + b = getbyte(lx); + + if (isdigit(b)) { + // *lx->b++ = b; + goto Tflt; + } else if (b == '.') { + b = getbyte(lx); + if (b != '.') { + errorat(lx->pos, "invalid token '..'"); + tok.kind = Aellip; + break; + } + } + ungetbyte(lx); + goto Return; + + case '<': + tok.kind = Alt; + b = getbyte(lx); + + if (b == '<') { + tok.kind = Alsft; + b = getbyte(lx); + if (b == '=') + tok.kind = Alsftasn; + else + ungetbyte(lx); + } else if (b == '=') + tok.kind = Alteq; + else + ungetbyte(lx); + goto Return; + + case '>': + tok.kind = Agt; + b = getbyte(lx); + + if (b == '>') { + tok.kind = Arsft; + b = getbyte(lx); + if (b == '=') + tok.kind = Arsftasn; + else + ungetbyte(lx); + } else if (b == '=') + tok.kind = Agteq; + else + ungetbyte(lx); + goto Return; + + case '/': + tok.kind = Adiv; + b = getbyte(lx); + + if (b == '=') + tok.kind = Adivasn; + else if (b == '/') { + while (b != EOF && b != '\n') + b = getbyte(lx); + goto Dispatch; + } else if (b == '*') { + int level = 1; + b = getbyte(lx); + while (b != EOF && level > 0) { + if (b == '/') { + b = getbyte(lx); + if (b == '*') + level++; + } else if (b == '*') { + b = getbyte(lx); + if (b == '/') + level--; + } + if (b == '\n') + lx->pos.line++; + b = getbyte(lx); + } + goto Dispatch; + } else + ungetbyte(lx); + goto Return; + + case '#': + if (domacro(lx)) { + tok.kind = Anil; + errorat(lx->pos, "failed to perform preprocessor directive"); + return tok; + } + goto GetByte; + + case EOF: + popio(lx); + if (lx->io) + goto GetByte; + tok.kind = Aeof; + goto Return; + + CASE1('(', Alparen); + CASE1(')', Arparen); + CASE1('{', Albrace); + CASE1('}', Arbrace); + CASE1('[', Albrakt); + CASE1(']', Arbrakt); + CASE1(',', Acomma); + CASE1('?', Aqmark); + CASE1(';', Asemi); + CASE1('~', Aneg); + CASE1(':', Acolon); + CASE2('^', Axor, '=', Axorasn); + CASE2('!', Anot, '=', Aneq); + CASE2('*', Astar,'=', Amulasn); + CASE2('=', Aasn, '=', Aeq); + CASE2('%', Amod, '=', Amodasn); + CASE3('+', Aadd, '=', Aaddasn, '+', Ainc); + CASE3('&', Aand, '=', Aandasn, '&', Aandand); + CASE3('|', Aor, '=', Aorasn, '|', Aoror); + CASE4('-', Asub, '=', Asubasn, '-', Adec, '>', Aarrow); + + Tnum: + e = lx->buf + arrlen(lx->buf); + do { + if (lx->b >= e) { + errorat(lx->pos, "number overflows lexer buffer"); + goto Nospace; + } + *lx->b++ = b; + } while (b = getbyte(lx), isdigit(b) || b == '_'); + + if (b == '.' || tolower(b) == 'e') + goto Tflt; + Tint: + n = 10; + s = lx->buf; + if (*s == '0') { + switch (b) { + case 'x': n = 16; break; + case 'b': n = 2; break; + case 'o': n = 8; break; + default: goto Rint; + } + lx->b = s; + /* reparse number, now with base info */ + while (b = getbyte(lx), (isdigit(b) || + ('a' <= b && b <= 'f') || + ('A' <= b && b <= 'F') || + b == '_')) + *lx->b++ = b; + } + Rint: + v = 0; + r = b; + for (; s != lx->b ; s++) { + b = *s; + if (b == '_') continue; + + f = Atoi[b]; + if (f == 0 && b != '0') + break; + + if (f >= n) { + errorat(lx->pos, "digit '%c' out of range for base %d", b, n); + f = 0; + } + + if (v > (UINT64_MAX - f) / n) { + errorat(lx->pos, "integer literal overflow"); + v = 0; + break; + } + + v = v * n + f; + } + + b = r; + tok.kind = Alit; + tok.val.i = v; + + if (b == 'u' || b == 'U') { + tok.kind |= Vun; + b = getbyte(lx); + } + if (b == 'l' || b == 'L') { + r = getbyte(lx); + if (r == 'l' || r == 'L') { + if (r != b) + errorat(lx->pos, "mismatched case on long long integer suffix"); + tok.kind |= Vvlong; + r = getbyte(lx); + } else + tok.kind |= Vlong; + + if (r == 'u' || r == 'U') { + if (tok.kind & Vun) + errorat(lx->pos, "multiple unsigned designators on integer suffix"); + tok.kind |= Vun; + goto Return; + } + + ungetbyte(lx); + goto Return; + } + + tok.kind |= Vint; + ungetbyte(lx); + goto Return; + + Tflt: + if (b == '.') { + *lx->b++ = b; + b = getbyte(lx); + } + + while (isdigit(b)) { + *lx->b++ = b; + + if (lx->b >= e) { + errorat(lx->pos, "number overflows lexer buffer"); + goto Nospace; + } + } + + if (tolower(b) == 'e') { + b = getbyte(lx); + if (b == '-' || b == '+') + b = getbyte(lx); + + if (!isdigit(b)) + errorat(lx->pos, "expected number after exponent, found %c", b); + + do { + *lx->b++ = b; + } while (b = getbyte(lx), isdigit(b)); + } + *lx->b = '\0'; + d = strtod(lx->buf, nil); + ungetbyte(lx); + + tok.kind = Alit | Vfloat; + tok.val.f = d; + + goto Return; + + Talpha: + s = lx->buf; + e = lx->buf + arrlen(lx->buf); + for (;;) { + if (s >= e) { + errorat(lx->pos, "identifier too long for buffer: %s", s); + goto Nospace; + } + if (b != EOF && b >= RuneSelf) { + ungetbyte(lx); + r = getrune(lx); + if (!utf8·isletter(r) && !utf8·isdigit(r) && r != 0xb7) { + errorat(lx->pos, "invalid identifier character %d", r); + } + s += utf8·runetobyte(s, &r); + } else if (!isalnum(b) && b != '_') + break; + else + *s++ = b; + b = getbyte(lx); + } + *s = '\0'; + ungetbyte(lx); + + tok.kind = Aident; + tok.val.s = lx->buf; + + n = intern(&tok.val.s); + if (n < arrlen(keywords)) { + tok.kind = Akeywd; + tok.val.i = n; + goto Return; + } + + sym = lookup(&lx->sym, tok.val.s); + if (sym && ((uintptr)sym->name != (uintptr)lx->io->path)) { + if ((uintptr)sym == lx->macline) { + tok.kind = Alit | Vint; + tok.val.i = lx->pos.line; + goto Return; + } + if ((uintptr)sym == lx->macfile) { + tok.kind = Alit | Vstr; + tok.val.s = lx->pos.path; + goto Return; + } + io = makeio(lx, sym->name); + io->rdr.end += expandmacro(lx, sym, io->b); + printf("EXPANDED %s: %s\n", sym->name, io->rdr.beg); + *io->rdr.end++ = EOF; + pushio(lx, io); + goto GetByte; + } + goto Return; + + default: + tok.kind = Anil; + errorat(lx->pos, "invalid token, crashing"); + abort(); + } + +Return: + lx->b = lx->buf; + return tok; + +Nospace: + panicf("aborting compilation"); + exit(1); +} + +#undef CASE4 +#undef CASE3 +#undef CASE2 +#undef CASE1 + +// ----------------------------------------------------------------------- +// symbol tables + +#define PTR_HASH(p) (uintptr)(p) +#define PTR_EQUAL(p1, p2) ((uintptr)(p1) == (uintptr)(p2)) + +static +void +·free(void* _, void* ptr) { + return free(ptr); +} + +static +void * +·alloc(void* _, uint n, ulong size) { + return malloc(n*size); +} + +static +void * +·calloc(void* _, uint n, ulong size) { + return calloc(n, size); +} + +static +int +moresymtab(SymTab *tab, int n) +{ + MAP_GROW(tab, string, Sym*, n, PTR_HASH, sys·Memory, nil); +} + +static +int +putsym(SymTab *tab, Sym *sym, error *err) +{ + MAP_PUT(tab, sym->name, sym, PTR_HASH, PTR_EQUAL, moresymtab, err); +} + +Sym* +define(SymTab *tab, string name, uint32 kind) +{ + int i; + Sym *sym; + error err; + + sym = mem·arenaalloc(C.heap, 1, sizeof(*sym)); + sym->name = name; + sym->kind = kind; + + i = putsym(tab, sym, &err); + tab->vals[i] = sym; + + return sym; +} + +Sym* +lookup(SymTab *tab, string ident) +{ + int idx; + MAP_GET(idx, tab, ident, PTR_HASH, PTR_EQUAL); + + if (idx < tab->n_buckets) + return tab->vals[idx]; + + return nil; +} + + +error +forget(SymTab *tab, string ident) +{ + int idx; + MAP_GET(idx, tab, ident, PTR_HASH, PTR_EQUAL); + + if (idx < tab->n_buckets) { + MAP_DEL(tab, idx); + return 0; + } + return 1; +} + +void +forgetall(SymTab *tab) +{ + MAP_RESET(tab); +} diff --git a/src/cmd/cc/pp.c b/src/cmd/cc/pp.c new file mode 100644 index 0000000..57c3501 --- /dev/null +++ b/src/cmd/cc/pp.c @@ -0,0 +1,1125 @@ +#include "cc.h" + +// ----------------------------------------------------------------------- +// helper functions + +static +void +pushomit(Lexer *lx, string omit) +{ + if (lx->omit.len == lx->omit.cap) { + lx->omit.cap += 20; + lx->omit.path = realloc(lx->omit.path, lx->omit.cap*sizeof(*lx->omit.path)); + } + lx->omit.path[lx->omit.len++] = omit; +} + +// NOTE: The iterator of lexer lx->b IS NOT reset. +// Its the caller's responsibility. +static +string +ident(Lexer *lx) +{ + int b; + byte *s; + + b = getnsbyte(lx); + if (!isalpha(b) && b != '_' && b < RuneSelf) { + ungetbyte(lx); + return ""; + } + + s = lx->b; + for (;;) { + *lx->b++ = b; + b = getbyte(lx); + if (isalnum(b) || b == '_' || b >= RuneSelf) + continue; + ungetbyte(lx); + break; + } + *lx->b++ = '\0'; + + return s; +} + +static +string +identdots(Lexer *lx, int *dots) +{ + int c; + byte *s; + + s = ident(lx); + if (*s != '\0') + return s; + + c = getnsbyte(lx); + if (c != '.') { + ungetbyte(lx); + return s; + } + + if (getbyte(lx) != '.' || getbyte(lx) != '.') + errorat(lx->pos, "incorrect '...' token in macro"); + + *dots = 1; + // TODO: should only run intern once... + s = "__VA_ARGS__"; + intern(&s); + return s; +} + +static +Sym* +defmacro(Lexer *lx, string name, string macro) +{ + Sym *mac; + + // printf("DEFINING MACRO %s ON LINE %d, file %s\n", name, lx->pos.line, os·basename(lx->pos.path)); + mac = define(&lx->sym, name, Smacro); + mac->macro = macro; + + return mac; +} + +static vlong evalmacro(Lexer *lx, byte prec); + +static +vlong +opand(Lexer *lx) +{ + int b; + vlong v; + string s; + Token tok; + Sym *sym; + + b = getnsbyte(lx); + if (b == '\n') { + errorat(lx->pos, "new line in macro expression"); + return 0; + } + ungetbyte(lx); + + tok = lex(lx); + + switch (tok.kind & Vmask) { + case Aneg: + return ~opand(lx); + + case Anot: + return !opand(lx); + + case Alparen: + v = evalmacro(lx, 1); + tok = lex(lx); + if (!(tok.kind & Arparen)) { + errorat(lx->pos, "unbalanced parenthesis in macro expression"); + return 0; + } + return v; + + case Alit: + switch (tok.kind & ~Vmask) { + case Vint: case Vlong: case Vvlong: + return tok.val.i; + case Vun|Vint : case Vun|Vlong : case Vun|Vvlong: + return tok.val.ui; + case Vrune: + return tok.val.r; + case Vchar: + return tok.val.c; + default: + errorat(lx->pos, "invalid literal of type '%s' in conditional macro", tokens[tok.kind & ~Vmask]); + return 0; + } + + case Aident: + sym = lookup(&lx->sym, tok.val.s); + if (!sym) { + /* calling lex directly would expand the operand here + * manually lex the result + */ + if (strcmp(tok.val.s, "defined") == 0) { + b = getnsbyte(lx); + if (b == '\n') { + errorat(lx->pos, "new line in defined operand"); + return 0; + } + s = lx->buf; + if (b == '(') { + b = getnsbyte(lx); + while (b != ')') { + if (b == '\n') { + errorat(lx->pos, "new line inside defined operand"); + return 0; + } + if (b == '(') { + errorat(lx->pos, "nested parens not allowed inside defined operator"); + return 0; + } + if (!isspace(b)) + *s++ = b; + b = getbyte(lx); + } + } else { + while (!isspace(b)) { + *s++ = b; + b = getbyte(lx); + + if (b == '\n') { + errorat(lx->pos, "new line inside defined operand"); + return 0; + } + } + } + *s = '\0'; + s = lx->buf; + intern(&s); + return lookup(&lx->sym, s) != nil; + } + return 0; + } + panicf("unreachable"); + return 1; + + default: + errorat(lx->pos, "opand: invalid token found in macro conditional: '%s'", tokens[tok.kind & Vmask]); + return 0; + } +} + +// recursively evaluates a macro +// reduced set of operators allowed here +static +vlong +evalmacro(Lexer *lx, byte prec) +{ + int b; + vlong l, r; + Token tok; + + l = opand(lx); + for (;;) { + b = getnsbyte(lx); + // NOTE: Either this or we pass in what are stopping byte is + // New line should always stop us... + // Is there any case where we SHOULDN'T STOP ON ')'? + if (b == '\n' || b == ')') { + ungetbyte(lx); + break; + } + ungetbyte(lx); + + tok = lex(lx); + // simplified jump table of precedence + // unpacked to evaluate inline + switch (tok.kind & Vmask) { + case Astar: + if (prec > 10) { + ungetbyte(lx); + return l; + } + r = evalmacro(lx, 10 + 1); + l = l * r; + continue; + + case Adiv: + if (prec > 10) { + ungetbyte(lx); + return l; + } + r = evalmacro(lx, 10 + 1); + l = l / r; + continue; + + case Amod: + if (prec > 10) { + ungetbyte(lx); + return l; + } + r = evalmacro(lx, 10 + 1); + l = l % r; + continue; + + case Aadd: + if (prec > 9) { + ungetbyte(lx); + return l; + } + r = evalmacro(lx, 9 + 1); + l = l + r; + continue; + + case Asub: + if (prec > 9) { + ungetbyte(lx); + return l; + } + r = evalmacro(lx, 9 + 1); + l = l - r; + continue; + + case Alsft: + if (prec > 8) { + ungetbyte(lx); + ungetbyte(lx); + return l; + } + r = evalmacro(lx, 8 + 1); + l = l << r; + continue; + + case Arsft: + if (prec > 8) { + ungetbyte(lx); + ungetbyte(lx); + return l; + } + r = evalmacro(lx, 8 + 1); + l = l >> r; + continue; + + case Alt: + if (prec > 7) { + ungetbyte(lx); + return l; + } + r = evalmacro(lx, 7 + 1); + l = l < r; + continue; + + case Agt: + if (prec > 7) { + ungetbyte(lx); + return l; + } + r = evalmacro(lx, 7 + 1); + l = l > r; + continue; + + case Agteq: + if (prec > 7) { + ungetbyte(lx); + ungetbyte(lx); + return l; + } + r = evalmacro(lx, 7 + 1); + l = l >= r; + continue; + + case Alteq: + if (prec > 7) { + ungetbyte(lx); + ungetbyte(lx); + return l; + } + r = evalmacro(lx, 7 + 1); + l = l >= r; + continue; + + case Aeq: + if (prec > 6) { + ungetbyte(lx); + ungetbyte(lx); + return l; + } + r = evalmacro(lx, 6 + 1); + l = l == r; + continue; + + case Aneq: + if (prec > 6) { + ungetbyte(lx); + ungetbyte(lx); + return l; + } + r = evalmacro(lx, 6 + 1); + l = l != r; + continue; + + case Aand: + if (prec > 5) { + ungetbyte(lx); + return l; + } + r = evalmacro(lx, 5 + 1); + l = l & r; + continue; + + case Axor: + if (prec > 4) { + ungetbyte(lx); + return l; + } + r = evalmacro(lx, 4 + 1); + l = l ^ r; + continue; + + case Aor: + if (prec > 3) { + ungetbyte(lx); + return l; + } + r = evalmacro(lx, 3 + 1); + l = l | r; + continue; + + case Aandand: + if (prec > 2) { + ungetbyte(lx); + ungetbyte(lx); + return l; + } + r = evalmacro(lx, 2 + 1); + l = l && r; + continue; + + case Aoror: + if (prec > 1) { + ungetbyte(lx); + ungetbyte(lx); + return l; + } + r = evalmacro(lx, 1 + 1); + l = l || r; + continue; + + default: + errorat(lx->pos, "eval: invalid token found in macro conditional '%s'", tokens[tok.kind & Vmask]); + abort(); + return 0; + } + } + + return l; +} + +// ----------------------------------------------------------------------- +// preprocessor magic numbers + +enum +{ + PPbeg = 0x02, + PParg = 0x03, + PPcat = 0x04, + PPstr = 0x05, + + PPnarg = 30, +}; + +#define PPvar 0x80u + +// ----------------------------------------------------------------------- +// preprocessor functions + +/* #endif */ +static +error +ppend(Lexer *lx) +{ + int b; + do { + b = getnsbyte(lx); + } while (b > 0 && b != '\n'); + + if (b == '\n') + lx->pos.line++; + + return 0; +} + + +/* #undef */ +static +error +ppund(Lexer *lx) +{ + string s; + error err; + + s = ident(lx); + intern(&s); + lx->b = lx->buf; + + err = forget(&lx->sym, s); + if (err) + warnat(lx->pos, "attempting to undefine unrecognized symbol '%s'", s); + + ppend(lx); + return 0; +} + +/* #define */ +static +error +ppdef(Lexer *lx) +{ + int b; + Sym *sym; + int i, j, n, dot; + string s, a, base, end, buf, args[PPnarg]; + + s = ident(lx); + if (!s) { + errorat(lx->pos, "failed to parse defined identifer"); + goto Bad; + } + intern(&s); + printf("DEFINING %s\n", s); + lx->b = lx->buf; + + sym = lookup(&lx->sym, s); + if (sym) + warnat(lx->pos, "macro redefined: '%s'", sym->name); + + n = 0; + dot = 0; + b = getbyte(lx); + if (b == '(') { + b = getnsbyte(lx); + if (b != ')') { + ungetbyte(lx); + for (;;) { + // NOTE: This is a pointer into the lx->buffer. + // Can't reset lx->b while we hold the args! + a = identdots(lx, &dot); + if (a == nil) { + errorat(lx->pos, "macro syntax error: improper argument"); + goto Bad; + } + if (n >= PPnarg) { + errorat(lx->pos, "macro syntax error: too many arguments: %d > %d", n, PPnarg); + goto Bad; + } + + args[n++] = a; + b = getnsbyte(lx); + + if (b == ')') + break; + if (b != ',') { + errorat(lx->pos, "macro syntax error: bad token in argument '%b'", b); + goto Bad; + } + } + } + b = getbyte(lx); + } + + if (isspace(b)) + if (b != '\n') + b = getnsbyte(lx); + + base = lx->b; + end = lx->buf + arrlen(lx->buf); + if (base >= end) { + errorat(lx->pos, "out of macro buffer space!"); + goto Bad; + } + buf = str·makef("%c%c", n, PPbeg); + for (;;) { + if (isalpha(b) || b == '_') { + lx->b = base; + *lx->b++ = b; + + b = getbyte(lx); + while (isalnum(b) || b == '_') { + *lx->b++ = b; + if (lx->b >= end) { + errorat(lx->pos, "out of macro buffer space!"); + goto Bad; + } + b = getbyte(lx); + } + *lx->b++ = '\0'; + + for (i = 0; i < n; i++) { + if (strcmp(base, args[i]) == 0) { + goto Arg; + } + } + str·appendlen(&buf, (lx->b - base - 1), base); + continue; + Arg: + str·appendbyte(&buf, PParg); + str·appendbyte(&buf, 'a' + i); + continue; + } + + if (b == '/') { + b = getbyte(lx); + if (b == '/') { + while (b = getbyte(lx), b != '\n'); + continue; + } + if (b == '*') { + b = getbyte(lx); + for (;;) { + if (b == '*') { + b = getbyte(lx); + if (b != '/') + continue; + b = getbyte(lx); + break; + } + if (b == '\n') { + errorat(lx->pos, "comment and newline found in define statement of %s", s); + break; + } + b = getbyte(lx); + } + continue; + } + str·appendbyte(&buf, '/'); + continue; + } + + if (b == '\\') { + b = getbyte(lx); + /* unix */ + if (b == '\n') { + lx->pos.line++; + b = getbyte(lx); + continue; + } + /* windows */ + if (b == '\r') { + b = getbyte(lx); + if (b == '\n') { + lx->pos.line++; + b = getbyte(lx); + continue; + } + } + str·appendbyte(&buf, '\\'); + } + if (b == '\n') { + lx->pos.line++; + break; + } + + if (b == '#') { + b = getnsbyte(lx); + if (b == '#') { + str·appendbyte(&buf, PPcat); + b = getbyte(lx); + continue; + } + + lx->b = base; + while (isalnum(b) || b == '_') { + *lx->b++ = b; + b = getbyte(lx); + } + *lx->b = '\0'; + + for (i = 0; i < n; i++) { + if (strcmp(base, args[i]) == 0) + goto Str; + } + errorat(lx->pos, "macro operator '#' must be followed by a valid variable identifier"); + goto Bad; + Str: + str·appendbyte(&buf, PPstr); + str·appendbyte(&buf, 'a' + i); + continue; + } + + str·appendbyte(&buf, b); + b = getbyte(lx); + if (b == EOF) { + errorat(lx->pos, "eof found in macro '%s'", s); + goto Bad; + } + } + if (dot) + *buf |= PPvar; + + lx->b = lx->buf; + sym = defmacro(lx, s, buf); + return 0; +Bad: + errorat(lx->pos, "failed parse of #define macro '%s'", s); + lx->b = lx->buf; + ppend(lx); + return 1; +} + +/* macro expansion */ +int +expandmacro(Lexer *lx, Sym *s, byte *dst) +{ + int n, lv, nargs, dots; + byte b, *it, *e, *arg[PPnarg]; + + /* not a function macro */ + if (s->macro[0] == '\0') { + if (s->macro[1] != PPbeg) { + errorat(lx->pos, "malformed macro"); + goto Bad; + } + strcpy(dst, s->macro + 2); + return str·len(s->macro)-2; + } + dots = (ubyte)s->macro[0] & PPvar; + nargs = (ubyte)s->macro[0] & (~PPvar); + + b = getnsbyte(lx); + if (b != '(') { + errorat(lx->pos, "macro function not given arguments"); + goto Bad; + } + + n = 0; + b = getbyte(lx); + if (b != ')') { + ungetbyte(lx); + lv = 0; + lx->b = lx->buf; + e = lx->buf + arrlen(lx->buf) - 4; + arg[n++] = lx->buf; + for (;;) { + if (lx->b >= e) + goto Nospace; + b = getbyte(lx); + if (b == '"') + for (;;) { + if (lx->b >= e) + goto Nospace; + *lx->b++ = b; + b = getbyte(lx); + if (b == '\\') { + *lx->b++ = b; + b = getbyte(lx); + continue; + } + if (b == '\n') { + errorat(lx->pos, "newline found in arguments: macro '%s'", s->name); + goto Bad; + } + if (b == '"') + break; + } + if (b == '\'') + for (;;) { + if (lx->b >= e) + goto Nospace; + *lx->b++ = b; + b = getbyte(lx); + if (b == '\\') { + *lx->b++ = b; + b = getbyte(lx); + continue; + } + if (b == '\n') { + errorat(lx->pos, "newline found in arguments: macro '%s'", s->name); + goto Bad; + } + if (b == '"') + break; + } + if (b == '/') { + b = getbyte(lx); + switch(b) { + case '*': + for (;;) { + b = getbyte(lx); + if (b == '*') { + b = getbyte(lx); + if (b == '/') + break; + } + } + *lx->b++ = ' '; + continue; + case '/': + while ((b = getbyte(lx)) != '\n') + ; + break; + + default: + ungetbyte(lx); + b = '/'; + } + } + if (lv == 0) { + if (b == ',') { + if (n == nargs && dots) { + *lx->b++ = ','; + continue; + } + *lx->b++ = '\0'; + arg[n++] = lx->b; + if (n > nargs) + break; + continue; + } + if (b == ')') + break; + } + if (b == '\n') + b = ' '; + *lx->b++ = b; + if (b == '(') + lv++; + if (b == ')') + lv--; + } + *lx->b = '\0'; + } + + if (n != nargs) { + errorat(lx->pos, "number of arguments don't match macro definition: %s", s->name); + *dst = '\0'; + goto Bad; + } + + if (s->macro[1] != PPbeg) { + errorat(lx->pos, "corrupted macro buffer: %s", s->name); + *dst = '\0'; + goto Bad; + } + + it = s->macro+2; + e = dst; + for (;;) { + b = *it++; + if (b == '\n') + b = ' '; + switch (b) { + case PParg: + b = *it++; + b -= 'a'; + if (b < 0 && b > n) { + errorat(lx->pos, "malformed macro index: %s", s->name); + goto Bad; + } + strcpy(dst, arg[b]); + dst += strlen(arg[b]); + + break; + + case PPstr: + b = *it++; + b -= 'a'; + if (b < 0 && b > n) { + errorat(lx->pos, "malformed macro index: %s", s->name); + goto Bad; + } + *dst++ = '"'; + strcpy(dst, arg[b]); + *dst++ = '"'; + + break; + + case PPcat: + continue; + + case '\0': + goto End; + + default: + *dst++ = b; + continue; + } + } +End: + *dst = '\0'; + return dst - e; +Nospace: + errorat(lx->pos, "out of memory during macro expansion %s", s->name); +Bad: + ppend(lx); + lx->b = lx->buf; + errorat(lx->pos, "failed to expand macro %s", s->name); + return -1; +} + +/* #include */ +static +error +ppinc(Lexer *lx) +{ + int i; + byte b, end; + string s; + + Stream *f; + Io *io; + + b = getnsbyte(lx); + if (b != '"') { + end = b; + if (b != '<') { + errorat(lx->pos, "unrecognized token '%c' in include directive", b); + goto Bad; + } + end = '>'; + } else + end = '"'; + + lx->b = lx->buf; + for (;;) { + b = getbyte(lx); + if (b == end) + break; + if (b == '\n') { + errorat(lx->pos, "hit end of line before include directive completed"); + goto Bad; + } + *lx->b++ = b; + } + *lx->b = '\0'; + s = lx->buf; + intern(&s); // NOTE: we could use this to see if we already have the file + + lx->b = lx->buf; + for (i = 0; i < C.inc.len; i++) { + if (i == 0 && end == '>') + continue; + + strcpy(lx->buf, C.inc.dir[i]); + strcat(lx->buf, "/"); + + if (strcmp(lx->buf, "./") == 0) + lx->buf[0] = '\0'; + strcat(lx->buf, s); + + if (os·exists(lx->buf, ReadOK)) { + break; + } + } + if (i == C.inc.len) { + errorat(lx->pos, "could not find file '%s' on standard include search path", s); + goto Bad; + } + + io = openio(lx, lx->buf); + if (io != nil) { + pushio(lx, io); + } + + return 0; + +Bad: + ungetbyte(lx); + lx->b = lx->buf; + errorat(lx->pos, "failed include"); + ppend(lx); + return 1; +} + +/* #pragma */ +static +error +ppprag(Lexer *lx) +{ + string s; + + s = ident(lx); + if (s == nil) { + errorat(lx->pos, "failed to parse pragma identifier"); + goto Bad; + } + lx->b = lx->buf; + if (strcmp(s, "once") == 0) { + pushomit(lx, lx->io->path); + return 0; + } +Bad: + lx->b = lx->buf; + errorat(lx->pos, "unrecognized pragma '%s'", s); + ppend(lx); + return 1; +} + +/* all #if statements */ +static +error +ppif(Lexer *lx, int f) +{ + Sym *sym; + string s; + int c, l, b; + +Eval: + if (f == 0) { + b = evalmacro(lx, 1); + if (b) { + ppend(lx); + return 0; + } + goto Skip; + } + + if (f == 1) + goto Skip; + + s = ident(lx); + if (s == nil) { + errorat(lx->pos, "failed to parse preprocessor identifier"); + goto Bad; + } + intern(&s); + lx->b = lx->buf; + + sym = lookup(&lx->sym, s); + if ((!sym && (f == 3)) || (sym && (f == 2))) + return 0; + +Skip: + b = 1; + l = 0; + for (;;) { + c = getbyte(lx); + if (c != '#') { + if (!isspace(c)) + b = 0; + if (c == '\n') { + lx->pos.line++; + b = 1; + } + if (c == EOF) { + errorat(lx->pos, "EOF hit while skipping if block. Missing endif"); + goto Bad; + } + continue; + } + if (!b) + continue; + s = ident(lx); + lx->b = lx->buf; + if (!s) + continue; + + if (l == 0 && (strcmp(s, "elif") == 0)) { + f = 0; + goto Eval; + } + + if (strcmp(s, "endif") == 0) { + if (l) { + l--; + continue; + } + ppend(lx); + return 0; + } + if (strcmp(s, "if") == 0 || + strcmp(s, "ifdef") == 0 || + strcmp(s, "ifndef") == 0) { + l++; + continue; + } + + if (l == 0 && f != 1 && strcmp(s, "else") == 0) { + return 0; + } + } + +Bad: + lx->b = lx->buf; + errorat(lx->pos, "bad syntax in preprocessor conditional directive"); + ppend(lx); + return 1; +} + +/* #if */ +static +error +ppif0(Lexer *lx) +{ + return ppif(lx, 0); +} + +/* #else */ +static +error +ppif1(Lexer *lx) +{ + return ppif(lx, 1); +} + +/* #ifdef */ +static +error +ppif2(Lexer *lx) +{ + return ppif(lx, 2); +} + +/* #ifndef */ +static +error +ppif3(Lexer *lx) +{ + return ppif(lx, 3); +} + +// ----------------------------------------------------------------------- +// dispatch function + +#define DIRECTIVE(a, b, c) c, +error (*macros[NUM_DIRECTIVES])(Lexer*) = { DIRECTIVES }; +#undef DIRECTIVE + +/* reads an identifier into the lexer's buffer */ +/* caller must intern */ + +error +domacro(Lexer *lx) +{ + int n; + error err; + string s; + + s = ident(lx); + intern(&s); + lx->b = lx->buf; + for (n = 0; n < NUM_DIRECTIVES; n++) { + if ((uintptr)s == (uintptr)directives[n]) { + goto Do; + } + } + errorat(lx->pos, "unrecognized directive name '%s'", s); + return 1; +Do: + err = macros[n](lx); + return err; +} + +error +dodefine(Lexer *lx, string s) +{ + int n; + byte *c, *def; + Sym *sym; + + strcpy(lx->buf, s); + c = strchr(lx->buf, '='); + if (c) { + *c++ = '\0'; + sym = lookup(&lx->sym, lx->buf); + if (sym) { + errorf("redefinition of symbol '%s'", sym->name); + return 1; + } + sym = define(&lx->sym, lx->buf, Smacro); + n = strlen(c) + 2; + sym->macro = str·makelen("", n); + str·appendbyte(&sym->macro, '\0'); + str·append(&sym->macro, c); + } else { + sym = lookup(&lx->sym, lx->buf); + if (sym) { + errorf("redefinition of symbol '%s'", sym->name); + return 1; + } + sym = define(&lx->sym, s, Smacro); + sym->macro = "\00\02"; + } + + return 0; +} diff --git a/src/cmd/cc/rules.mk b/src/cmd/cc/rules.mk new file mode 100644 index 0000000..b7b4688 --- /dev/null +++ b/src/cmd/cc/rules.mk @@ -0,0 +1,21 @@ +include share/push.mk + +# local sources +SRCS_$(d):=\ + $(d)/pp.c\ + $(d)/lex.c\ + $(d)/ast.c\ + $(d)/bits.c\ + $(d)/cc.c +TEST_$(d) := + +# outputs +BINS_$(d) := $(d)/cc + +include share/paths.mk + +# Local rules +$(BINS_$(d)): $(OBJS_$(d)) $(OBJ_DIR)/libn/libn.a + $(COMPLINK) + +include share/pop.mk diff --git a/src/cmd/cc/scratch.c b/src/cmd/cc/scratch.c new file mode 100644 index 0000000..b37d9a5 --- /dev/null +++ b/src/cmd/cc/scratch.c @@ -0,0 +1,7 @@ +#define XXX(a, b, c) a ## b ## c + +int +main() +{ + XXX(d, e, f); +} diff --git a/src/cmd/cc/util.c b/src/cmd/cc/util.c new file mode 100644 index 0000000..cca16f2 --- /dev/null +++ b/src/cmd/cc/util.c @@ -0,0 +1,21 @@ +#include "cc.h" + +void +·free(void* _, void* ptr) { + return free(ptr); +} + +void * +·alloc(void* _, uint n, ulong size) { + return malloc(n*size); +} + +void * +·calloc(void* _, uint n, ulong size) { + return calloc(n, size); +} + +void * +·realloc(void* _, void *ptr, uint n, ulong size) { + return realloc(ptr, n*size); +} -- cgit v1.2.1