From 038ea2d0a34fb362f577e6c9884c710ebaf4042e Mon Sep 17 00:00:00 2001 From: Nicholas Noll Date: Fri, 29 May 2020 17:18:16 -0700 Subject: feat: cc parser prototype finished --- sys/cmd/cc/ast.c | 375 +++++++++++++++++++++++++++++++++++------------------- sys/cmd/cc/bits.c | 45 +++---- sys/cmd/cc/cc.c | 2 - sys/cmd/cc/cc.h | 48 ++++--- sys/cmd/cc/lex.c | 2 +- sys/cmd/rules.mk | 8 +- 6 files changed, 303 insertions(+), 177 deletions(-) (limited to 'sys') diff --git a/sys/cmd/cc/ast.c b/sys/cmd/cc/ast.c index f8ab328..df390ba 100644 --- a/sys/cmd/cc/ast.c +++ b/sys/cmd/cc/ast.c @@ -220,10 +220,17 @@ nomatch(Token t, vlong kind) // needed forward declarations static error spec(Parser *, Lexer *, uint64 *); +static uint32 basetype(Parser *, Lexer *, uint64 *s); +static string namedecl(Parser *, Lexer *, uint32 *, int); +static uint32 typename(Parser *, Lexer *, uint32 *); + static error dtor(Parser *p, Lexer *lx, Dtor *d, int ab); static uint32 typeofdtor(Dtor *, uint32); + static Decl *decl(Parser *, Lexer *); + +static Expr *ternary(Parser *, Lexer *); static Expr *expr(Parser *, Lexer *); static error blkstmt(Parser *, Lexer *, Stmt **); @@ -277,6 +284,127 @@ Bad: return nil; } +static +int +istypename(Parser *p, Token t) +{ + Sym *sym; + + if (t.kind == Akeywd && (Kconst <= t.val.i && t.val.i <= Kenum)) + return 1; + if (t.kind == Aident) { + sym = lookupobj(p, t.val.s); + return (sym != nil) && sym->kind == Stype; + } + + return 0; +} + +static Expr* initx(Parser *p, Lexer *lx); + +static +Expr* +initlist(Parser *p, Lexer *lx) +{ + Token t; + int c, n; + Expr *x, **a; + struct Key *k; + + MAKEX(x, initlist); + x->pos.beg = lx->pos; + x->init.n = 0; + if (t.kind == Arbrace) { + x->init.k = nil; + x->init.v = nil; + return x; + } + + c = 0; + n = 0; + a = nil; + k = nil; +Key0: + if (n >= c) { + c += 20; + k = realloc(k, c * sizeof(*k)); + a = realloc(a, c * sizeof(*a)); + } +Key1: + switch (t.kind) { + case Adot: + t = advance(p, lx); + if (t.kind != Aident) { + errorat(t.pos, "dot designator must be followed by identifier"); + goto Bad; + } + k[n++] = (struct Key) { + .kind = (uint32)x->init.n, + .s = t.val.s, + }; + t = advance(p, lx); + goto Key0; + + case Albrakt: + t = advance(p, lx); + k[n++] = (struct Key) { + .kind = (uint32)x->init.n | (1ULL << 32), + .x = expr(p, lx), + }; + t = peek(p, 0); + goto Key0; + + case Aeq: + t = advance(p, lx); + /* fallthrough */ + default: + a[x->init.n++] = initx(p, lx); + + t = peek(p, 0); + switch (t.kind) { + case Arbrace: + break; + case Acomma: + advance(p, lx); + /* fallthrough */ + default: + goto Key0; + } + break; + + case Acomma: + t = advance(p, lx); + break; + } + movearray(x->init.k, k, n); + movearray(x->init.v, a, x->init.n); +Bad: + errorat(t.pos, "could not parse initilaizer list"); + return nil; +} + +static +Expr* +initx(Parser *p, Lexer *lx) +{ + Expr *x; + Token t; + + t = peek(p, 0); + if (t.kind != Albrace) + return ternary(p, lx); + + advance(p, lx); + x = initlist(p, lx); + t = peek(p, 0); + if (nomatch(t, Arbrace)) { + errorat(t.pos, "unmatched brace in initializer list, found %s instead", tokens[t.kind]); + advance(p, lx); + } + + return x; +} + static Expr* postfix(Parser *p, Lexer *lx) @@ -284,9 +412,38 @@ postfix(Parser *p, Lexer *lx) Pos b; Token t; int c, n; + uint32 type, qual; Expr *x, *y, **a; - struct Key *k; + t = peek(p, 0); + if (t.kind == Alparen) + if (istypename(p, peek(p, 1))) { + t = advance(p, lx); + type = typename(p, lx, &qual); + t = peek(p, 0); + if (nomatch(t, Arparen)) { + errorat(lx->pos, "unmatched paren: found %s instead", tokens[t.kind]); + goto Bad; + } + t = advance(p, lx); + if (nomatch(t, Albrace)) { + errorat(lx->pos, "bad initializer list: found %s", tokens[t.kind]); + goto Bad; + } + + x = initlist(p, lx); + + t = peek(p, 0); + if (nomatch(t, Arbrace)) { + errorat(lx->pos, "unmatched brace: found %s instead", tokens[t.kind]); + goto Bad; + } + + x->type = type; + x->qual = qual; + return x; + } + x = primary(p, lx); for (;;) { b = x->pos.beg; @@ -375,118 +532,37 @@ postfix(Parser *p, Lexer *lx) x = y, y = nil; break; - case Albrace: - t = advance(p, lx); - MAKEX(y, cmpdlit); - y->pos.beg = b; - y->cmpd.n = 0; - if (t.kind == Arbrace) { - x->cmpd.key = nil; - x->cmpd.val = nil; - goto EndCmpd; - } - c = 0, n = 0; - a = nil, k = nil; - Key0: - if (n >= c) { - c += 20; - k = realloc(k, c * sizeof(*k)); - a = realloc(a, c * sizeof(*a)); - } - Key1: - switch (t.kind) { - case Adot: - t = advance(p, lx); - if (t.kind != Aident) { - errorat(t.pos, "dot designator must be followed by identifier"); - goto Bad; - } - k[n++] = (struct Key) { - .kind = (uint32)x->cmpd.n, - .s = t.val.s, - }; - t = advance(p, lx); - goto Key0; - - case Albrakt: - t = advance(p, lx); - k[n++] = (struct Key) { - .kind = (uint32)x->cmpd.n | (1ULL << 32), - .x = expr(p, lx), - }; - t = peek(p, 0); - goto Key0; - - case Aeq: - t = advance(p, lx); - if (t.kind == Albrakt) - a[x->cmpd.n++] = postfix(p, lx); - else - a[x->cmpd.n++] = expr(p, lx); - - t = peek(p, 0); - switch (t.kind) { - case Arbrakt: - break; - case Acomma: - advance(p, lx); - default: - goto Key0; - } - break; - - default: - errorat(t.pos, "unrecognized token '%s' in compound literal", tokens[t.kind]); - goto Bad; - } - movearray(x->cmpd.key, k, n); - movearray(x->cmpd.val, a, x->cmpd.n); - EndCmpd: - break; - default: ; } t = advance(p, lx); } - return x; Bad: errorat(lx->pos, "failed to parse primary expression"); return nil; } -#if 0 static -Type* -type(Parser *p, Lexer *lx) +uint32 +typename(Parser *p, Lexer *lx, uint32 *spec) { - uint64 sp; - Dtor *dt; + uint32 base; + uint64 s; - dt = getdtor(p); - if (spec(p, lx, &sp)) { - errorat(lx->pos, "invalid type specification"); - goto Bad; - } - if (sp & MaskMem) { - errorat(lx->pos, "invalid type specifier"); - goto Bad; - } - if (dtor(p, lx, dt, 1)) { - errorat(lx->pos, "invalid declarator"); - goto Bad; - } - if (nameof(dt->name) != nil) { - errorat(lx->pos, "abstract declarator can not have an identifier"); - goto Bad; + base = basetype(p, lx, &s); + if (!base) { + errorat(lx->pos, "failed to parse type name specifiers"); + return 0; } -Bad: - errorat(lx->pos, "failed to parse type expression"); - return nil; + *spec = (uint32)s; + namedecl(p, lx, &base, 1); + + return base; } -#endif + +static Expr* cast(Parser *p, Lexer *lx); static Expr* @@ -497,26 +573,71 @@ unary(Parser *p, Lexer *lx) t = peek(p, 0); switch (t.kind) { - case Aneg: MAKEX(x, neg); goto Prefix; - case Aand: MAKEX(x, ref); goto Prefix; - case Anot: MAKEX(x, not); goto Prefix; - case Astar: MAKEX(x, star); goto Prefix; - case Aadd: MAKEX(x, plus); goto Prefix; - case Asub: MAKEX(x, minus); goto Prefix; case Ainc: MAKEX(x, preinc); goto Prefix; - case Adec: MAKEX(x, predec); goto Prefix; + case Adec: MAKEX(x, predec); /* fallthrough */ Prefix: x->pos.beg = t.pos; x->unary.pre = unary(p, lx); x->pos.end = x->unary.pre->pos.end; return x; - case Alparen: - advance(p, lx); + case Aneg: MAKEX(x, neg); goto Unary; + case Aand: MAKEX(x, ref); goto Unary; + case Anot: MAKEX(x, not); goto Unary; + case Astar: MAKEX(x, star); goto Unary; + case Aadd: MAKEX(x, plus); goto Unary; + case Asub: MAKEX(x, minus); /* fallthrough */ + Unary: + x->pos.beg = t.pos; + x->unary.pre = cast(p, lx); + x->pos.end = x->unary.pre->pos.end; + return x; + + case Akeywd: + switch (t.val.i) { + case Ksizeof: + MAKEX(x, sizeof); + goto Key; + case Kalignof: + MAKEX(x, alignof); + /* fallthrough */ + Key: + t = advance(p, lx); + if (t.kind == Alparen) + if (istypename(p, peek(p, 1))) { + t = advance(p, lx); + x->info.type = 0; + x->info.of.type = typename(p, lx, &x->info.of.qual); + break; + } + + x->info.type = 0; + x->info.x = unary(p, lx); + default: + ; + } + default: + return postfix(p, lx); + } +Bad: + return nil; +} + +static +Expr* +cast(Parser *p, Lexer *lx) +{ + Expr *x; + Token t; + + + t = peek(p, 0); + if (t.kind == Alparen) { + t = advance(p, lx); MAKEX(x, cast); - x->pos.beg = t.pos; - x->cast.to = type(p, lx); - if (!x->cast.to) { + x->pos.beg = t.pos; + x->cast.to.type = typename(p, lx, &x->cast.to.qual); + if (!x->cast.to.type) { errorat(lx->pos, "invalid type operand of cast"); goto Bad; } @@ -531,21 +652,11 @@ unary(Parser *p, Lexer *lx) x->cast.x = unary(p, lx); x->pos.beg = lx->pos; return x; - - case Akeywd: - switch (t.val.i) { - case Ksizeof: MAKEX(x, sizeof); goto Key; - case Kalignof: MAKEX(x, alignof); goto Key; - Key: - advance(p, lx); - x->unary.post = unary(p, lx); - default: - ; - } - default: - return postfix(p, lx); } + return cast(p, lx); + Bad: + errorat(lx->pos, "failed to parse cast expression"); return nil; } @@ -593,7 +704,7 @@ binary(Parser *p, Lexer *lx, int prec) int k, np; Expr *l, *r, *x; - l = unary(p, lx); + l = cast(p, lx); for (;;) { t = peek(p, 0); k = t.kind; @@ -642,7 +753,7 @@ ternary(Parser *p, Lexer *lx) goto Bad; } t = advance(p, lx); - x->cond.e = expr(p, lx); + x->cond.e = expr(p, lx); x->pos.end = lx->pos; break; @@ -1234,7 +1345,7 @@ basetype(Parser *p, Lexer *lx, uint64 *s) static string -namedecl(Parser *p, Lexer *lx, uint32 *base) +namedecl(Parser *p, Lexer *lx, uint32 *base, int noname) { Dtor *dt; string name; @@ -1242,7 +1353,7 @@ namedecl(Parser *p, Lexer *lx, uint32 *base) dt = getdtor(p); name = nil; - if (dtor(p, lx, dt, 0)) { + if (dtor(p, lx, dt, noname)) { errorat(lx->pos, "invalid declarator"); goto End; } @@ -1315,7 +1426,7 @@ aggregate(Parser *p, Lexer *lx, string name, int kind) fs[n].type = basetype(p, lx, &s); fs[n].qual = (uint32)s; Field: - fs[n].name = namedecl(p, lx, &fs[n].type); + fs[n].name = namedecl(p, lx, &fs[n].type, 0); tk = peek(p, 0); switch (tk.kind) { case Acolon: @@ -1674,7 +1785,7 @@ name(Parser *p, Lexer *lx, Name **nmp, int ab) goto Bad; } args[nm->sfx.call.n].qual = (uint32)s; - args[nm->sfx.call.n].name = namedecl(p, lx, &args[nm->sfx.call.n].type); + args[nm->sfx.call.n].name = namedecl(p, lx, &args[nm->sfx.call.n].type, 0); nm->sfx.call.n++; } while (t = peek(p, 0), t.kind == Acomma); @@ -1782,9 +1893,9 @@ decl(Parser *p, Lexer *lx) Token t; Decl *d; Expr *x; - uint32 base, type; string name; struct Decls *ds; + uint32 base, type; alloc(d); @@ -1800,12 +1911,12 @@ decl(Parser *p, Lexer *lx) x = nil; d->spec = (uint32)s; d->type = base; - d->name = namedecl(p, lx, &d->type); + d->name = namedecl(p, lx, &d->type, 0); switch (t = peek(p, 0), t.kind) { case Aeq: t = advance(p, lx); - x = expr(p, lx); + x = initx(p, lx); if (t.kind != Acomma) { d->kind = Dobj; d->init = x; @@ -1828,12 +1939,12 @@ decl(Parser *p, Lexer *lx) alloc(ds->link); ds = ds->link; ds->type = base; - ds->name = namedecl(p, lx, &ds->type); + ds->name = namedecl(p, lx, &ds->type, 0); t = peek(p, 0); if (t.kind == Aeq) { t = advance(p, lx); - ds->init = expr(p, lx); + ds->init = initx(p, lx); } else ds->init = nil; } diff --git a/sys/cmd/cc/bits.c b/sys/cmd/cc/bits.c index a13938c..2fe7295 100644 --- a/sys/cmd/cc/bits.c +++ b/sys/cmd/cc/bits.c @@ -16,34 +16,34 @@ enum * enumerated type specifers * see https://en.wikipedia.org/wiki/C_data_types */ -#define VOID X(Tvoid, 0) +#define VOID X(Tvoid, 1) -#define BOOL X(Tbool, 1) -#define CHAR X(Tchar, 2) -#define SCHAR X(Tsign|Tchar, 3) -#define UCHAR X(Tunsign|Tchar, 4) +#define BOOL X(Tbool, 2) +#define CHAR X(Tchar, 3) +#define SCHAR X(Tsign|Tchar, 4) +#define UCHAR X(Tunsign|Tchar, 5) -#define SHORT X(Tshort, 5), X(Tshort|Tint, 5) -#define SSHORT X(Tsign|Tshort, 6), X(Tsign|Tshort|Tint, 6) -#define USHORT X(Tunsign|Tshort, 7), X(Tunsign|Tshort|Tint, 7) +#define SHORT X(Tshort, 6), X(Tshort|Tint, 6) +#define SSHORT X(Tsign|Tshort, 7), X(Tsign|Tshort|Tint, 7) +#define USHORT X(Tunsign|Tshort, 8), X(Tunsign|Tshort|Tint, 8) -#define INT X(0, 8), X(Tint, 8) -#define SINT X(Tsign, 9), X(Tsign|Tint, 9) -#define UINT X(Tunsign, 10), X(Tunsign|Tint, 10) +#define INT X(0, 9), X(Tint, 9) +#define SINT X(Tsign, 10), X(Tsign|Tint, 10) +#define UINT X(Tunsign, 11), X(Tunsign|Tint, 11) -#define LONG X(Tlong, 11), X(Tlong|Tint, 11) -#define SLONG X(Tsign|Tlong, 12), X(Tsign|Tlong|Tint, 12) -#define ULONG X(Tunsign|Tlong, 13), X(Tunsign|Tlong|Tint, 13) +#define LONG X(Tlong, 12), X(Tlong|Tint, 12) +#define SLONG X(Tsign|Tlong, 13), X(Tsign|Tlong|Tint, 13) +#define ULONG X(Tunsign|Tlong, 14), X(Tunsign|Tlong|Tint, 14) -#define VLONG X(Tvlong, 14), X(Tvlong|Tint, 14) -#define SVLONG X(Tsign|Tvlong, 15), X(Tsign|Tvlong|Tint, 15) -#define UVLONG X(Tunsign|Tvlong, 16), X(Tunsign|Tvlong|Tint, 16) +#define VLONG X(Tvlong, 15), X(Tvlong|Tint, 15) +#define SVLONG X(Tsign|Tvlong, 16), X(Tsign|Tvlong|Tint, 16) +#define UVLONG X(Tunsign|Tvlong, 16), X(Tunsign|Tvlong|Tint, 17) -#define FLOAT X(Tfloat, 17) -#define DOUBLE X(Tdouble, 18) -#define LONGDB X(Tlong|Tdouble, 19) -#define COMPLEX X(Tcmplx, 20) -#define IMAGINARY X(Timag, 21) +#define FLOAT X(Tfloat, 18) +#define DOUBLE X(Tdouble, 19) +#define LONGDB X(Tlong|Tdouble, 20) +#define COMPLEX X(Tcmplx, 21) +#define IMAGINARY X(Timag, 22) /* fixed width definitions */ #define DEF(sz, aln, mx, sgn) {.size=sz, .align=aln, .max=mx, .sign=sgn } @@ -91,6 +91,7 @@ Type pointer = {.size=8, .align=8, .max=0xffffffffffffffff, .sign=0}; /* pack architecture specific definitions into exported arrays */ #define TYPE(a, ...) a, Type basetypes[] = { + { 0 }, /* sentinel value for bad types */ TYPES }; #undef TYPE diff --git a/sys/cmd/cc/cc.c b/sys/cmd/cc/cc.c index 2dcf897..796d3a8 100644 --- a/sys/cmd/cc/cc.c +++ b/sys/cmd/cc/cc.c @@ -215,8 +215,6 @@ init(void) C.type.info = calloc(C.type.cap, sizeof(*C.type.info)); memcpy(C.type.info, basetypes, C.type.len * sizeof(*C.type.info)); - for (i = 0; i < arrlen(basetypes); i++) - intern(&C.type.info[i].ident); } void diff --git a/sys/cmd/cc/cc.h b/sys/cmd/cc/cc.h index 44b0037..cf97233 100644 --- a/sys/cmd/cc/cc.h +++ b/sys/cmd/cc/cc.h @@ -361,7 +361,7 @@ enum Xref, Xstar, Xplus, Xminus, Xneg, Xnot, Xsizeof, Xalignof, Xpreinc, Xpredec, Xcast, /* unary postfix ops */ - Xpostinc, Xpostdec, Xindex, Xcall, Xselp, Xself, Xcmpdlit, + Xpostinc, Xpostdec, Xindex, Xcall, Xselp, Xself, Xinitlist, /* binary ops */ Xoror, Xandand, Xor, Xxor, Xand, Xneq, Xeql, Xgt, Xlt, Xgteq, Xlteq, Xlsft, Xrsft, Xadd, Xsub, Xmul, Xdiv, Xmod, @@ -383,14 +383,16 @@ enum }; /* expressions */ +enum +{ + Keynil, + Keyidx, + Keysel, +}; + struct Key { - /* - * NOTE: bitpacked - * lower 32 -> index into designator array - * upper 32 -> type - */ - uint64 kind; + uint kind : 2; union { Expr *x; string s; @@ -400,17 +402,18 @@ struct Key struct Expr { struct Node; - int32 type; + uint32 qual; + uint32 type; union { struct { uint64 kind; union Val; } val; struct { - int n; - struct Key *key; - Expr *val; - } cmpd; + int n; + struct Key *k; + Expr *v; + } init; Expr *x; struct { Expr *l; @@ -436,10 +439,23 @@ struct Expr union { Expr *pre; Expr *post; - } unary; + } unary; struct { - int32 to; - Expr *x; + int type : 1; + union { + struct { + uint32 qual; + uint32 type; + } of; + Expr *x; + }; + } info; + struct { + struct { + uint32 qual; + uint32 type; + } to; + Expr *x; } cast; struct { Expr *l; @@ -677,7 +693,7 @@ struct Type /* platform specific */ extern Type pointer; -extern Type basetypes[22]; +extern Type basetypes[23]; /* mandated by C standard */ extern uint64 validtypespec[40]; extern int indextypespec[40]; diff --git a/sys/cmd/cc/lex.c b/sys/cmd/cc/lex.c index dd6b476..cb65aa0 100644 --- a/sys/cmd/cc/lex.c +++ b/sys/cmd/cc/lex.c @@ -805,7 +805,7 @@ putsym(SymTab *tab, Sym *sym, error *err) } Sym* -define(SymTab *tab, string name, int kind) +define(SymTab *tab, string name, uint32 kind) { int i; Sym *sym; diff --git a/sys/cmd/rules.mk b/sys/cmd/rules.mk index 6e4a3cb..c648907 100644 --- a/sys/cmd/rules.mk +++ b/sys/cmd/rules.mk @@ -5,12 +5,12 @@ include share/push.mk DIR := $(d)/cat include $(DIR)/rules.mk -# DIR := $(d)/cc -# include $(DIR)/rules.mk - -DIR := $(d)/edo +DIR := $(d)/cc include $(DIR)/rules.mk +# DIR := $(d)/edo +# include $(DIR)/rules.mk + DIR := $(d)/rc include $(DIR)/rules.mk -- cgit v1.2.1