From 50d3df1fafd26305742373a71022fdc4dd0d0ed4 Mon Sep 17 00:00:00 2001 From: Nicholas Noll Date: Fri, 22 May 2020 16:28:55 -0700 Subject: checkin: going to rework declaration structure to be more transparent --- sys/cmd/cc/ast.c | 322 +++++++++++++++++++++++++++++++++++++++++++++++++++---- sys/cmd/cc/cc.h | 143 ++++++++++++++++-------- 2 files changed, 400 insertions(+), 65 deletions(-) (limited to 'sys/cmd/cc') diff --git a/sys/cmd/cc/ast.c b/sys/cmd/cc/ast.c index 2d741ea..4553b96 100644 --- a/sys/cmd/cc/ast.c +++ b/sys/cmd/cc/ast.c @@ -150,8 +150,10 @@ nomatch(Token t, vlong kind) } // ----------------------------------------------------------------------- -// forward declarations +// needed forward declarations +static error spec(Parser *, Lexer *, uint64 *); +static error dtor(Parser *p, Lexer *lx, Dtor *d, int ab); static Decl *decl(Parser *, Lexer *); static Expr *expr(Parser *, Lexer *); static error blkstmt(Parser *, Lexer *, Stmt **); @@ -164,7 +166,253 @@ static Expr* primary(Parser *p, Lexer *lx) { + int k; + Expr *x; + Token t; + Pos b; + + t = peek(p, 0); + b = t.pos; + switch (k = (t.kind & Vmask)) { + case Aident: + MAKEX(x, ident); + x->pos.beg = b; + x->pos.end = lx->pos; + return x; + + case Alit: + MAKEX(x, lit); + x->pos.beg = b; + x->pos.end = lx->pos; + x->val.kind = t.kind & ~Vmask; + return x; + + case Alparen: + advance(p, lx); + x = expr(p, lx); + t = peek(p, 0); + if (nomatch(t, Arparen)) { + errorat(lx->pos, "unterminated paren expression"); + goto Bad; + } + advance(p, lx); + return x; + + default: + ; + } + panicf("unreachable"); +Bad: + errorat(lx->pos, "unable to parse operand expression"); + return nil; +} + +static +Expr* +postfix(Parser *p, Lexer *lx) +{ + Pos b; + Token t; + int c, n; + Expr *x, *y, **a; + struct Key *k; + + x = primary(p, lx); + for (;;) { + b = x->pos.beg; + switch (t.kind) { + case Ainc: + MAKEX(y, postinc); + goto Postfix; + case Adec: + MAKEX(y, postdec); + Postfix: + y->pos.beg = b; + y->pos.end = lx->pos; + y->unary.post = x; + x = y, y = nil; + break; + + case Adot: + MAKEX(y, self); + goto Select; + case Aarrow: + MAKEX(y, selp); + Select: + t = advance(p, lx); + if (t.kind != Aident) { + errorat(t.pos, "invalid operand of selector expression"); + goto Bad; + } + y->pos.beg = b; + y->pos.end = lx->pos; + + y->idx.f = t.val.s; + y->idx.x = x; + x = y, y = nil; + break; + + case Albrakt: + t = advance(p, lx); + if (t.kind == Arbrakt) { + errorat(t.pos, "empty index expression"); + goto Bad; + } + MAKEX(y, index); + y->idx.x = x; + y->idx.i = expr(p, lx); + + t = peek(p, 0); + if (t.kind != Albrakt) { + errorat(t.pos, "malformed index expression"); + goto Bad; + } + + x = y, y = nil; + break; + + case Alparen: + t = advance(p, lx); + MAKEX(y, call); + y->call.fn = x; + y->pos.beg = b; + y->call.n = 0; + if (t.kind == Arparen) { + y->call.arg = nil; + goto Endfunc; + } + c = 0; + a = nil; + Arg: + if (y->call.n >= c) { + c += 20; + a = realloc(a, c * sizeof(*a)); + } + a[y->call.n++] = expr(p, lx); + t = peek(p, 0); + if (t.kind == Acomma) { + advance(p, lx); + goto Arg; + } + if (t.kind != Arparen) { + errorat(t.pos, "invalid token '%s' found in call argument"); + goto Bad; + } + movearray(y->call.arg, a, y->call.n); + Endfunc: + advance(p, lx); + y->pos.end = lx->pos; + x = y, y = nil; + break; + + case Albrace: + t = advance(p, lx); + MAKEX(y, cmpdlit); + y->pos.beg = b; + y->cmpd.n = 0; + if (t.kind == Arbrace) { + x->cmpd.key = nil; + x->cmpd.val = nil; + goto EndCmpd; + } + c = 0, n = 0; + a = nil, k = nil; + Key0: + if (n >= c) { + c += 20; + k = realloc(k, c * sizeof(*k)); + a = realloc(a, c * sizeof(*a)); + } + Key1: + switch (t.kind) { + case Adot: + t = advance(p, lx); + if (t.kind != Aident) { + errorat(t.pos, "dot designator must be followed by identifier"); + goto Bad; + } + k[n++] = (struct Key) { + .kind = (uint32)x->cmpd.n, + .s = t.val.s, + }; + t = advance(p, lx); + goto Key0; + + case Albrakt: + t = advance(p, lx); + k[n++] = (struct Key) { + .kind = (uint32)x->cmpd.n | (1ULL << 32), + .x = expr(p, lx), + }; + t = peek(p, 0); + goto Key0; + + case Aeq: + t = advance(p, lx); + if (t.kind == Albrakt) + a[x->cmpd.n++] = postfix(p, lx); + else + a[x->cmpd.n++] = expr(p, lx); + + t = peek(p, 0); + switch (t.kind) { + case Arbrakt: + break; + case Acomma: + advance(p, lx); + default: + goto Key0; + } + break; + + default: + errorat(t.pos, "unrecognized token '%s' in compound literal", tokens[t.kind]); + goto Bad; + } + movearray(x->cmpd.key, k, n); + movearray(x->cmpd.val, a, x->cmpd.n); + EndCmpd: + break; + + default: + ; + } + t = advance(p, lx); + } + + return x; +Bad: + errorat(lx->pos, "failed to parse primary expression"); + return nil; +} + +static +Type* +type(Parser *p, Lexer *lx) +{ + Dtor dt; + uint64 sp; + + if (spec(p, lx, &sp)) { + errorat(lx->pos, "invalid type specification"); + goto Bad; + } + if (sp & MaskMem) { + errorat(lx->pos, "invalid type specifier"); + goto Bad; + } + if (dtor(p, lx, &dt, 1)) { + errorat(lx->pos, "invalid declarator"); + goto Bad; + } + if (nameof(&dt.name) != nil) { + errorat(lx->pos, "abstract declarator can not have an identifier"); + goto Bad; + } +Bad: + errorat(lx->pos, "failed to parse type expression"); + return nil; } static @@ -191,19 +439,48 @@ unary(Parser *p, Lexer *lx) return x; case Alparen: - panicf("cast not implemented"); + advance(p, lx); + MAKEX(x, cast); + x->pos.beg = t.pos; + x->cast.to = type(p, lx); + if (!x->cast.to) { + errorat(lx->pos, "invalid type operand of cast"); + goto Bad; + } + + t = peek(p, 0); + if (nomatch(t, Arparen)) { + errorat(lx->pos, "missing closing paren after cast expression"); + goto Bad; + } + advance(p, lx); + + x->cast.x = unary(p, lx); + x->pos.beg = lx->pos; + return x; + case Akeywd: - panicf("sizeof/align of not implemented"); + switch (t.val.i) { + case Ksizeof: MAKEX(x, sizeof); goto Key; + case Kalignof: MAKEX(x, alignof); goto Key; + Key: + advance(p, lx); + x->unary.post = unary(p, lx); + default: + ; + } default: - return primary(p, lx); + return postfix(p, lx); } +Bad: + return nil; } /* static data for binary operators */ -#define OPERATORS \ - OPERATOR(Astar, 10, Xmul) \ - OPERATOR(Adiv, 10, Xdiv) \ - OPERATOR(Amod, 10, Xmod) \ +#define OPERATORS \ + OPERATOR(Astar, 10, Xmul) \ + OPERATOR(Adiv, 10, Xdiv) \ + OPERATOR(Amod, 10, Xmod) \ OPERATOR(Aadd, 9, Xadd) \ OPERATOR(Asub, 9, Xsub) \ OPERATOR(Alsft, 8, Xlsft) \ @@ -217,17 +494,17 @@ unary(Parser *p, Lexer *lx) OPERATOR(Aand, 5, Xand) \ OPERATOR(Axor, 4, Xxor) \ OPERATOR(Aor, 3, Xor) \ - OPERATOR(Aandand, 2, Xandand) \ + OPERATOR(Aandand, 2, Xandand) \ OPERATOR(Aoror, 1, Xoror) -static int prectab[] = +static int prectab[NUM_TOKENS] = { #define OPERATOR(a, b, c) [a] = b, OPERATORS #undef OPERATOR }; -static int optab[] = +static int optab[NUM_TOKENS] = { #define OPERATOR(a, b, c) [a] = c, OPERATORS @@ -800,12 +1077,10 @@ Bad: return 1; } -/* predeclaration */ -static error dtor(Parser *p, Lexer *lx, Dtor *d); - +/* name declaration */ static error -name(Parser *p, Lexer *lx, Name *nm) +name(Parser *p, Lexer *lx, Name *nm, int ab) { int n, k; Token t; @@ -820,7 +1095,7 @@ name(Parser *p, Lexer *lx, Name *nm) case Alparen: nm->kind = Nparen; alloc(nm->paren); - if (dtor(p, lx, nm->paren)) { + if (dtor(p, lx, nm->paren, ab)) { /* we are using an arena allocator so can't clean up */ errorat(lx->pos, "invalid declarator in parenthesis"); nm->paren = nil; @@ -833,6 +1108,11 @@ name(Parser *p, Lexer *lx, Name *nm) } break; + case Albrakt: + if (ab) + break; + + errorat(lx->pos, "missing identifier in non-abstract declarator"); default: errorat(lx->pos, "invalid token '%s' in name declaration", tokens[k]); goto Bad; @@ -862,7 +1142,7 @@ Bad: /* pointer kind is partitioned into 8x6 regions */ static error -dtor(Parser *p, Lexer *lx, Dtor *d) +dtor(Parser *p, Lexer *lx, Dtor *d, int ab) { int n, k; error err; @@ -917,7 +1197,7 @@ Key: goto Bad; } Name: - return name(p, lx, &d->name); + return name(p, lx, &d->name, ab); Bad: return err; } @@ -942,14 +1222,14 @@ decl(Parser *p, Lexer *lx) dtp = &dt; curr = &d->var.link; Dtor: - if (dtor(p, lx, dtp)) { + if (dtor(p, lx, dtp, 0)) { errorat(lx->pos, "invalid declarator"); goto Bad; } t = peek(p, 0); if (t.kind == Aeq) { - t = advance(p, lx); + t = advance(p, lx); d->kind = (d->kind != Dvars) ? Dvar : Dvars; d->var.init = expr(p, lx); } @@ -972,8 +1252,10 @@ Dtor: errorat(lx->pos, "attempting to define a function for a variable declaration"); goto Bad; } + d->kind = Dfunc; alloc(d->func.body); + openscope(p); if (blkstmt(p, lx, &d->func.body)) { errorat(lx->pos, "failed to parse function body"); diff --git a/sys/cmd/cc/cc.h b/sys/cmd/cc/cc.h index 1412302..0d4c876 100644 --- a/sys/cmd/cc/cc.h +++ b/sys/cmd/cc/cc.h @@ -207,19 +207,22 @@ enum extern byte *tokens[NUM_TOKENS]; /* TODO: store literals in a big val */ +union Val +{ + byte *s; + double f; + vlong i; + uvlong ui; + int32 c; + uint32 uc; + rune r; +}; + struct Token { - uint32 kind; - Pos pos; - union { - byte *s; - double f; - vlong i; - uvlong ui; - int32 c; - uint32 uc; - rune r; - } val; + uint32 kind; + Pos pos; + union Val val; }; enum @@ -321,10 +324,10 @@ enum /* conditional */ Xternary, /* unary prefix ops */ - Xref, Xstar, Xplus, Xminus, Xneg, Xnot, Xsizeof, Xalnof, Xpreinc, Xpredec, + Xref, Xstar, Xplus, Xminus, Xneg, Xnot, Xsizeof, Xalignof, Xpreinc, Xpredec, Xcast, /* unary postfix ops */ - Xpostinc, Xpostdec, Xindex, Xcall, Xsel, Xcmpndlit, + Xpostinc, Xpostdec, Xindex, Xcall, Xselp, Xself, Xcmpdlit, /* binary ops */ Xoror, Xandand, Xor, Xxor, Xand, Xneq, Xeql, Xgt, Xlt, Xgteq, Xlteq, Xlsft, Xrsft, Xadd, Xsub, Xmul, Xdiv, Xmod, @@ -347,11 +350,35 @@ enum }; /* expressions */ +struct Key +{ + /* + * NOTE: bitpacked + * lower 32 -> index into designator array + * upper 32 -> type + */ + uint64 kind; + union { + Expr *x; + string s; + }; +}; + struct Expr { - struct Node; - uint64 type; + struct Node; + Type *type; union { + struct { + uint64 kind; + union Val; + } val; + struct { + int n; + struct Key *key; + Expr *val; + } cmpd; + Expr *x; struct { Expr *l; Expr *r; @@ -361,13 +388,25 @@ struct Expr Expr *t; Expr *e; } cond; + struct { + Expr *x; + union { + Expr *i; + string f; + }; + } idx; + struct { + Expr *fn; + int n; + Expr **arg; + } call; union { Expr *pre; Expr *post; } unary; struct { - uint64 to; - Expr *x; + Type *to; + Expr *x; } cast; struct { Expr *l; @@ -417,33 +456,8 @@ struct Stmt /* declarations */ -struct Ptr -{ - uint64 kind; - Ptr *link; -}; - -struct Name -{ - struct Node; - union { - string ident; - struct Dtor *paren; - }; - union { - Expr *i; - Expr *p; - }; -}; - -struct Dtor -{ - Ptr ptr; - Name name; -}; - -// specifiers /* + * specifiers * the design is the following: * type info is held w/in a 64 bit integer. * the bottom 32 bits are associated to specializations @@ -510,6 +524,31 @@ enum Sbad = -1, }; +struct Ptr +{ + uint64 kind; + Ptr *link; +}; + +struct Name +{ + struct Node; + union { + string ident; + struct Dtor *paren; + }; + union { + Expr *i; + Expr *p; + }; +}; + +struct Dtor +{ + Ptr ptr; + Name name; +}; + struct Decls { union { @@ -538,24 +577,38 @@ struct Decl }; }; +enum +{ + Tbad, + Tbase, + Tarray, + Tptr, +}; + /* types */ struct Type { Sym *sym; string ident; + uint64 kind; uintptr size; uint16 align; union { + Type *ptr; + struct { + int len; + Type *elt; + } arr; struct { int len; Dtor *d; Expr *x; - } su; + } agr; struct { int len; string *s; Expr *x; - } en; + } enm; }; }; -- cgit v1.2.1