From 73c04db73163d1d2719bb97a6b8c133065df75c3 Mon Sep 17 00:00:00 2001 From: Nicholas Noll Date: Mon, 18 May 2020 18:22:42 -0700 Subject: feat: macro expansion and constant evaluation prototype --- sys/cmd/cc/cc.c | 1 + sys/cmd/cc/cc.h | 16 +- sys/cmd/cc/lex.c | 42 ++++-- sys/cmd/cc/pp.c | 438 ++++++++++++++++++++++++++++++++++++++++++++++++++---- sys/libn/bufio.c | 4 +- sys/libn/string.c | 8 +- 6 files changed, 460 insertions(+), 49 deletions(-) (limited to 'sys') diff --git a/sys/cmd/cc/cc.c b/sys/cmd/cc/cc.c index 3dae0fd..6bc363c 100644 --- a/sys/cmd/cc/cc.c +++ b/sys/cmd/cc/cc.c @@ -114,6 +114,7 @@ openio(byte *path) return it; } } + printf("OPENING PATH %s\n", path); if ((C.io - C.iostk) >= arrlen(C.iostk)-1) panicf("out of I/O space!"); diff --git a/sys/cmd/cc/cc.h b/sys/cmd/cc/cc.h index 9871e99..84f173f 100644 --- a/sys/cmd/cc/cc.h +++ b/sys/cmd/cc/cc.h @@ -100,6 +100,7 @@ void errorat(Pos x, byte *fmt, ...); DIRECTIVE(Ddefine,"define", ppdef) \ DIRECTIVE(Dundef,"undef", ppund) \ DIRECTIVE(Dif,"if", ppif0) \ + DIRECTIVE(Delif,"elif", ppif1) \ DIRECTIVE(Delse, "else", ppif1) \ DIRECTIVE(Difdef,"ifdef", ppif2) \ DIRECTIVE(Difndef,"ifndef", ppif3) \ @@ -184,6 +185,8 @@ enum Vusgn = iota(12), Vfloat = iota(13), Vstr = iota(14), + + Vmask = Vchar - 1, }; #undef TOK @@ -221,6 +224,17 @@ struct Sym }; }; +struct SymTab +{ + int32 n_buckets; + int32 size; + int32 n_occupied; + int32 upper_bound; + int32 *flags; + string *keys; + Sym **vals; +}; + Sym *lookup(SymTab *tab, string ident); Sym *define(SymTab *tab, string ident, int kind); @@ -228,7 +242,7 @@ struct Lexer { Pos pos; Io *io; - SymTab *sym; + SymTab sym; byte *b; byte buf[2*1024]; }; diff --git a/sys/cmd/cc/lex.c b/sys/cmd/cc/lex.c index 6d3da59..90f282a 100644 --- a/sys/cmd/cc/lex.c +++ b/sys/cmd/cc/lex.c @@ -26,23 +26,32 @@ puttok(Token tok) // simple wrappers byte -getbyte(Lexer *l) +getbyte(Lexer *lx) { - return bufio·getbyte(&l->io->buf); + return bufio·getbyte(&lx->io->buf); } byte -getnsbyte(Lexer *l) +getnsbyte(Lexer *lx) { - byte b; - while (b = bufio·getbyte(&l->io->buf), isspace(b)); + int b; + b = getbyte(lx); + for (;;) { + if (b >= RuneSelf || !isspace(b)) + return b; + if (b == '\n') { + lx->pos.line++; + return b; + } + b = getbyte(lx); + } return b; } rune -getrune(Lexer *l) +getrune(Lexer *lx) { - return bufio·getrune(&l->io->buf); + return bufio·getrune(&lx->io->buf); } byte @@ -242,7 +251,7 @@ GetByte: Dispatch: tok.pos.beg = lx->pos; - if (b >= RuneSelf || isalpha(b)) + if (b >= RuneSelf || isalpha(b) || b == '_') goto TAlpha; if (isdigit(b)) goto TNum; @@ -434,6 +443,7 @@ Dispatch: r = b; n = 10; s = lx->buf; + ungetbyte(lx); if (*s == '0') { b = *++s; switch (b) { @@ -510,6 +520,7 @@ Dispatch: } *lx->b = '\0'; d = strtod(lx->buf, nil); + ungetbyte(lx); tok.kind = Alit | Vfloat; tok.val.f = d; @@ -539,6 +550,8 @@ Dispatch: u = getbyte(lx); } *s = '\0'; + ungetbyte(lx); + tok.kind = Aident; tok.val.s = lx->buf; @@ -547,7 +560,7 @@ Dispatch: tok.kind = Akeywd; } - sym = lookup(lx->sym, tok.val.s); + sym = lookup(&lx->sym, tok.val.s); if (sym) { io = makeio(); io->buf.end += expandmacro(lx, sym, io->b); @@ -606,11 +619,6 @@ popio(Lexer *lx) #define PTR_HASH(p) (uintptr)(p) #define PTR_EQUAL(p1, p2) ((uintptr)(p1) == (uintptr)(p2)) -struct SymTab -{ - MAP_STRUCT_BODY(string, Sym*); -}; - Sym* lookup(SymTab *tab, string ident) { @@ -640,14 +648,16 @@ putsym(SymTab *tab, Sym *sym, error *err) Sym* define(SymTab *tab, string name, int kind) { - Sym *sym; + int i; + Sym *sym; error err; sym = mem·arenaalloc(C.heap, 1, sizeof(*sym)); sym->name = name; sym->kind = kind; - putsym(tab, sym, &err); + i = putsym(tab, sym, &err); + tab->vals[i] = sym; return sym; } diff --git a/sys/cmd/cc/pp.c b/sys/cmd/cc/pp.c index d1a5d83..ceb9d66 100644 --- a/sys/cmd/cc/pp.c +++ b/sys/cmd/cc/pp.c @@ -7,13 +7,24 @@ static string ident(Lexer *lx) { - byte b; + int b; + byte *s; - lx->b = lx->buf; - for (b = getnsbyte(lx); !isspace(b); b = getbyte(lx)) { - *lx->b++ = b; + b = getnsbyte(lx); + if (!isalpha(b) && b != '_' && b < RuneSelf) { + ungetbyte(lx); + return ""; } - *lx->b = '\0'; + + for (s = lx->buf;;) { + *s++ = b; + b = getbyte(lx); + if (isalnum(b) || b == '_' || b >= RuneSelf) + continue; + ungetbyte(lx); + break; + } + *s = '\0'; return lx->buf; } @@ -51,14 +62,320 @@ defmacro(Lexer *lx, string name, string macro) { Sym *mac; - mac = define(lx->sym, name, Smacro); + printf("DEFINING MACRO %s\n", name); + mac = define(&lx->sym, name, Smacro); mac->macro = macro; return mac; } +static vlong evalmacro(Lexer *lx, byte prec); + +static +vlong +opand(Lexer *lx) +{ + int b; + vlong v; + string s; + Token tok; + Sym *sym; + + b = getnsbyte(lx); + if (b == '\n') { + errorat(lx->pos, "new line in macro expression"); + return 0; + } + ungetbyte(lx); + + tok = lex(lx); + + switch (tok.kind & Vmask) { + case Aneg: + return ~opand(lx); + + case Anot: + return !opand(lx); + + case Alparen: + v = evalmacro(lx, 1); + tok = lex(lx); + if (!(tok.kind & Arparen)) { + errorat(lx->pos, "unbalanced parenthesis in macro expression"); + return 0; + } + return v; + + case Alit: + switch (tok.kind & ~Vmask) { + case Vint: + return tok.val.i; + case Vchar: + return tok.val.c; + default: + errorat(lx->pos, "invalid literal of type '%d' in conditional macro", tok.kind & ~Vmask); + return 0; + } + + case Aident: + sym = lookup(&lx->sym, tok.val.s); + if (!sym) { + /* calling lex directly would expand the operand here + * manually lex the result + */ + if (strcmp(tok.val.s, "defined") == 0) { + b = getnsbyte(lx); + if (b == '\n') { + errorat(lx->pos, "new line in defined operand"); + return 0; + } + s = lx->buf; + if (b == '(') { + b = getnsbyte(lx); + while (b != ')') { + if (b == '\n') { + errorat(lx->pos, "new line inside defined operand"); + return 0; + } + if (b == '(') { + errorat(lx->pos, "nested parens not allowed inside defined operator"); + return 0; + } + if (!isspace(b)) + *s++ = b; + b = getbyte(lx); + } + } else { + while (!isspace(b)) { + *s++ = b; + b = getbyte(lx); + + if (b == '\n') { + errorat(lx->pos, "new line inside defined operand"); + return 0; + } + } + } + *s = '\0'; + return lookup(&lx->sym, lx->buf) != nil; + } + return 0; + } + panicf("unreachable"); + return 1; + + default: + errorat(lx->pos, "opand: invalid token found in macro conditional: '%s'", tokens[tok.kind & Vmask]); + return 0; + } +} + +// recursively evaluates a macro +// reduced set of operators allowed here +static +vlong +evalmacro(Lexer *lx, byte prec) +{ + int b; + vlong l, r; + Token tok; + + l = opand(lx); + for (;;) { + b = getnsbyte(lx); + if (b == '\n') { + ungetbyte(lx); + break; + } + ungetbyte(lx); + + tok = lex(lx); + // simplified jump table of precedence + // unpacked to evaluate inline + // NOTE: You might have to unget bytes to replace token on stack + switch (tok.kind & Vmask) { + case Astar: + if (prec > 10) { + ungetbyte(lx); + return l; + } + r = evalmacro(lx, 10 + 1); + l = l * r; + continue; + + case Adiv: + if (prec > 10) { + ungetbyte(lx); + return l; + } + r = evalmacro(lx, 10 + 1); + l = l / r; + continue; + + case Amod: + if (prec > 10) { + ungetbyte(lx); + return l; + } + r = evalmacro(lx, 10 + 1); + l = l % r; + continue; + + case Aadd: + if (prec > 9) { + ungetbyte(lx); + return l; + } + r = evalmacro(lx, 9 + 1); + l = l + r; + continue; + + case Asub: + if (prec > 9) { + ungetbyte(lx); + return l; + } + r = evalmacro(lx, 9 + 1); + l = l - r; + continue; + + case Alsft: + if (prec > 8) { + ungetbyte(lx); + ungetbyte(lx); + return l; + } + r = evalmacro(lx, 8 + 1); + l = l << r; + continue; + + case Arsft: + if (prec > 8) { + ungetbyte(lx); + ungetbyte(lx); + return l; + } + r = evalmacro(lx, 8 + 1); + l = l >> r; + continue; + + case Alt: + if (prec > 7) { + ungetbyte(lx); + return l; + } + r = evalmacro(lx, 7 + 1); + l = l < r; + continue; + + case Agt: + if (prec > 7) { + ungetbyte(lx); + return l; + } + r = evalmacro(lx, 7 + 1); + l = l > r; + continue; + + case Agteq: + if (prec > 7) { + ungetbyte(lx); + ungetbyte(lx); + return l; + } + r = evalmacro(lx, 7 + 1); + l = l >= r; + continue; + + case Alteq: + if (prec > 7) { + ungetbyte(lx); + ungetbyte(lx); + return l; + } + r = evalmacro(lx, 7 + 1); + l = l >= r; + continue; + + case Aeq: + if (prec > 6) { + ungetbyte(lx); + ungetbyte(lx); + return l; + } + r = evalmacro(lx, 6 + 1); + l = l == r; + continue; + + case Aneq: + if (prec > 6) { + ungetbyte(lx); + ungetbyte(lx); + return l; + } + r = evalmacro(lx, 6 + 1); + l = l != r; + continue; + + case Aand: + if (prec > 5) { + ungetbyte(lx); + return l; + } + r = evalmacro(lx, 5 + 1); + l = l & r; + continue; + + case Axor: + if (prec > 4) { + ungetbyte(lx); + return l; + } + r = evalmacro(lx, 4 + 1); + l = l ^ r; + continue; + + case Aor: + if (prec > 3) { + ungetbyte(lx); + return l; + } + r = evalmacro(lx, 3 + 1); + l = l | r; + continue; + + case Aandand: + if (prec > 2) { + ungetbyte(lx); + ungetbyte(lx); + return l; + } + r = evalmacro(lx, 2 + 1); + l = l && r; + continue; + + case Aoror: + if (prec > 1) { + ungetbyte(lx); + ungetbyte(lx); + return l; + } + r = evalmacro(lx, 1 + 1); + l = l || r; + continue; + + default: + errorat(lx->pos, "eval: invalid token found in macro conditional '%s'", tokens[tok.kind & Vmask]); + abort(); + return 0; + } + } + + return l; +} + // ----------------------------------------------------------------------- -// preprocessor definitions +// preprocessor magic numbers enum { @@ -74,7 +391,21 @@ enum // ----------------------------------------------------------------------- // preprocessor functions +static +error +ppend(Lexer *lx) +{ + int b; + do { + b = getnsbyte(lx); + } while (b > 0 && b != '\n'); + + return 0; +} + + /* #undef */ +static error ppund(Lexer *lx) { @@ -84,14 +415,16 @@ ppund(Lexer *lx) s = ident(lx); intern(&s); - sym = lookup(lx->sym, s); + sym = lookup(&lx->sym, s); if (!sym) { errorat(lx->pos, "attempting to undefine unrecognized symbol '%s'", s); } + ppend(lx); return 0; } /* #define */ +static error ppdef(Lexer *lx) { @@ -107,7 +440,7 @@ ppdef(Lexer *lx) } intern(&s); - sym = lookup(lx->sym, s); + sym = lookup(&lx->sym, s); if (sym) { errorat(lx->pos, "macro redefined: '%s'", sym->name); goto Bad; @@ -240,6 +573,7 @@ ppdef(Lexer *lx) return 0; Bad: errorat(lx->pos, "failed parse of #define macro '%s'", s); + ppend(lx); return 1; } @@ -409,11 +743,13 @@ End: Nospace: errorf("out of memory during macro expansion %s", s->name); Bad: + ppend(lx); errorf("failed to expand macro %s", s->name); return -1; } /* #include */ +static error ppinc(Lexer *lx) { @@ -479,11 +815,13 @@ ppinc(Lexer *lx) Bad: ungetbyte(lx); - errorf("failed include"); + errorat(lx->pos, "failed include"); + ppend(lx); return 1; } /* #pragma */ +static error ppprag(Lexer *lx) { @@ -500,6 +838,7 @@ ppprag(Lexer *lx) } Bad: errorat(lx->pos, "unrecognized pragma '%s'", s); + ppend(lx); return 1; } @@ -510,6 +849,16 @@ ppif(Lexer *lx, int f) { Sym *sym; string s; + int c, l, b; + + if (f == 0) { + b = evalmacro(lx, 1); + if (b) { + ppend(lx); + return 0; + } + goto Skip; + } if (f == 1) goto Skip; @@ -521,19 +870,60 @@ ppif(Lexer *lx, int f) } intern(&s); - sym = lookup(lx->sym, s); - if ((!sym && f == 3) || sym && (f == 2)) - return 1; - + sym = lookup(&lx->sym, s); + if ((!sym && (f == 3)) || (sym && (f == 2))) + return 0; Skip: + b = 1; + l = 0; + for (;;) { + c = getbyte(lx); + if (c != '#') { + if (!isspace(c)) + b = 0; + if (c == '\n') + b = 1; + continue; + } + if (!b) + continue; + s = ident(lx); + if (!s) + continue; + + if ((strcmp(s, "elif") == 0) && l == 0) + ppif(lx, 0); + + if (strcmp(s, "endif") == 0) { + if (l) { + l--; + continue; + } + ppend(lx); + return 0; + } + if (strcmp(s, "if") == 0 || + strcmp(s, "elif") == 0 || + strcmp(s, "ifdef") == 0 || + strcmp(s, "ifndef") == 0) { + l++; + continue; + } + + if (l == 0 && f != 1 && strcmp(s, "else") == 0) { + return 0; + } + } Bad: - errorat(lx->pos, "bad syntax in preprocessor if directive"); - return 0; + errorat(lx->pos, "bad syntax in preprocessor conditional directive"); + ppend(lx); + return 1; } /* #if */ +static error ppif0(Lexer *lx) { @@ -541,6 +931,7 @@ ppif0(Lexer *lx) } /* #else */ +static error ppif1(Lexer *lx) { @@ -548,6 +939,7 @@ ppif1(Lexer *lx) } /* #ifdef */ +static error ppif2(Lexer *lx) { @@ -555,19 +947,13 @@ ppif2(Lexer *lx) } /* #ifndef */ +static error ppif3(Lexer *lx) { return ppif(lx, 3); } -/* #endif */ -error -ppend(Lexer *lx) -{ - return 0; -} - // ----------------------------------------------------------------------- // dispatch function @@ -610,18 +996,18 @@ dodefine(Lexer *lx, string s) c = strchr(lx->buf, '='); if (c) { *c++ = '\0'; - sym = lookup(lx->sym, lx->buf); + sym = lookup(&lx->sym, lx->buf); if (sym) { errorf("redefinition of symbol '%s'", sym->name); return 1; } - sym = define(lx->sym, lx->buf, Smacro); + sym = define(&lx->sym, lx->buf, Smacro); n = strlen(c) + 2; sym->macro = str·makelen("", n); str·appendbyte(&sym->macro, '\0'); str·append(&sym->macro, c); } else { - sym = lookup(lx->sym, lx->buf); + sym = lookup(&lx->sym, lx->buf); if (sym) { errorf("redefinition of symbol '%s'", sym->name); return 1; diff --git a/sys/libn/bufio.c b/sys/libn/bufio.c index 05b6068..cde56b7 100644 --- a/sys/libn/bufio.c +++ b/sys/libn/bufio.c @@ -75,8 +75,8 @@ getbyte: error bufio·ungetbyte(io·Buffer *buf, byte c) { - buf->state ^= bufio·end; - if (buf->state & bufio·rdr) { + buf->state &= ~bufio·end; + if (!(buf->state & bufio·rdr)) { errorf("attempted to unget on non-active reader"); return bufio·err; } diff --git a/sys/libn/string.c b/sys/libn/string.c index fb92a04..4c8c903 100644 --- a/sys/libn/string.c +++ b/sys/libn/string.c @@ -355,11 +355,11 @@ str·appendlen(string *s, vlong n, const byte* b) str·grow(s, n); if (*s == nil) return; - Hdr* h = (Hdr*)(s - sizeof(Hdr)); + Hdr* h = (Hdr*)(*s - sizeof(Hdr)); memcpy(*s + str·len(*s), b, n); h->len += n; - *s[h->len] = '\0'; + (*s)[h->len] = '\0'; } // Append will append the given null terminated C string to the string data @@ -379,11 +379,11 @@ str·appendbyte(string *s, const byte b) str·grow(s, 1); if (*s == nil) return; - Hdr* h = (Hdr*)(s - sizeof(Hdr)); + Hdr* h = (Hdr*)(*s - sizeof(Hdr)); *(*s + str·len(*s)) = b; h->len++; - *s[h->len] = '\0'; // NOTE: I don't think an explicit zero is required..? + (*s)[h->len] = '\0'; // NOTE: I don't think an explicit zero is required..? } /* -- cgit v1.2.1