From 73c04db73163d1d2719bb97a6b8c133065df75c3 Mon Sep 17 00:00:00 2001
From: Nicholas Noll
Date: Mon, 18 May 2020 18:22:42 -0700
Subject: feat: macro expansion and constant evaluation prototype

---
Review notes (commentary only; ignored when the patch is applied):
  * evalmacro: the Alteq case computed `l >= r` (copy of Agteq); now `l <= r`.
  * opand: the closing parenthesis is compared by value,
    `(tok.kind & Vmask) != Arparen`, matching how the enclosing switch
    decodes tok.kind, instead of the bit test `tok.kind & Arparen`.
  * ppif (#if / #elif): the vlong result of evalmacro is reduced with `!= 0`
    before being stored in an int, so values with only high bits set are
    still treated as true.
  * ppif (skip loop): an #elif at nesting depth 0 now returns the result of
    evaluating it instead of discarding it and scanning on; it is ignored
    while skipping after #else (f == 1); and #elif no longer counts as
    opening a new nesting level.
  All changes replace added lines one-for-one, so hunk line counts and the
  diffstat below are unchanged.

 sys/cmd/cc/pp.c | 438 ++++++++++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 412 insertions(+), 26 deletions(-)

(limited to 'sys/cmd/cc/pp.c')

diff --git a/sys/cmd/cc/pp.c b/sys/cmd/cc/pp.c
index d1a5d83..ceb9d66 100644
--- a/sys/cmd/cc/pp.c
+++ b/sys/cmd/cc/pp.c
@@ -7,13 +7,24 @@ static
 string
 ident(Lexer *lx)
 {
-    byte b;
+    int b;
+    byte *s;
 
-    lx->b = lx->buf;
-    for (b = getnsbyte(lx); !isspace(b); b = getbyte(lx)) {
-        *lx->b++ = b;
+    b = getnsbyte(lx);
+    if (!isalpha(b) && b != '_' && b < RuneSelf) {
+        ungetbyte(lx);
+        return "";
     }
-    *lx->b = '\0';
+
+    for (s = lx->buf;;) {
+        *s++ = b;
+        b = getbyte(lx);
+        if (isalnum(b) || b == '_' || b >= RuneSelf)
+            continue;
+        ungetbyte(lx);
+        break;
+    }
+    *s = '\0';
 
     return lx->buf;
 }
@@ -51,14 +62,320 @@ defmacro(Lexer *lx, string name, string macro)
 {
     Sym *mac;
 
-    mac = define(lx->sym, name, Smacro);
+    printf("DEFINING MACRO %s\n", name);
+    mac = define(&lx->sym, name, Smacro);
     mac->macro = macro;
 
     return mac;
 }
 
+static vlong evalmacro(Lexer *lx, byte prec);
+
+static
+vlong
+opand(Lexer *lx)
+{
+    int b;
+    vlong v;
+    string s;
+    Token tok;
+    Sym *sym;
+
+    b = getnsbyte(lx);
+    if (b == '\n') {
+        errorat(lx->pos, "new line in macro expression");
+        return 0;
+    }
+    ungetbyte(lx);
+
+    tok = lex(lx);
+
+    switch (tok.kind & Vmask) {
+    case Aneg:
+        return ~opand(lx);
+
+    case Anot:
+        return !opand(lx);
+
+    case Alparen:
+        v = evalmacro(lx, 1);
+        tok = lex(lx);
+        if ((tok.kind & Vmask) != Arparen) {
+            errorat(lx->pos, "unbalanced parenthesis in macro expression");
+            return 0;
+        }
+        return v;
+
+    case Alit:
+        switch (tok.kind & ~Vmask) {
+        case Vint:
+            return tok.val.i;
+        case Vchar:
+            return tok.val.c;
+        default:
+            errorat(lx->pos, "invalid literal of type '%d' in conditional macro", tok.kind & ~Vmask);
+            return 0;
+        }
+
+    case Aident:
+        sym = lookup(&lx->sym, tok.val.s);
+        if (!sym) {
+            /* calling lex directly would expand the operand here
+             * manually lex the result
+             */
+            if (strcmp(tok.val.s, "defined") == 0) {
+                b = getnsbyte(lx);
+                if (b == '\n') {
+                    errorat(lx->pos, "new line in defined operand");
+                    return 0;
+                }
+                s = lx->buf;
+                if (b == '(') {
+                    b = getnsbyte(lx);
+                    while (b != ')') {
+                        if (b == '\n') {
+                            errorat(lx->pos, "new line inside defined operand");
+                            return 0;
+                        }
+                        if (b == '(') {
+                            errorat(lx->pos, "nested parens not allowed inside defined operator");
+                            return 0;
+                        }
+                        if (!isspace(b))
+                            *s++ = b;
+                        b = getbyte(lx);
+                    }
+                } else {
+                    while (!isspace(b)) {
+                        *s++ = b;
+                        b = getbyte(lx);
+
+                        if (b == '\n') {
+                            errorat(lx->pos, "new line inside defined operand");
+                            return 0;
+                        }
+                    }
+                }
+                *s = '\0';
+                return lookup(&lx->sym, lx->buf) != nil;
+            }
+            return 0;
+        }
+        panicf("unreachable");
+        return 1;
+
+    default:
+        errorat(lx->pos, "opand: invalid token found in macro conditional: '%s'", tokens[tok.kind & Vmask]);
+        return 0;
+    }
+}
+
+// recursively evaluates a macro conditional; returns at operators binding looser than prec
+// reduced set of operators allowed here
+static
+vlong
+evalmacro(Lexer *lx, byte prec)
+{
+    int b;
+    vlong l, r;
+    Token tok;
+
+    l = opand(lx);
+    for (;;) {
+        b = getnsbyte(lx);
+        if (b == '\n') {
+            ungetbyte(lx);
+            break;
+        }
+        ungetbyte(lx);
+
+        tok = lex(lx);
+        // simplified jump table of precedence
+        // unpacked to evaluate inline
+        // NOTE: You might have to unget bytes to replace token on stack
+        switch (tok.kind & Vmask) {
+        case Astar:
+            if (prec > 10) {
+                ungetbyte(lx);
+                return l;
+            }
+            r = evalmacro(lx, 10 + 1);
+            l = l * r;
+            continue;
+
+        case Adiv:
+            if (prec > 10) {
+                ungetbyte(lx);
+                return l;
+            }
+            r = evalmacro(lx, 10 + 1);
+            l = l / r;
+            continue;
+
+        case Amod:
+            if (prec > 10) {
+                ungetbyte(lx);
+                return l;
+            }
+            r = evalmacro(lx, 10 + 1);
+            l = l % r;
+            continue;
+
+        case Aadd:
+            if (prec > 9) {
+                ungetbyte(lx);
+                return l;
+            }
+            r = evalmacro(lx, 9 + 1);
+            l = l + r;
+            continue;
+
+        case Asub:
+            if (prec > 9) {
+                ungetbyte(lx);
+                return l;
+            }
+            r = evalmacro(lx, 9 + 1);
+            l = l - r;
+            continue;
+
+        case Alsft:
+            if (prec > 8) {
+                ungetbyte(lx);
+                ungetbyte(lx);
+                return l;
+            }
+            r = evalmacro(lx, 8 + 1);
+            l = l << r;
+            continue;
+
+        case Arsft:
+            if (prec > 8) {
+                ungetbyte(lx);
+                ungetbyte(lx);
+                return l;
+            }
+            r = evalmacro(lx, 8 + 1);
+            l = l >> r;
+            continue;
+
+        case Alt:
+            if (prec > 7) {
+                ungetbyte(lx);
+                return l;
+            }
+            r = evalmacro(lx, 7 + 1);
+            l = l < r;
+            continue;
+
+        case Agt:
+            if (prec > 7) {
+                ungetbyte(lx);
+                return l;
+            }
+            r = evalmacro(lx, 7 + 1);
+            l = l > r;
+            continue;
+
+        case Agteq:
+            if (prec > 7) {
+                ungetbyte(lx);
+                ungetbyte(lx);
+                return l;
+            }
+            r = evalmacro(lx, 7 + 1);
+            l = l >= r;
+            continue;
+
+        case Alteq:
+            if (prec > 7) {
+                ungetbyte(lx);
+                ungetbyte(lx);
+                return l;
+            }
+            r = evalmacro(lx, 7 + 1);
+            l = l <= r;
+            continue;
+
+        case Aeq:
+            if (prec > 6) {
+                ungetbyte(lx);
+                ungetbyte(lx);
+                return l;
+            }
+            r = evalmacro(lx, 6 + 1);
+            l = l == r;
+            continue;
+
+        case Aneq:
+            if (prec > 6) {
+                ungetbyte(lx);
+                ungetbyte(lx);
+                return l;
+            }
+            r = evalmacro(lx, 6 + 1);
+            l = l != r;
+            continue;
+
+        case Aand:
+            if (prec > 5) {
+                ungetbyte(lx);
+                return l;
+            }
+            r = evalmacro(lx, 5 + 1);
+            l = l & r;
+            continue;
+
+        case Axor:
+            if (prec > 4) {
+                ungetbyte(lx);
+                return l;
+            }
+            r = evalmacro(lx, 4 + 1);
+            l = l ^ r;
+            continue;
+
+        case Aor:
+            if (prec > 3) {
+                ungetbyte(lx);
+                return l;
+            }
+            r = evalmacro(lx, 3 + 1);
+            l = l | r;
+            continue;
+
+        case Aandand:
+            if (prec > 2) {
+                ungetbyte(lx);
+                ungetbyte(lx);
+                return l;
+            }
+            r = evalmacro(lx, 2 + 1);
+            l = l && r;
+            continue;
+
+        case Aoror:
+            if (prec > 1) {
+                ungetbyte(lx);
+                ungetbyte(lx);
+                return l;
+            }
+            r = evalmacro(lx, 1 + 1);
+            l = l || r;
+            continue;
+
+        default:
+            errorat(lx->pos, "eval: invalid token found in macro conditional '%s'", tokens[tok.kind & Vmask]);
+            abort();
+            return 0;
+        }
+    }
+
+    return l;
+}
+
 // -----------------------------------------------------------------------
-// preprocessor definitions
+// preprocessor magic numbers
 
 enum
 {
@@ -74,7 +391,21 @@ enum
 // -----------------------------------------------------------------------
 // preprocessor functions
 
+static
+error
+ppend(Lexer *lx)
+{
+    int b;
+    do {
+        b = getnsbyte(lx);
+    } while (b > 0 && b != '\n');
+
+    return 0;
+}
+
+
 /* #undef */
+static
 error
 ppund(Lexer *lx)
 {
@@ -84,14 +415,16 @@ ppund(Lexer *lx)
 
     s = ident(lx);
     intern(&s);
-    sym = lookup(lx->sym, s);
+    sym = lookup(&lx->sym, s);
     if (!sym) {
         errorat(lx->pos, "attempting to undefine unrecognized symbol '%s'", s);
     }
+    ppend(lx);
     return 0;
 }
 
 /* #define */
+static
 error
 ppdef(Lexer *lx)
 {
@@ -107,7 +440,7 @@ ppdef(Lexer *lx)
     }
 
     intern(&s);
-    sym = lookup(lx->sym, s);
+    sym = lookup(&lx->sym, s);
     if (sym) {
         errorat(lx->pos, "macro redefined: '%s'", sym->name);
         goto Bad;
@@ -240,6 +573,7 @@ ppdef(Lexer *lx)
     return 0;
 Bad:
     errorat(lx->pos, "failed parse of #define macro '%s'", s);
+    ppend(lx);
     return 1;
 }
 
@@ -409,11 +743,13 @@ End:
 Nospace:
     errorf("out of memory during macro expansion %s", s->name);
 Bad:
+    ppend(lx);
     errorf("failed to expand macro %s", s->name);
     return -1;
 }
 
 /* #include */
+static
 error
 ppinc(Lexer *lx)
 {
@@ -479,11 +815,13 @@ ppinc(Lexer *lx)
 
 Bad:
     ungetbyte(lx);
-    errorf("failed include");
+    errorat(lx->pos, "failed include");
+    ppend(lx);
     return 1;
 }
 
 /* #pragma */
+static
 error
 ppprag(Lexer *lx)
 {
@@ -500,6 +838,7 @@ ppprag(Lexer *lx)
     }
 Bad:
     errorat(lx->pos, "unrecognized pragma '%s'", s);
+    ppend(lx);
     return 1;
 }
 
@@ -510,6 +849,16 @@ ppif(Lexer *lx, int f)
 {
     Sym *sym;
     string s;
+    int c, l, b;
+
+    if (f == 0) {
+        b = evalmacro(lx, 1) != 0;
+        if (b) {
+            ppend(lx);
+            return 0;
+        }
+        goto Skip;
+    }
 
     if (f == 1)
         goto Skip;
@@ -521,19 +870,60 @@ ppif(Lexer *lx, int f)
     }
 
     intern(&s);
-    sym = lookup(lx->sym, s);
-    if ((!sym && f == 3) || sym && (f == 2))
-        return 1;
-
+    sym = lookup(&lx->sym, s);
+    if ((!sym && (f == 3)) || (sym && (f == 2)))
+        return 0;
 
 Skip:
+    b = 1;
+    l = 0;
+    for (;;) {
+        c = getbyte(lx);
+        if (c != '#') {
+            if (!isspace(c))
+                b = 0;
+            if (c == '\n')
+                b = 1;
+            continue;
+        }
+        if (!b)
+            continue;
+        s = ident(lx);
+        if (!s)
+            continue;
+
+        if (l == 0 && f != 1 && strcmp(s, "elif") == 0)
+            return ppif(lx, 0);
+
+        if (strcmp(s, "endif") == 0) {
+            if (l) {
+                l--;
+                continue;
+            }
+            ppend(lx);
+            return 0;
+        }
+        /* #elif continues the current group; only these open a nested one */
+        if (strcmp(s, "if") == 0 ||
+            strcmp(s, "ifdef") == 0 ||
+            strcmp(s, "ifndef") == 0) {
+            l++;
+            continue;
+        }
+
+        if (l == 0 && f != 1 && strcmp(s, "else") == 0) {
+            return 0;
+        }
+    }
 
 Bad:
-    errorat(lx->pos, "bad syntax in preprocessor if directive");
-    return 0;
+    errorat(lx->pos, "bad syntax in preprocessor conditional directive");
+    ppend(lx);
+    return 1;
 }
 
 /* #if */
+static
 error
 ppif0(Lexer *lx)
 {
@@ -541,6 +931,7 @@ ppif0(Lexer *lx)
 }
 
 /* #else */
+static
 error
 ppif1(Lexer *lx)
 {
@@ -548,6 +939,7 @@ ppif1(Lexer *lx)
 }
 
 /* #ifdef */
+static
 error
 ppif2(Lexer *lx)
 {
@@ -555,19 +947,13 @@ ppif2(Lexer *lx)
 }
 
 /* #ifndef */
+static
 error
 ppif3(Lexer *lx)
 {
     return ppif(lx, 3);
 }
 
-/* #endif */
-error
-ppend(Lexer *lx)
-{
-    return 0;
-}
-
 // -----------------------------------------------------------------------
 // dispatch function
 
@@ -610,18 +996,18 @@ dodefine(Lexer *lx, string s)
     c = strchr(lx->buf, '=');
     if (c) {
         *c++ = '\0';
-        sym = lookup(lx->sym, lx->buf);
+        sym = lookup(&lx->sym, lx->buf);
         if (sym) {
             errorf("redefinition of symbol '%s'", sym->name);
             return 1;
         }
-        sym = define(lx->sym, lx->buf, Smacro);
+        sym = define(&lx->sym, lx->buf, Smacro);
         n = strlen(c) + 2;
         sym->macro = str·makelen("", n);
         str·appendbyte(&sym->macro, '\0');
         str·append(&sym->macro, c);
     } else {
-        sym = lookup(lx->sym, lx->buf);
+        sym = lookup(&lx->sym, lx->buf);
         if (sym) {
             errorf("redefinition of symbol '%s'", sym->name);
             return 1;
-- 
cgit v1.2.1