From ce05175372a9ddca1a225db0765ace1127a39293 Mon Sep 17 00:00:00 2001 From: Nicholas Date: Fri, 12 Nov 2021 09:22:01 -0800 Subject: chore: simplified organizational structure --- src/cmd/cc/pp.c | 1125 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 1125 insertions(+) create mode 100644 src/cmd/cc/pp.c (limited to 'src/cmd/cc/pp.c') diff --git a/src/cmd/cc/pp.c b/src/cmd/cc/pp.c new file mode 100644 index 0000000..57c3501 --- /dev/null +++ b/src/cmd/cc/pp.c @@ -0,0 +1,1125 @@ +#include "cc.h" + +// ----------------------------------------------------------------------- +// helper functions + +static +void +pushomit(Lexer *lx, string omit) +{ + if (lx->omit.len == lx->omit.cap) { + lx->omit.cap += 20; + lx->omit.path = realloc(lx->omit.path, lx->omit.cap*sizeof(*lx->omit.path)); + } + lx->omit.path[lx->omit.len++] = omit; +} + +// NOTE: The iterator of lexer lx->b IS NOT reset. +// Its the caller's responsibility. +static +string +ident(Lexer *lx) +{ + int b; + byte *s; + + b = getnsbyte(lx); + if (!isalpha(b) && b != '_' && b < RuneSelf) { + ungetbyte(lx); + return ""; + } + + s = lx->b; + for (;;) { + *lx->b++ = b; + b = getbyte(lx); + if (isalnum(b) || b == '_' || b >= RuneSelf) + continue; + ungetbyte(lx); + break; + } + *lx->b++ = '\0'; + + return s; +} + +static +string +identdots(Lexer *lx, int *dots) +{ + int c; + byte *s; + + s = ident(lx); + if (*s != '\0') + return s; + + c = getnsbyte(lx); + if (c != '.') { + ungetbyte(lx); + return s; + } + + if (getbyte(lx) != '.' || getbyte(lx) != '.') + errorat(lx->pos, "incorrect '...' token in macro"); + + *dots = 1; + // TODO: should only run intern once... + s = "__VA_ARGS__"; + intern(&s); + return s; +} + +static +Sym* +defmacro(Lexer *lx, string name, string macro) +{ + Sym *mac; + + // printf("DEFINING MACRO %s ON LINE %d, file %s\n", name, lx->pos.line, os·basename(lx->pos.path)); + mac = define(&lx->sym, name, Smacro); + mac->macro = macro; + + return mac; +} + +static vlong evalmacro(Lexer *lx, byte prec); + +static +vlong +opand(Lexer *lx) +{ + int b; + vlong v; + string s; + Token tok; + Sym *sym; + + b = getnsbyte(lx); + if (b == '\n') { + errorat(lx->pos, "new line in macro expression"); + return 0; + } + ungetbyte(lx); + + tok = lex(lx); + + switch (tok.kind & Vmask) { + case Aneg: + return ~opand(lx); + + case Anot: + return !opand(lx); + + case Alparen: + v = evalmacro(lx, 1); + tok = lex(lx); + if (!(tok.kind & Arparen)) { + errorat(lx->pos, "unbalanced parenthesis in macro expression"); + return 0; + } + return v; + + case Alit: + switch (tok.kind & ~Vmask) { + case Vint: case Vlong: case Vvlong: + return tok.val.i; + case Vun|Vint : case Vun|Vlong : case Vun|Vvlong: + return tok.val.ui; + case Vrune: + return tok.val.r; + case Vchar: + return tok.val.c; + default: + errorat(lx->pos, "invalid literal of type '%s' in conditional macro", tokens[tok.kind & ~Vmask]); + return 0; + } + + case Aident: + sym = lookup(&lx->sym, tok.val.s); + if (!sym) { + /* calling lex directly would expand the operand here + * manually lex the result + */ + if (strcmp(tok.val.s, "defined") == 0) { + b = getnsbyte(lx); + if (b == '\n') { + errorat(lx->pos, "new line in defined operand"); + return 0; + } + s = lx->buf; + if (b == '(') { + b = getnsbyte(lx); + while (b != ')') { + if (b == '\n') { + errorat(lx->pos, "new line inside defined operand"); + return 0; + } + if (b == '(') { + errorat(lx->pos, "nested parens not allowed inside defined operator"); + return 0; + } + if (!isspace(b)) + *s++ = b; + b = getbyte(lx); + } + } else { + while (!isspace(b)) { + *s++ = b; + b = getbyte(lx); + + if (b == '\n') { + errorat(lx->pos, "new line inside defined operand"); + return 0; + } + } + } + *s = '\0'; + s = lx->buf; + intern(&s); + return lookup(&lx->sym, s) != nil; + } + return 0; + } + panicf("unreachable"); + return 1; + + default: + errorat(lx->pos, "opand: invalid token found in macro conditional: '%s'", tokens[tok.kind & Vmask]); + return 0; + } +} + +// recursively evaluates a macro +// reduced set of operators allowed here +static +vlong +evalmacro(Lexer *lx, byte prec) +{ + int b; + vlong l, r; + Token tok; + + l = opand(lx); + for (;;) { + b = getnsbyte(lx); + // NOTE: Either this or we pass in what are stopping byte is + // New line should always stop us... + // Is there any case where we SHOULDN'T STOP ON ')'? + if (b == '\n' || b == ')') { + ungetbyte(lx); + break; + } + ungetbyte(lx); + + tok = lex(lx); + // simplified jump table of precedence + // unpacked to evaluate inline + switch (tok.kind & Vmask) { + case Astar: + if (prec > 10) { + ungetbyte(lx); + return l; + } + r = evalmacro(lx, 10 + 1); + l = l * r; + continue; + + case Adiv: + if (prec > 10) { + ungetbyte(lx); + return l; + } + r = evalmacro(lx, 10 + 1); + l = l / r; + continue; + + case Amod: + if (prec > 10) { + ungetbyte(lx); + return l; + } + r = evalmacro(lx, 10 + 1); + l = l % r; + continue; + + case Aadd: + if (prec > 9) { + ungetbyte(lx); + return l; + } + r = evalmacro(lx, 9 + 1); + l = l + r; + continue; + + case Asub: + if (prec > 9) { + ungetbyte(lx); + return l; + } + r = evalmacro(lx, 9 + 1); + l = l - r; + continue; + + case Alsft: + if (prec > 8) { + ungetbyte(lx); + ungetbyte(lx); + return l; + } + r = evalmacro(lx, 8 + 1); + l = l << r; + continue; + + case Arsft: + if (prec > 8) { + ungetbyte(lx); + ungetbyte(lx); + return l; + } + r = evalmacro(lx, 8 + 1); + l = l >> r; + continue; + + case Alt: + if (prec > 7) { + ungetbyte(lx); + return l; + } + r = evalmacro(lx, 7 + 1); + l = l < r; + continue; + + case Agt: + if (prec > 7) { + ungetbyte(lx); + return l; + } + r = evalmacro(lx, 7 + 1); + l = l > r; + continue; + + case Agteq: + if (prec > 7) { + ungetbyte(lx); + ungetbyte(lx); + return l; + } + r = evalmacro(lx, 7 + 1); + l = l >= r; + continue; + + case Alteq: + if (prec > 7) { + ungetbyte(lx); + ungetbyte(lx); + return l; + } + r = evalmacro(lx, 7 + 1); + l = l >= r; + continue; + + case Aeq: + if (prec > 6) { + ungetbyte(lx); + ungetbyte(lx); + return l; + } + r = evalmacro(lx, 6 + 1); + l = l == r; + continue; + + case Aneq: + if (prec > 6) { + ungetbyte(lx); + ungetbyte(lx); + return l; + } + r = evalmacro(lx, 6 + 1); + l = l != r; + continue; + + case Aand: + if (prec > 5) { + ungetbyte(lx); + return l; + } + r = evalmacro(lx, 5 + 1); + l = l & r; + continue; + + case Axor: + if (prec > 4) { + ungetbyte(lx); + return l; + } + r = evalmacro(lx, 4 + 1); + l = l ^ r; + continue; + + case Aor: + if (prec > 3) { + ungetbyte(lx); + return l; + } + r = evalmacro(lx, 3 + 1); + l = l | r; + continue; + + case Aandand: + if (prec > 2) { + ungetbyte(lx); + ungetbyte(lx); + return l; + } + r = evalmacro(lx, 2 + 1); + l = l && r; + continue; + + case Aoror: + if (prec > 1) { + ungetbyte(lx); + ungetbyte(lx); + return l; + } + r = evalmacro(lx, 1 + 1); + l = l || r; + continue; + + default: + errorat(lx->pos, "eval: invalid token found in macro conditional '%s'", tokens[tok.kind & Vmask]); + abort(); + return 0; + } + } + + return l; +} + +// ----------------------------------------------------------------------- +// preprocessor magic numbers + +enum +{ + PPbeg = 0x02, + PParg = 0x03, + PPcat = 0x04, + PPstr = 0x05, + + PPnarg = 30, +}; + +#define PPvar 0x80u + +// ----------------------------------------------------------------------- +// preprocessor functions + +/* #endif */ +static +error +ppend(Lexer *lx) +{ + int b; + do { + b = getnsbyte(lx); + } while (b > 0 && b != '\n'); + + if (b == '\n') + lx->pos.line++; + + return 0; +} + + +/* #undef */ +static +error +ppund(Lexer *lx) +{ + string s; + error err; + + s = ident(lx); + intern(&s); + lx->b = lx->buf; + + err = forget(&lx->sym, s); + if (err) + warnat(lx->pos, "attempting to undefine unrecognized symbol '%s'", s); + + ppend(lx); + return 0; +} + +/* #define */ +static +error +ppdef(Lexer *lx) +{ + int b; + Sym *sym; + int i, j, n, dot; + string s, a, base, end, buf, args[PPnarg]; + + s = ident(lx); + if (!s) { + errorat(lx->pos, "failed to parse defined identifer"); + goto Bad; + } + intern(&s); + printf("DEFINING %s\n", s); + lx->b = lx->buf; + + sym = lookup(&lx->sym, s); + if (sym) + warnat(lx->pos, "macro redefined: '%s'", sym->name); + + n = 0; + dot = 0; + b = getbyte(lx); + if (b == '(') { + b = getnsbyte(lx); + if (b != ')') { + ungetbyte(lx); + for (;;) { + // NOTE: This is a pointer into the lx->buffer. + // Can't reset lx->b while we hold the args! + a = identdots(lx, &dot); + if (a == nil) { + errorat(lx->pos, "macro syntax error: improper argument"); + goto Bad; + } + if (n >= PPnarg) { + errorat(lx->pos, "macro syntax error: too many arguments: %d > %d", n, PPnarg); + goto Bad; + } + + args[n++] = a; + b = getnsbyte(lx); + + if (b == ')') + break; + if (b != ',') { + errorat(lx->pos, "macro syntax error: bad token in argument '%b'", b); + goto Bad; + } + } + } + b = getbyte(lx); + } + + if (isspace(b)) + if (b != '\n') + b = getnsbyte(lx); + + base = lx->b; + end = lx->buf + arrlen(lx->buf); + if (base >= end) { + errorat(lx->pos, "out of macro buffer space!"); + goto Bad; + } + buf = str·makef("%c%c", n, PPbeg); + for (;;) { + if (isalpha(b) || b == '_') { + lx->b = base; + *lx->b++ = b; + + b = getbyte(lx); + while (isalnum(b) || b == '_') { + *lx->b++ = b; + if (lx->b >= end) { + errorat(lx->pos, "out of macro buffer space!"); + goto Bad; + } + b = getbyte(lx); + } + *lx->b++ = '\0'; + + for (i = 0; i < n; i++) { + if (strcmp(base, args[i]) == 0) { + goto Arg; + } + } + str·appendlen(&buf, (lx->b - base - 1), base); + continue; + Arg: + str·appendbyte(&buf, PParg); + str·appendbyte(&buf, 'a' + i); + continue; + } + + if (b == '/') { + b = getbyte(lx); + if (b == '/') { + while (b = getbyte(lx), b != '\n'); + continue; + } + if (b == '*') { + b = getbyte(lx); + for (;;) { + if (b == '*') { + b = getbyte(lx); + if (b != '/') + continue; + b = getbyte(lx); + break; + } + if (b == '\n') { + errorat(lx->pos, "comment and newline found in define statement of %s", s); + break; + } + b = getbyte(lx); + } + continue; + } + str·appendbyte(&buf, '/'); + continue; + } + + if (b == '\\') { + b = getbyte(lx); + /* unix */ + if (b == '\n') { + lx->pos.line++; + b = getbyte(lx); + continue; + } + /* windows */ + if (b == '\r') { + b = getbyte(lx); + if (b == '\n') { + lx->pos.line++; + b = getbyte(lx); + continue; + } + } + str·appendbyte(&buf, '\\'); + } + if (b == '\n') { + lx->pos.line++; + break; + } + + if (b == '#') { + b = getnsbyte(lx); + if (b == '#') { + str·appendbyte(&buf, PPcat); + b = getbyte(lx); + continue; + } + + lx->b = base; + while (isalnum(b) || b == '_') { + *lx->b++ = b; + b = getbyte(lx); + } + *lx->b = '\0'; + + for (i = 0; i < n; i++) { + if (strcmp(base, args[i]) == 0) + goto Str; + } + errorat(lx->pos, "macro operator '#' must be followed by a valid variable identifier"); + goto Bad; + Str: + str·appendbyte(&buf, PPstr); + str·appendbyte(&buf, 'a' + i); + continue; + } + + str·appendbyte(&buf, b); + b = getbyte(lx); + if (b == EOF) { + errorat(lx->pos, "eof found in macro '%s'", s); + goto Bad; + } + } + if (dot) + *buf |= PPvar; + + lx->b = lx->buf; + sym = defmacro(lx, s, buf); + return 0; +Bad: + errorat(lx->pos, "failed parse of #define macro '%s'", s); + lx->b = lx->buf; + ppend(lx); + return 1; +} + +/* macro expansion */ +int +expandmacro(Lexer *lx, Sym *s, byte *dst) +{ + int n, lv, nargs, dots; + byte b, *it, *e, *arg[PPnarg]; + + /* not a function macro */ + if (s->macro[0] == '\0') { + if (s->macro[1] != PPbeg) { + errorat(lx->pos, "malformed macro"); + goto Bad; + } + strcpy(dst, s->macro + 2); + return str·len(s->macro)-2; + } + dots = (ubyte)s->macro[0] & PPvar; + nargs = (ubyte)s->macro[0] & (~PPvar); + + b = getnsbyte(lx); + if (b != '(') { + errorat(lx->pos, "macro function not given arguments"); + goto Bad; + } + + n = 0; + b = getbyte(lx); + if (b != ')') { + ungetbyte(lx); + lv = 0; + lx->b = lx->buf; + e = lx->buf + arrlen(lx->buf) - 4; + arg[n++] = lx->buf; + for (;;) { + if (lx->b >= e) + goto Nospace; + b = getbyte(lx); + if (b == '"') + for (;;) { + if (lx->b >= e) + goto Nospace; + *lx->b++ = b; + b = getbyte(lx); + if (b == '\\') { + *lx->b++ = b; + b = getbyte(lx); + continue; + } + if (b == '\n') { + errorat(lx->pos, "newline found in arguments: macro '%s'", s->name); + goto Bad; + } + if (b == '"') + break; + } + if (b == '\'') + for (;;) { + if (lx->b >= e) + goto Nospace; + *lx->b++ = b; + b = getbyte(lx); + if (b == '\\') { + *lx->b++ = b; + b = getbyte(lx); + continue; + } + if (b == '\n') { + errorat(lx->pos, "newline found in arguments: macro '%s'", s->name); + goto Bad; + } + if (b == '"') + break; + } + if (b == '/') { + b = getbyte(lx); + switch(b) { + case '*': + for (;;) { + b = getbyte(lx); + if (b == '*') { + b = getbyte(lx); + if (b == '/') + break; + } + } + *lx->b++ = ' '; + continue; + case '/': + while ((b = getbyte(lx)) != '\n') + ; + break; + + default: + ungetbyte(lx); + b = '/'; + } + } + if (lv == 0) { + if (b == ',') { + if (n == nargs && dots) { + *lx->b++ = ','; + continue; + } + *lx->b++ = '\0'; + arg[n++] = lx->b; + if (n > nargs) + break; + continue; + } + if (b == ')') + break; + } + if (b == '\n') + b = ' '; + *lx->b++ = b; + if (b == '(') + lv++; + if (b == ')') + lv--; + } + *lx->b = '\0'; + } + + if (n != nargs) { + errorat(lx->pos, "number of arguments don't match macro definition: %s", s->name); + *dst = '\0'; + goto Bad; + } + + if (s->macro[1] != PPbeg) { + errorat(lx->pos, "corrupted macro buffer: %s", s->name); + *dst = '\0'; + goto Bad; + } + + it = s->macro+2; + e = dst; + for (;;) { + b = *it++; + if (b == '\n') + b = ' '; + switch (b) { + case PParg: + b = *it++; + b -= 'a'; + if (b < 0 && b > n) { + errorat(lx->pos, "malformed macro index: %s", s->name); + goto Bad; + } + strcpy(dst, arg[b]); + dst += strlen(arg[b]); + + break; + + case PPstr: + b = *it++; + b -= 'a'; + if (b < 0 && b > n) { + errorat(lx->pos, "malformed macro index: %s", s->name); + goto Bad; + } + *dst++ = '"'; + strcpy(dst, arg[b]); + *dst++ = '"'; + + break; + + case PPcat: + continue; + + case '\0': + goto End; + + default: + *dst++ = b; + continue; + } + } +End: + *dst = '\0'; + return dst - e; +Nospace: + errorat(lx->pos, "out of memory during macro expansion %s", s->name); +Bad: + ppend(lx); + lx->b = lx->buf; + errorat(lx->pos, "failed to expand macro %s", s->name); + return -1; +} + +/* #include */ +static +error +ppinc(Lexer *lx) +{ + int i; + byte b, end; + string s; + + Stream *f; + Io *io; + + b = getnsbyte(lx); + if (b != '"') { + end = b; + if (b != '<') { + errorat(lx->pos, "unrecognized token '%c' in include directive", b); + goto Bad; + } + end = '>'; + } else + end = '"'; + + lx->b = lx->buf; + for (;;) { + b = getbyte(lx); + if (b == end) + break; + if (b == '\n') { + errorat(lx->pos, "hit end of line before include directive completed"); + goto Bad; + } + *lx->b++ = b; + } + *lx->b = '\0'; + s = lx->buf; + intern(&s); // NOTE: we could use this to see if we already have the file + + lx->b = lx->buf; + for (i = 0; i < C.inc.len; i++) { + if (i == 0 && end == '>') + continue; + + strcpy(lx->buf, C.inc.dir[i]); + strcat(lx->buf, "/"); + + if (strcmp(lx->buf, "./") == 0) + lx->buf[0] = '\0'; + strcat(lx->buf, s); + + if (os·exists(lx->buf, ReadOK)) { + break; + } + } + if (i == C.inc.len) { + errorat(lx->pos, "could not find file '%s' on standard include search path", s); + goto Bad; + } + + io = openio(lx, lx->buf); + if (io != nil) { + pushio(lx, io); + } + + return 0; + +Bad: + ungetbyte(lx); + lx->b = lx->buf; + errorat(lx->pos, "failed include"); + ppend(lx); + return 1; +} + +/* #pragma */ +static +error +ppprag(Lexer *lx) +{ + string s; + + s = ident(lx); + if (s == nil) { + errorat(lx->pos, "failed to parse pragma identifier"); + goto Bad; + } + lx->b = lx->buf; + if (strcmp(s, "once") == 0) { + pushomit(lx, lx->io->path); + return 0; + } +Bad: + lx->b = lx->buf; + errorat(lx->pos, "unrecognized pragma '%s'", s); + ppend(lx); + return 1; +} + +/* all #if statements */ +static +error +ppif(Lexer *lx, int f) +{ + Sym *sym; + string s; + int c, l, b; + +Eval: + if (f == 0) { + b = evalmacro(lx, 1); + if (b) { + ppend(lx); + return 0; + } + goto Skip; + } + + if (f == 1) + goto Skip; + + s = ident(lx); + if (s == nil) { + errorat(lx->pos, "failed to parse preprocessor identifier"); + goto Bad; + } + intern(&s); + lx->b = lx->buf; + + sym = lookup(&lx->sym, s); + if ((!sym && (f == 3)) || (sym && (f == 2))) + return 0; + +Skip: + b = 1; + l = 0; + for (;;) { + c = getbyte(lx); + if (c != '#') { + if (!isspace(c)) + b = 0; + if (c == '\n') { + lx->pos.line++; + b = 1; + } + if (c == EOF) { + errorat(lx->pos, "EOF hit while skipping if block. Missing endif"); + goto Bad; + } + continue; + } + if (!b) + continue; + s = ident(lx); + lx->b = lx->buf; + if (!s) + continue; + + if (l == 0 && (strcmp(s, "elif") == 0)) { + f = 0; + goto Eval; + } + + if (strcmp(s, "endif") == 0) { + if (l) { + l--; + continue; + } + ppend(lx); + return 0; + } + if (strcmp(s, "if") == 0 || + strcmp(s, "ifdef") == 0 || + strcmp(s, "ifndef") == 0) { + l++; + continue; + } + + if (l == 0 && f != 1 && strcmp(s, "else") == 0) { + return 0; + } + } + +Bad: + lx->b = lx->buf; + errorat(lx->pos, "bad syntax in preprocessor conditional directive"); + ppend(lx); + return 1; +} + +/* #if */ +static +error +ppif0(Lexer *lx) +{ + return ppif(lx, 0); +} + +/* #else */ +static +error +ppif1(Lexer *lx) +{ + return ppif(lx, 1); +} + +/* #ifdef */ +static +error +ppif2(Lexer *lx) +{ + return ppif(lx, 2); +} + +/* #ifndef */ +static +error +ppif3(Lexer *lx) +{ + return ppif(lx, 3); +} + +// ----------------------------------------------------------------------- +// dispatch function + +#define DIRECTIVE(a, b, c) c, +error (*macros[NUM_DIRECTIVES])(Lexer*) = { DIRECTIVES }; +#undef DIRECTIVE + +/* reads an identifier into the lexer's buffer */ +/* caller must intern */ + +error +domacro(Lexer *lx) +{ + int n; + error err; + string s; + + s = ident(lx); + intern(&s); + lx->b = lx->buf; + for (n = 0; n < NUM_DIRECTIVES; n++) { + if ((uintptr)s == (uintptr)directives[n]) { + goto Do; + } + } + errorat(lx->pos, "unrecognized directive name '%s'", s); + return 1; +Do: + err = macros[n](lx); + return err; +} + +error +dodefine(Lexer *lx, string s) +{ + int n; + byte *c, *def; + Sym *sym; + + strcpy(lx->buf, s); + c = strchr(lx->buf, '='); + if (c) { + *c++ = '\0'; + sym = lookup(&lx->sym, lx->buf); + if (sym) { + errorf("redefinition of symbol '%s'", sym->name); + return 1; + } + sym = define(&lx->sym, lx->buf, Smacro); + n = strlen(c) + 2; + sym->macro = str·makelen("", n); + str·appendbyte(&sym->macro, '\0'); + str·append(&sym->macro, c); + } else { + sym = lookup(&lx->sym, lx->buf); + if (sym) { + errorf("redefinition of symbol '%s'", sym->name); + return 1; + } + sym = define(&lx->sym, s, Smacro); + sym->macro = "\00\02"; + } + + return 0; +} -- cgit v1.2.1