From ce05175372a9ddca1a225db0765ace1127a39293 Mon Sep 17 00:00:00 2001 From: Nicholas Date: Fri, 12 Nov 2021 09:22:01 -0800 Subject: chore: simplified organizational structure --- sys/cmd/cc/pp.c | 1125 ------------------------------------------------------- 1 file changed, 1125 deletions(-) delete mode 100644 sys/cmd/cc/pp.c (limited to 'sys/cmd/cc/pp.c') diff --git a/sys/cmd/cc/pp.c b/sys/cmd/cc/pp.c deleted file mode 100644 index 57c3501..0000000 --- a/sys/cmd/cc/pp.c +++ /dev/null @@ -1,1125 +0,0 @@ -#include "cc.h" - -// ----------------------------------------------------------------------- -// helper functions - -static -void -pushomit(Lexer *lx, string omit) -{ - if (lx->omit.len == lx->omit.cap) { - lx->omit.cap += 20; - lx->omit.path = realloc(lx->omit.path, lx->omit.cap*sizeof(*lx->omit.path)); - } - lx->omit.path[lx->omit.len++] = omit; -} - -// NOTE: The iterator of lexer lx->b IS NOT reset. -// Its the caller's responsibility. -static -string -ident(Lexer *lx) -{ - int b; - byte *s; - - b = getnsbyte(lx); - if (!isalpha(b) && b != '_' && b < RuneSelf) { - ungetbyte(lx); - return ""; - } - - s = lx->b; - for (;;) { - *lx->b++ = b; - b = getbyte(lx); - if (isalnum(b) || b == '_' || b >= RuneSelf) - continue; - ungetbyte(lx); - break; - } - *lx->b++ = '\0'; - - return s; -} - -static -string -identdots(Lexer *lx, int *dots) -{ - int c; - byte *s; - - s = ident(lx); - if (*s != '\0') - return s; - - c = getnsbyte(lx); - if (c != '.') { - ungetbyte(lx); - return s; - } - - if (getbyte(lx) != '.' || getbyte(lx) != '.') - errorat(lx->pos, "incorrect '...' token in macro"); - - *dots = 1; - // TODO: should only run intern once... - s = "__VA_ARGS__"; - intern(&s); - return s; -} - -static -Sym* -defmacro(Lexer *lx, string name, string macro) -{ - Sym *mac; - - // printf("DEFINING MACRO %s ON LINE %d, file %s\n", name, lx->pos.line, os·basename(lx->pos.path)); - mac = define(&lx->sym, name, Smacro); - mac->macro = macro; - - return mac; -} - -static vlong evalmacro(Lexer *lx, byte prec); - -static -vlong -opand(Lexer *lx) -{ - int b; - vlong v; - string s; - Token tok; - Sym *sym; - - b = getnsbyte(lx); - if (b == '\n') { - errorat(lx->pos, "new line in macro expression"); - return 0; - } - ungetbyte(lx); - - tok = lex(lx); - - switch (tok.kind & Vmask) { - case Aneg: - return ~opand(lx); - - case Anot: - return !opand(lx); - - case Alparen: - v = evalmacro(lx, 1); - tok = lex(lx); - if (!(tok.kind & Arparen)) { - errorat(lx->pos, "unbalanced parenthesis in macro expression"); - return 0; - } - return v; - - case Alit: - switch (tok.kind & ~Vmask) { - case Vint: case Vlong: case Vvlong: - return tok.val.i; - case Vun|Vint : case Vun|Vlong : case Vun|Vvlong: - return tok.val.ui; - case Vrune: - return tok.val.r; - case Vchar: - return tok.val.c; - default: - errorat(lx->pos, "invalid literal of type '%s' in conditional macro", tokens[tok.kind & ~Vmask]); - return 0; - } - - case Aident: - sym = lookup(&lx->sym, tok.val.s); - if (!sym) { - /* calling lex directly would expand the operand here - * manually lex the result - */ - if (strcmp(tok.val.s, "defined") == 0) { - b = getnsbyte(lx); - if (b == '\n') { - errorat(lx->pos, "new line in defined operand"); - return 0; - } - s = lx->buf; - if (b == '(') { - b = getnsbyte(lx); - while (b != ')') { - if (b == '\n') { - errorat(lx->pos, "new line inside defined operand"); - return 0; - } - if (b == '(') { - errorat(lx->pos, "nested parens not allowed inside defined operator"); - return 0; - } - if (!isspace(b)) - *s++ = b; - b = getbyte(lx); - } - } else { - while (!isspace(b)) { - *s++ = b; - b = getbyte(lx); - - if (b == '\n') { - errorat(lx->pos, "new line inside defined operand"); - return 0; - } - } - } - *s = '\0'; - s = lx->buf; - intern(&s); - return lookup(&lx->sym, s) != nil; - } - return 0; - } - panicf("unreachable"); - return 1; - - default: - errorat(lx->pos, "opand: invalid token found in macro conditional: '%s'", tokens[tok.kind & Vmask]); - return 0; - } -} - -// recursively evaluates a macro -// reduced set of operators allowed here -static -vlong -evalmacro(Lexer *lx, byte prec) -{ - int b; - vlong l, r; - Token tok; - - l = opand(lx); - for (;;) { - b = getnsbyte(lx); - // NOTE: Either this or we pass in what are stopping byte is - // New line should always stop us... - // Is there any case where we SHOULDN'T STOP ON ')'? - if (b == '\n' || b == ')') { - ungetbyte(lx); - break; - } - ungetbyte(lx); - - tok = lex(lx); - // simplified jump table of precedence - // unpacked to evaluate inline - switch (tok.kind & Vmask) { - case Astar: - if (prec > 10) { - ungetbyte(lx); - return l; - } - r = evalmacro(lx, 10 + 1); - l = l * r; - continue; - - case Adiv: - if (prec > 10) { - ungetbyte(lx); - return l; - } - r = evalmacro(lx, 10 + 1); - l = l / r; - continue; - - case Amod: - if (prec > 10) { - ungetbyte(lx); - return l; - } - r = evalmacro(lx, 10 + 1); - l = l % r; - continue; - - case Aadd: - if (prec > 9) { - ungetbyte(lx); - return l; - } - r = evalmacro(lx, 9 + 1); - l = l + r; - continue; - - case Asub: - if (prec > 9) { - ungetbyte(lx); - return l; - } - r = evalmacro(lx, 9 + 1); - l = l - r; - continue; - - case Alsft: - if (prec > 8) { - ungetbyte(lx); - ungetbyte(lx); - return l; - } - r = evalmacro(lx, 8 + 1); - l = l << r; - continue; - - case Arsft: - if (prec > 8) { - ungetbyte(lx); - ungetbyte(lx); - return l; - } - r = evalmacro(lx, 8 + 1); - l = l >> r; - continue; - - case Alt: - if (prec > 7) { - ungetbyte(lx); - return l; - } - r = evalmacro(lx, 7 + 1); - l = l < r; - continue; - - case Agt: - if (prec > 7) { - ungetbyte(lx); - return l; - } - r = evalmacro(lx, 7 + 1); - l = l > r; - continue; - - case Agteq: - if (prec > 7) { - ungetbyte(lx); - ungetbyte(lx); - return l; - } - r = evalmacro(lx, 7 + 1); - l = l >= r; - continue; - - case Alteq: - if (prec > 7) { - ungetbyte(lx); - ungetbyte(lx); - return l; - } - r = evalmacro(lx, 7 + 1); - l = l >= r; - continue; - - case Aeq: - if (prec > 6) { - ungetbyte(lx); - ungetbyte(lx); - return l; - } - r = evalmacro(lx, 6 + 1); - l = l == r; - continue; - - case Aneq: - if (prec > 6) { - ungetbyte(lx); - ungetbyte(lx); - return l; - } - r = evalmacro(lx, 6 + 1); - l = l != r; - continue; - - case Aand: - if (prec > 5) { - ungetbyte(lx); - return l; - } - r = evalmacro(lx, 5 + 1); - l = l & r; - continue; - - case Axor: - if (prec > 4) { - ungetbyte(lx); - return l; - } - r = evalmacro(lx, 4 + 1); - l = l ^ r; - continue; - - case Aor: - if (prec > 3) { - ungetbyte(lx); - return l; - } - r = evalmacro(lx, 3 + 1); - l = l | r; - continue; - - case Aandand: - if (prec > 2) { - ungetbyte(lx); - ungetbyte(lx); - return l; - } - r = evalmacro(lx, 2 + 1); - l = l && r; - continue; - - case Aoror: - if (prec > 1) { - ungetbyte(lx); - ungetbyte(lx); - return l; - } - r = evalmacro(lx, 1 + 1); - l = l || r; - continue; - - default: - errorat(lx->pos, "eval: invalid token found in macro conditional '%s'", tokens[tok.kind & Vmask]); - abort(); - return 0; - } - } - - return l; -} - -// ----------------------------------------------------------------------- -// preprocessor magic numbers - -enum -{ - PPbeg = 0x02, - PParg = 0x03, - PPcat = 0x04, - PPstr = 0x05, - - PPnarg = 30, -}; - -#define PPvar 0x80u - -// ----------------------------------------------------------------------- -// preprocessor functions - -/* #endif */ -static -error -ppend(Lexer *lx) -{ - int b; - do { - b = getnsbyte(lx); - } while (b > 0 && b != '\n'); - - if (b == '\n') - lx->pos.line++; - - return 0; -} - - -/* #undef */ -static -error -ppund(Lexer *lx) -{ - string s; - error err; - - s = ident(lx); - intern(&s); - lx->b = lx->buf; - - err = forget(&lx->sym, s); - if (err) - warnat(lx->pos, "attempting to undefine unrecognized symbol '%s'", s); - - ppend(lx); - return 0; -} - -/* #define */ -static -error -ppdef(Lexer *lx) -{ - int b; - Sym *sym; - int i, j, n, dot; - string s, a, base, end, buf, args[PPnarg]; - - s = ident(lx); - if (!s) { - errorat(lx->pos, "failed to parse defined identifer"); - goto Bad; - } - intern(&s); - printf("DEFINING %s\n", s); - lx->b = lx->buf; - - sym = lookup(&lx->sym, s); - if (sym) - warnat(lx->pos, "macro redefined: '%s'", sym->name); - - n = 0; - dot = 0; - b = getbyte(lx); - if (b == '(') { - b = getnsbyte(lx); - if (b != ')') { - ungetbyte(lx); - for (;;) { - // NOTE: This is a pointer into the lx->buffer. - // Can't reset lx->b while we hold the args! - a = identdots(lx, &dot); - if (a == nil) { - errorat(lx->pos, "macro syntax error: improper argument"); - goto Bad; - } - if (n >= PPnarg) { - errorat(lx->pos, "macro syntax error: too many arguments: %d > %d", n, PPnarg); - goto Bad; - } - - args[n++] = a; - b = getnsbyte(lx); - - if (b == ')') - break; - if (b != ',') { - errorat(lx->pos, "macro syntax error: bad token in argument '%b'", b); - goto Bad; - } - } - } - b = getbyte(lx); - } - - if (isspace(b)) - if (b != '\n') - b = getnsbyte(lx); - - base = lx->b; - end = lx->buf + arrlen(lx->buf); - if (base >= end) { - errorat(lx->pos, "out of macro buffer space!"); - goto Bad; - } - buf = str·makef("%c%c", n, PPbeg); - for (;;) { - if (isalpha(b) || b == '_') { - lx->b = base; - *lx->b++ = b; - - b = getbyte(lx); - while (isalnum(b) || b == '_') { - *lx->b++ = b; - if (lx->b >= end) { - errorat(lx->pos, "out of macro buffer space!"); - goto Bad; - } - b = getbyte(lx); - } - *lx->b++ = '\0'; - - for (i = 0; i < n; i++) { - if (strcmp(base, args[i]) == 0) { - goto Arg; - } - } - str·appendlen(&buf, (lx->b - base - 1), base); - continue; - Arg: - str·appendbyte(&buf, PParg); - str·appendbyte(&buf, 'a' + i); - continue; - } - - if (b == '/') { - b = getbyte(lx); - if (b == '/') { - while (b = getbyte(lx), b != '\n'); - continue; - } - if (b == '*') { - b = getbyte(lx); - for (;;) { - if (b == '*') { - b = getbyte(lx); - if (b != '/') - continue; - b = getbyte(lx); - break; - } - if (b == '\n') { - errorat(lx->pos, "comment and newline found in define statement of %s", s); - break; - } - b = getbyte(lx); - } - continue; - } - str·appendbyte(&buf, '/'); - continue; - } - - if (b == '\\') { - b = getbyte(lx); - /* unix */ - if (b == '\n') { - lx->pos.line++; - b = getbyte(lx); - continue; - } - /* windows */ - if (b == '\r') { - b = getbyte(lx); - if (b == '\n') { - lx->pos.line++; - b = getbyte(lx); - continue; - } - } - str·appendbyte(&buf, '\\'); - } - if (b == '\n') { - lx->pos.line++; - break; - } - - if (b == '#') { - b = getnsbyte(lx); - if (b == '#') { - str·appendbyte(&buf, PPcat); - b = getbyte(lx); - continue; - } - - lx->b = base; - while (isalnum(b) || b == '_') { - *lx->b++ = b; - b = getbyte(lx); - } - *lx->b = '\0'; - - for (i = 0; i < n; i++) { - if (strcmp(base, args[i]) == 0) - goto Str; - } - errorat(lx->pos, "macro operator '#' must be followed by a valid variable identifier"); - goto Bad; - Str: - str·appendbyte(&buf, PPstr); - str·appendbyte(&buf, 'a' + i); - continue; - } - - str·appendbyte(&buf, b); - b = getbyte(lx); - if (b == EOF) { - errorat(lx->pos, "eof found in macro '%s'", s); - goto Bad; - } - } - if (dot) - *buf |= PPvar; - - lx->b = lx->buf; - sym = defmacro(lx, s, buf); - return 0; -Bad: - errorat(lx->pos, "failed parse of #define macro '%s'", s); - lx->b = lx->buf; - ppend(lx); - return 1; -} - -/* macro expansion */ -int -expandmacro(Lexer *lx, Sym *s, byte *dst) -{ - int n, lv, nargs, dots; - byte b, *it, *e, *arg[PPnarg]; - - /* not a function macro */ - if (s->macro[0] == '\0') { - if (s->macro[1] != PPbeg) { - errorat(lx->pos, "malformed macro"); - goto Bad; - } - strcpy(dst, s->macro + 2); - return str·len(s->macro)-2; - } - dots = (ubyte)s->macro[0] & PPvar; - nargs = (ubyte)s->macro[0] & (~PPvar); - - b = getnsbyte(lx); - if (b != '(') { - errorat(lx->pos, "macro function not given arguments"); - goto Bad; - } - - n = 0; - b = getbyte(lx); - if (b != ')') { - ungetbyte(lx); - lv = 0; - lx->b = lx->buf; - e = lx->buf + arrlen(lx->buf) - 4; - arg[n++] = lx->buf; - for (;;) { - if (lx->b >= e) - goto Nospace; - b = getbyte(lx); - if (b == '"') - for (;;) { - if (lx->b >= e) - goto Nospace; - *lx->b++ = b; - b = getbyte(lx); - if (b == '\\') { - *lx->b++ = b; - b = getbyte(lx); - continue; - } - if (b == '\n') { - errorat(lx->pos, "newline found in arguments: macro '%s'", s->name); - goto Bad; - } - if (b == '"') - break; - } - if (b == '\'') - for (;;) { - if (lx->b >= e) - goto Nospace; - *lx->b++ = b; - b = getbyte(lx); - if (b == '\\') { - *lx->b++ = b; - b = getbyte(lx); - continue; - } - if (b == '\n') { - errorat(lx->pos, "newline found in arguments: macro '%s'", s->name); - goto Bad; - } - if (b == '"') - break; - } - if (b == '/') { - b = getbyte(lx); - switch(b) { - case '*': - for (;;) { - b = getbyte(lx); - if (b == '*') { - b = getbyte(lx); - if (b == '/') - break; - } - } - *lx->b++ = ' '; - continue; - case '/': - while ((b = getbyte(lx)) != '\n') - ; - break; - - default: - ungetbyte(lx); - b = '/'; - } - } - if (lv == 0) { - if (b == ',') { - if (n == nargs && dots) { - *lx->b++ = ','; - continue; - } - *lx->b++ = '\0'; - arg[n++] = lx->b; - if (n > nargs) - break; - continue; - } - if (b == ')') - break; - } - if (b == '\n') - b = ' '; - *lx->b++ = b; - if (b == '(') - lv++; - if (b == ')') - lv--; - } - *lx->b = '\0'; - } - - if (n != nargs) { - errorat(lx->pos, "number of arguments don't match macro definition: %s", s->name); - *dst = '\0'; - goto Bad; - } - - if (s->macro[1] != PPbeg) { - errorat(lx->pos, "corrupted macro buffer: %s", s->name); - *dst = '\0'; - goto Bad; - } - - it = s->macro+2; - e = dst; - for (;;) { - b = *it++; - if (b == '\n') - b = ' '; - switch (b) { - case PParg: - b = *it++; - b -= 'a'; - if (b < 0 && b > n) { - errorat(lx->pos, "malformed macro index: %s", s->name); - goto Bad; - } - strcpy(dst, arg[b]); - dst += strlen(arg[b]); - - break; - - case PPstr: - b = *it++; - b -= 'a'; - if (b < 0 && b > n) { - errorat(lx->pos, "malformed macro index: %s", s->name); - goto Bad; - } - *dst++ = '"'; - strcpy(dst, arg[b]); - *dst++ = '"'; - - break; - - case PPcat: - continue; - - case '\0': - goto End; - - default: - *dst++ = b; - continue; - } - } -End: - *dst = '\0'; - return dst - e; -Nospace: - errorat(lx->pos, "out of memory during macro expansion %s", s->name); -Bad: - ppend(lx); - lx->b = lx->buf; - errorat(lx->pos, "failed to expand macro %s", s->name); - return -1; -} - -/* #include */ -static -error -ppinc(Lexer *lx) -{ - int i; - byte b, end; - string s; - - Stream *f; - Io *io; - - b = getnsbyte(lx); - if (b != '"') { - end = b; - if (b != '<') { - errorat(lx->pos, "unrecognized token '%c' in include directive", b); - goto Bad; - } - end = '>'; - } else - end = '"'; - - lx->b = lx->buf; - for (;;) { - b = getbyte(lx); - if (b == end) - break; - if (b == '\n') { - errorat(lx->pos, "hit end of line before include directive completed"); - goto Bad; - } - *lx->b++ = b; - } - *lx->b = '\0'; - s = lx->buf; - intern(&s); // NOTE: we could use this to see if we already have the file - - lx->b = lx->buf; - for (i = 0; i < C.inc.len; i++) { - if (i == 0 && end == '>') - continue; - - strcpy(lx->buf, C.inc.dir[i]); - strcat(lx->buf, "/"); - - if (strcmp(lx->buf, "./") == 0) - lx->buf[0] = '\0'; - strcat(lx->buf, s); - - if (os·exists(lx->buf, ReadOK)) { - break; - } - } - if (i == C.inc.len) { - errorat(lx->pos, "could not find file '%s' on standard include search path", s); - goto Bad; - } - - io = openio(lx, lx->buf); - if (io != nil) { - pushio(lx, io); - } - - return 0; - -Bad: - ungetbyte(lx); - lx->b = lx->buf; - errorat(lx->pos, "failed include"); - ppend(lx); - return 1; -} - -/* #pragma */ -static -error -ppprag(Lexer *lx) -{ - string s; - - s = ident(lx); - if (s == nil) { - errorat(lx->pos, "failed to parse pragma identifier"); - goto Bad; - } - lx->b = lx->buf; - if (strcmp(s, "once") == 0) { - pushomit(lx, lx->io->path); - return 0; - } -Bad: - lx->b = lx->buf; - errorat(lx->pos, "unrecognized pragma '%s'", s); - ppend(lx); - return 1; -} - -/* all #if statements */ -static -error -ppif(Lexer *lx, int f) -{ - Sym *sym; - string s; - int c, l, b; - -Eval: - if (f == 0) { - b = evalmacro(lx, 1); - if (b) { - ppend(lx); - return 0; - } - goto Skip; - } - - if (f == 1) - goto Skip; - - s = ident(lx); - if (s == nil) { - errorat(lx->pos, "failed to parse preprocessor identifier"); - goto Bad; - } - intern(&s); - lx->b = lx->buf; - - sym = lookup(&lx->sym, s); - if ((!sym && (f == 3)) || (sym && (f == 2))) - return 0; - -Skip: - b = 1; - l = 0; - for (;;) { - c = getbyte(lx); - if (c != '#') { - if (!isspace(c)) - b = 0; - if (c == '\n') { - lx->pos.line++; - b = 1; - } - if (c == EOF) { - errorat(lx->pos, "EOF hit while skipping if block. Missing endif"); - goto Bad; - } - continue; - } - if (!b) - continue; - s = ident(lx); - lx->b = lx->buf; - if (!s) - continue; - - if (l == 0 && (strcmp(s, "elif") == 0)) { - f = 0; - goto Eval; - } - - if (strcmp(s, "endif") == 0) { - if (l) { - l--; - continue; - } - ppend(lx); - return 0; - } - if (strcmp(s, "if") == 0 || - strcmp(s, "ifdef") == 0 || - strcmp(s, "ifndef") == 0) { - l++; - continue; - } - - if (l == 0 && f != 1 && strcmp(s, "else") == 0) { - return 0; - } - } - -Bad: - lx->b = lx->buf; - errorat(lx->pos, "bad syntax in preprocessor conditional directive"); - ppend(lx); - return 1; -} - -/* #if */ -static -error -ppif0(Lexer *lx) -{ - return ppif(lx, 0); -} - -/* #else */ -static -error -ppif1(Lexer *lx) -{ - return ppif(lx, 1); -} - -/* #ifdef */ -static -error -ppif2(Lexer *lx) -{ - return ppif(lx, 2); -} - -/* #ifndef */ -static -error -ppif3(Lexer *lx) -{ - return ppif(lx, 3); -} - -// ----------------------------------------------------------------------- -// dispatch function - -#define DIRECTIVE(a, b, c) c, -error (*macros[NUM_DIRECTIVES])(Lexer*) = { DIRECTIVES }; -#undef DIRECTIVE - -/* reads an identifier into the lexer's buffer */ -/* caller must intern */ - -error -domacro(Lexer *lx) -{ - int n; - error err; - string s; - - s = ident(lx); - intern(&s); - lx->b = lx->buf; - for (n = 0; n < NUM_DIRECTIVES; n++) { - if ((uintptr)s == (uintptr)directives[n]) { - goto Do; - } - } - errorat(lx->pos, "unrecognized directive name '%s'", s); - return 1; -Do: - err = macros[n](lx); - return err; -} - -error -dodefine(Lexer *lx, string s) -{ - int n; - byte *c, *def; - Sym *sym; - - strcpy(lx->buf, s); - c = strchr(lx->buf, '='); - if (c) { - *c++ = '\0'; - sym = lookup(&lx->sym, lx->buf); - if (sym) { - errorf("redefinition of symbol '%s'", sym->name); - return 1; - } - sym = define(&lx->sym, lx->buf, Smacro); - n = strlen(c) + 2; - sym->macro = str·makelen("", n); - str·appendbyte(&sym->macro, '\0'); - str·append(&sym->macro, c); - } else { - sym = lookup(&lx->sym, lx->buf); - if (sym) { - errorf("redefinition of symbol '%s'", sym->name); - return 1; - } - sym = define(&lx->sym, s, Smacro); - sym->macro = "\00\02"; - } - - return 0; -} -- cgit v1.2.1