#include "cc.h"

// -----------------------------------------------------------------------
// helper functions

/*
 * Appends `omit` to the lexer's list of paths that must not be included
 * again (used by `#pragma once`). The list grows in steps of 20 entries.
 * Panics if the list cannot be grown.
 */
static void
pushomit(Lexer *lx, string omit)
{
    void *grown;

    if (lx->omit.len == lx->omit.cap) {
        /* keep the old pointer intact until realloc is known to have succeeded */
        grown = realloc(lx->omit.path, (lx->omit.cap + 20) * sizeof(*lx->omit.path));
        if (!grown)
            panicf("out of memory");
        lx->omit.path = grown;
        lx->omit.cap += 20;
    }
    lx->omit.path[lx->omit.len++] = omit;
}

/*
 * Reads an identifier from the input into the lexer's scratch buffer,
 * starting at the current write cursor lx->b, and NUL-terminates it.
 *
 * Returns a pointer to the start of the identifier inside the scratch
 * buffer, or the empty string "" (never nil) when the next non-space byte
 * cannot start an identifier; in that case the byte is pushed back.
 *
 * NOTE: The write cursor lx->b IS NOT reset.
 *       It is the caller's responsibility. The caller must also intern
 *       the result if it needs to outlive the scratch buffer.
 */
static string
ident(Lexer *lx)
{
    int b;
    byte *s;

    b = getnsbyte(lx);
    if (!isalpha(b) && b != '_' && b < RuneSelf) {
        ungetbyte(lx);
        return "";
    }

    s = lx->b;
    for (;;) {
        *lx->b++ = b;
        b = getbyte(lx);
        if (isalnum(b) || b == '_' || b >= RuneSelf)
            continue;

        ungetbyte(lx);
        break;
    }
    *lx->b++ = '\0';

    return s;
}

/*
 * Reads one macro parameter: either an identifier or the `...` token.
 *
 * Returns the identifier, the interned name "__VA_ARGS__" when `...` was
 * read (and sets *dots to 1), or "" when neither was found.
 */
static string
identdots(Lexer *lx, int *dots)
{
    int c;
    byte *s;

    s = ident(lx);
    if (*s != '\0')
        return s;

    c = getnsbyte(lx);
    if (c != '.') {
        ungetbyte(lx);
        return s;
    }

    if (getbyte(lx) != '.' || getbyte(lx) != '.')
        errorat(lx->pos, "incorrect '...' token in macro");

    *dots = 1;
    // TODO: should only run intern once...
    s = "__VA_ARGS__";
    intern(&s);
    return s;
}

/*
 * Registers `name` as a macro symbol in the lexer's symbol table and
 * attaches the encoded macro body `macro` to it.
 */
static Sym*
defmacro(Lexer *lx, string name, string macro)
{
    Sym *mac;

    mac = define(&lx->sym, name, Smacro);
    mac->macro = macro;

    return mac;
}

static vlong evalmacro(Lexer *lx, byte prec);

/*
 * Parses and evaluates one operand of a preprocessor conditional:
 * a unary `~`/`!` expression, a parenthesised sub-expression, a literal,
 * `defined NAME` / `defined(NAME)`, or an unknown identifier (value 0).
 *
 * Errors are reported through errorat and evaluate to 0.
 */
static vlong
opand(Lexer *lx)
{
    int b;
    vlong v;
    string s;
    Token tok;
    Sym *sym;

    b = getnsbyte(lx);
    if (b == '\n') {
        errorat(lx->pos, "new line in macro expression");
        return 0;
    }
    ungetbyte(lx);

    tok = lex(lx);

    switch (tok.kind & Vmask) {
    case Aneg:
        return ~opand(lx);

    case Anot:
        return !opand(lx);

    case Alparen:
        v = evalmacro(lx, 1);
        tok = lex(lx);
        /* compare the masked kind, exactly like the switch above does */
        if ((tok.kind & Vmask) != Arparen) {
            errorat(lx->pos, "unbalanced parenthesis in macro expression");
            return 0;
        }
        return v;

    case Alit:
        switch (tok.kind & ~Vmask) {
        case Vint:
        case Vlong:
        case Vvlong:
            return tok.val.i;
        case Vun|Vint:
        case Vun|Vlong:
        case Vun|Vvlong:
            return tok.val.ui;
        case Vrune:
            return tok.val.r;
        case Vchar:
            return tok.val.c;
        default:
            errorat(lx->pos, "invalid literal of type '%s' in conditional macro", tokens[tok.kind & ~Vmask]);
            return 0;
        }

    case Aident:
        sym = lookup(&lx->sym, tok.val.s);
        if (!sym) {
            /* calling lex directly would expand the operand here
             * manually lex the result */
            if (strcmp(tok.val.s, "defined") == 0) {
                b = getnsbyte(lx);
                if (b == '\n') {
                    errorat(lx->pos, "new line in defined operand");
                    return 0;
                }

                s = lx->buf;
                if (b == '(') {
                    b = getnsbyte(lx);
                    while (b != ')') {
                        if (b == '\n') {
                            errorat(lx->pos, "new line inside defined operand");
                            return 0;
                        }
                        if (b == EOF) {
                            errorat(lx->pos, "eof inside defined operand");
                            return 0;
                        }
                        if (b == '(') {
                            errorat(lx->pos, "nested parens not allowed inside defined operator");
                            return 0;
                        }
                        if (!isspace(b))
                            *s++ = b;
                        b = getbyte(lx);
                    }
                } else {
                    /* read only identifier bytes, then hand the terminator
                     * (newline, operator, ')') back to the expression
                     * evaluator instead of consuming it */
                    while (isalnum(b) || b == '_' || b >= RuneSelf) {
                        *s++ = b;
                        b = getbyte(lx);
                    }
                    ungetbyte(lx);
                }
                *s = '\0';

                s = lx->buf;
                intern(&s);
                return lookup(&lx->sym, s) != nil;
            }
            /* any other unknown identifier evaluates to 0 */
            return 0;
        }
        /* NOTE(review): presumably lex() expands known macros before we
         * get here, which is why a resolved symbol is treated as a bug */
        panicf("unreachable");
        return 1;

    default:
        errorat(lx->pos, "opand: invalid token found in macro conditional: '%s'", tokens[tok.kind & Vmask]);
        return 0;
    }
}

/*
 * Recursively evaluates a preprocessor conditional expression
 * (precedence climbing over a reduced set of binary operators).
 *
 * prec is the lowest operator precedence this call may consume. When an
 * operator with lower precedence is met, its bytes are pushed back with
 * ungetbyte (once per character of the operator) and the value computed
 * so far is returned. Evaluation stops, without consuming, at a newline
 * or at ')'.
 */
static vlong
evalmacro(Lexer *lx, byte prec)
{
    int b;
    vlong l, r;
    Token tok;

    l = opand(lx);
    for (;;) {
        b = getnsbyte(lx);
        // NOTE: Either this or we pass in what are stopping byte is
        //       New line should always stop us...
        //       Is there any case where we SHOULDN'T STOP ON ')'?
        if (b == '\n' || b == ')') {
            ungetbyte(lx);
            break;
        }
        ungetbyte(lx);

        tok = lex(lx);
        // simplified jump table of precedence
        // unpacked to evaluate inline
        switch (tok.kind & Vmask) {
        case Astar:
            if (prec > 10) { ungetbyte(lx); return l; }
            r = evalmacro(lx, 10 + 1);
            l = l * r;
            continue;

        case Adiv:
            if (prec > 10) { ungetbyte(lx); return l; }
            r = evalmacro(lx, 10 + 1);
            if (r == 0) {
                /* integer division by zero is undefined behaviour */
                errorat(lx->pos, "division by zero in macro expression");
                l = 0;
                continue;
            }
            l = l / r;
            continue;

        case Amod:
            if (prec > 10) { ungetbyte(lx); return l; }
            r = evalmacro(lx, 10 + 1);
            if (r == 0) {
                errorat(lx->pos, "division by zero in macro expression");
                l = 0;
                continue;
            }
            l = l % r;
            continue;

        case Aadd:
            if (prec > 9) { ungetbyte(lx); return l; }
            r = evalmacro(lx, 9 + 1);
            l = l + r;
            continue;

        case Asub:
            if (prec > 9) { ungetbyte(lx); return l; }
            r = evalmacro(lx, 9 + 1);
            l = l - r;
            continue;

        case Alsft:
            if (prec > 8) { ungetbyte(lx); ungetbyte(lx); return l; }
            r = evalmacro(lx, 8 + 1);
            l = l << r;
            continue;

        case Arsft:
            if (prec > 8) { ungetbyte(lx); ungetbyte(lx); return l; }
            r = evalmacro(lx, 8 + 1);
            l = l >> r;
            continue;

        case Alt:
            if (prec > 7) { ungetbyte(lx); return l; }
            r = evalmacro(lx, 7 + 1);
            l = l < r;
            continue;

        case Agt:
            if (prec > 7) { ungetbyte(lx); return l; }
            r = evalmacro(lx, 7 + 1);
            l = l > r;
            continue;

        case Agteq:
            if (prec > 7) { ungetbyte(lx); ungetbyte(lx); return l; }
            r = evalmacro(lx, 7 + 1);
            l = l >= r;
            continue;

        case Alteq:
            if (prec > 7) { ungetbyte(lx); ungetbyte(lx); return l; }
            r = evalmacro(lx, 7 + 1);
            l = l <= r;
            continue;

        case Aeq:
            if (prec > 6) { ungetbyte(lx); ungetbyte(lx); return l; }
            r = evalmacro(lx, 6 + 1);
            l = l == r;
            continue;

        case Aneq:
            if (prec > 6) { ungetbyte(lx); ungetbyte(lx); return l; }
            r = evalmacro(lx, 6 + 1);
            l = l != r;
            continue;

        case Aand:
            if (prec > 5) { ungetbyte(lx); return l; }
            r = evalmacro(lx, 5 + 1);
            l = l & r;
            continue;

        case Axor:
            if (prec > 4) { ungetbyte(lx); return l; }
            r = evalmacro(lx, 4 + 1);
            l = l ^ r;
            continue;

        case Aor:
            if (prec > 3) { ungetbyte(lx); return l; }
            r = evalmacro(lx, 3 + 1);
            l = l | r;
            continue;

        case Aandand:
            if (prec > 2) { ungetbyte(lx); ungetbyte(lx); return l; }
            r = evalmacro(lx, 2 + 1);
            l = l && r;
            continue;

        case Aoror:
            if (prec > 1) { ungetbyte(lx); ungetbyte(lx); return l; }
            r = evalmacro(lx, 1 + 1);
            l = l || r;
            continue;

        default:
            errorat(lx->pos, "eval: invalid token found in macro conditional '%s'", tokens[tok.kind & Vmask]);
            abort();
            return 0;
        }
    }

    return l;
}

// -----------------------------------------------------------------------
// preprocessor magic numbers
//
// Encoded macro body layout, as written by ppdef and read by expandmacro:
//   byte 0   : parameter count, or'ed with PPvar for variadic macros
//   byte 1   : PPbeg
//   byte 2.. : body text; PParg/PPstr are followed by 'a' + parameter index

enum
{
    PPbeg  = 0x02,
    PParg  = 0x03,
    PPcat  = 0x04,
    PPstr  = 0x05,

    PPnarg = 30,
};

#define PPvar 0x80u

// -----------------------------------------------------------------------
// preprocessor functions

/* #endif -- also used by other directives to discard the rest of the line */
static error
ppend(Lexer *lx)
{
    int b;

    do {
        b = getnsbyte(lx);
    } while (b > 0 && b != '\n');

    if (b == '\n')
        lx->pos.line++;

    return 0;
}

/* #undef -- warns (does not fail) when the symbol was never defined */
static error
ppund(Lexer *lx)
{
    string s;
    error err;

    s = ident(lx);
    intern(&s);
    lx->b = lx->buf;

    err = forget(&lx->sym, s);
    if (err)
        warnat(lx->pos, "attempting to undefine unrecognized symbol '%s'", s);

    ppend(lx);
    return 0;
}

/*
 * #define
 *
 * Parses the macro name, an optional parameter list, and the replacement
 * text up to the end of the (possibly backslash-continued) line, encodes
 * the body (see "preprocessor magic numbers") and registers the macro.
 * Comments inside the body are dropped.
 *
 * Returns 0 on success, 1 on a syntax error (the rest of the line is
 * discarded).
 */
static error
ppdef(Lexer *lx)
{
    int b;
    Sym *sym;
    int i, n, dot;
    string s, a, base, end, buf, args[PPnarg];

    s = ident(lx);
    /* ident reports failure with an empty string, not nil */
    if (!s || *s == '\0') {
        errorat(lx->pos, "failed to parse defined identifer");
        goto Bad;
    }
    intern(&s);
    lx->b = lx->buf;

    sym = lookup(&lx->sym, s);
    if (sym)
        warnat(lx->pos, "macro redefined: '%s'", sym->name);

    n = 0;
    dot = 0;
    b = getbyte(lx);
    if (b == '(') {
        b = getnsbyte(lx);
        if (b != ')') {
            ungetbyte(lx);
            for (;;) {
                // NOTE: This is a pointer into the lx->buffer.
                //       Can't reset lx->b while we hold the args!
                a = identdots(lx, &dot);
                if (a == nil || *a == '\0') {
                    errorat(lx->pos, "macro syntax error: improper argument");
                    goto Bad;
                }
                if (n >= PPnarg) {
                    errorat(lx->pos, "macro syntax error: too many arguments: %d > %d", n, PPnarg);
                    goto Bad;
                }

                args[n++] = a;
                b = getnsbyte(lx);

                if (b == ')')
                    break;
                if (b != ',') {
                    errorat(lx->pos, "macro syntax error: bad token in argument '%c'", b);
                    goto Bad;
                }
            }
        }
        b = getbyte(lx);
    }

    if (isspace(b))
        if (b != '\n')
            b = getnsbyte(lx);

    /* scratch space for body identifiers starts after the saved parameters */
    base = lx->b;
    end = lx->buf + arrlen(lx->buf);
    if (base >= end) {
        errorat(lx->pos, "out of macro buffer space!");
        goto Bad;
    }

    buf = str·makef("%c%c", n, PPbeg);
    for (;;) {
        /* identifier: either a parameter reference or plain text */
        if (isalpha(b) || b == '_') {
            lx->b = base;
            *lx->b++ = b;

            b = getbyte(lx);
            while (isalnum(b) || b == '_') {
                *lx->b++ = b;
                if (lx->b >= end) {
                    errorat(lx->pos, "out of macro buffer space!");
                    goto Bad;
                }
                b = getbyte(lx);
            }
            *lx->b++ = '\0';

            for (i = 0; i < n; i++) {
                if (strcmp(base, args[i]) == 0) {
                    goto Arg;
                }
            }
            str·appendlen(&buf, (lx->b - base - 1), base);
            continue;
        Arg:
            str·appendbyte(&buf, PParg);
            str·appendbyte(&buf, 'a' + i);
            continue;
        }

        /* comments are stripped from the body */
        if (b == '/') {
            b = getbyte(lx);
            if (b == '/') {
                do {
                    b = getbyte(lx);
                } while (b != '\n' && b != EOF);
                continue;
            }
            if (b == '*') {
                b = getbyte(lx);
                for (;;) {
                    if (b == '*') {
                        b = getbyte(lx);
                        if (b != '/')
                            continue;
                        b = getbyte(lx);
                        break;
                    }
                    if (b == '\n') {
                        errorat(lx->pos, "comment and newline found in define statement of %s", s);
                        break;
                    }
                    if (b == EOF)
                        break; /* reported by the eof check at the bottom of the loop */
                    b = getbyte(lx);
                }
                continue;
            }
            str·appendbyte(&buf, '/');
            continue;
        }

        /* line continuation */
        if (b == '\\') {
            b = getbyte(lx);
            /* unix */
            if (b == '\n') {
                lx->pos.line++;
                b = getbyte(lx);
                continue;
            }
            /* windows */
            if (b == '\r') {
                b = getbyte(lx);
                if (b == '\n') {
                    lx->pos.line++;
                    b = getbyte(lx);
                    continue;
                }
            }
            str·appendbyte(&buf, '\\');
        }

        if (b == '\n') {
            lx->pos.line++;
            break;
        }

        /* '##' concatenation or '#param' stringification */
        if (b == '#') {
            b = getnsbyte(lx);
            if (b == '#') {
                str·appendbyte(&buf, PPcat);
                b = getbyte(lx);
                continue;
            }

            lx->b = base;
            while (isalnum(b) || b == '_') {
                *lx->b++ = b;
                if (lx->b >= end) {
                    errorat(lx->pos, "out of macro buffer space!");
                    goto Bad;
                }
                b = getbyte(lx);
            }
            *lx->b = '\0';

            for (i = 0; i < n; i++) {
                if (strcmp(base, args[i]) == 0)
                    goto Str;
            }
            errorat(lx->pos, "macro operator '#' must be followed by a valid variable identifier");
            goto Bad;
        Str:
            str·appendbyte(&buf, PPstr);
            str·appendbyte(&buf, 'a' + i);
            continue;
        }

        str·appendbyte(&buf, b);
        b = getbyte(lx);
        if (b == EOF) {
            errorat(lx->pos, "eof found in macro '%s'", s);
            goto Bad;
        }
    }

    if (dot)
        *buf |= PPvar;

    lx->b = lx->buf;
    defmacro(lx, s, buf);
    return 0;

Bad:
    errorat(lx->pos, "failed parse of #define macro '%s'", s);
    lx->b = lx->buf;
    ppend(lx);
    return 1;
}

/*
 * macro expansion
 *
 * Expands macro `s` into `dst` (NUL-terminated). For function-like macros
 * the argument list is read from the lexer input into the lexer's scratch
 * buffer first; comments inside arguments are dropped and newlines become
 * spaces.
 *
 * Returns the number of bytes written (excluding the terminator), or -1
 * on error. `dst` is not bounds checked; the caller must supply a buffer
 * that is large enough.
 *
 * NOTE(review): a macro with zero parameters is always treated as
 * object-like, so the "()" of a call to `#define F() x` is left in the input.
 * NOTE(review): the argument scanner reads into a `byte` and therefore
 * cannot detect EOF; an unterminated argument list at end of input is
 * only stopped by the scratch-buffer limit or not at all inside comments.
 */
int
expandmacro(Lexer *lx, Sym *s, byte *dst)
{
    int n, lv, nargs, dots, idx;
    byte b, *it, *e, *start, *arg[PPnarg];

    /* not a function macro */
    if (s->macro[0] == '\0') {
        if (s->macro[1] != PPbeg) {
            errorat(lx->pos, "malformed macro");
            goto Bad;
        }
        strcpy(dst, s->macro + 2);
        return str·len(s->macro)-2;
    }

    dots = (ubyte)s->macro[0] & PPvar;
    nargs = (ubyte)s->macro[0] & (~PPvar);

    b = getnsbyte(lx);
    if (b != '(') {
        errorat(lx->pos, "macro function not given arguments");
        goto Bad;
    }

    n = 0;
    b = getbyte(lx);
    if (b != ')') {
        ungetbyte(lx);
        lv = 0;
        lx->b = lx->buf;
        e = lx->buf + arrlen(lx->buf) - 4;
        arg[n++] = lx->buf;
        for (;;) {
            if (lx->b >= e)
                goto Nospace;
            b = getbyte(lx);

            /* string literal: copy verbatim up to the closing quote */
            if (b == '"')
                for (;;) {
                    if (lx->b >= e)
                        goto Nospace;
                    *lx->b++ = b;
                    b = getbyte(lx);
                    if (b == '\\') {
                        *lx->b++ = b;
                        b = getbyte(lx);
                        continue;
                    }
                    if (b == '\n') {
                        errorat(lx->pos, "newline found in arguments: macro '%s'", s->name);
                        goto Bad;
                    }
                    if (b == '"')
                        break;
                }

            /* character literal: copy verbatim up to the closing quote */
            if (b == '\'')
                for (;;) {
                    if (lx->b >= e)
                        goto Nospace;
                    *lx->b++ = b;
                    b = getbyte(lx);
                    if (b == '\\') {
                        *lx->b++ = b;
                        b = getbyte(lx);
                        continue;
                    }
                    if (b == '\n') {
                        errorat(lx->pos, "newline found in arguments: macro '%s'", s->name);
                        goto Bad;
                    }
                    if (b == '\'')
                        break;
                }

            if (b == '/') {
                b = getbyte(lx);
                switch (b) {
                case '*':
                    /* skip the block comment; re-test a '*' that follows
                     * another '*' so that a closing "**" "/" is recognised */
                    b = getbyte(lx);
                    for (;;) {
                        if (b != '*') {
                            b = getbyte(lx);
                            continue;
                        }
                        b = getbyte(lx);
                        if (b == '/')
                            break;
                    }
                    *lx->b++ = ' ';
                    continue;
                case '/':
                    while ((b = getbyte(lx)) != '\n')
                        ;
                    break;
                default:
                    ungetbyte(lx);
                    b = '/';
                }
            }

            if (lv == 0) {
                if (b == ',') {
                    /* commas inside the variadic tail belong to the last argument */
                    if (n == nargs && dots) {
                        *lx->b++ = ',';
                        continue;
                    }
                    *lx->b++ = '\0';
                    /* too many arguments: count it so the mismatch is
                     * reported below, but never write past arg[] */
                    if (n >= nargs || n >= PPnarg) {
                        n++;
                        break;
                    }
                    arg[n++] = lx->b;
                    continue;
                }
                if (b == ')')
                    break;
            }

            if (b == '\n')
                b = ' ';
            *lx->b++ = b;
            if (b == '(')
                lv++;
            if (b == ')')
                lv--;
        }
        *lx->b = '\0';
    }

    if (n != nargs) {
        errorat(lx->pos, "number of arguments don't match macro definition: %s", s->name);
        *dst = '\0';
        goto Bad;
    }

    if (s->macro[1] != PPbeg) {
        errorat(lx->pos, "corrupted macro buffer: %s", s->name);
        *dst = '\0';
        goto Bad;
    }

    it = s->macro+2;
    start = dst;
    for (;;) {
        b = *it++;
        if (b == '\n')
            b = ' ';
        switch (b) {
        case PParg:
            idx = *it++ - 'a';
            if (idx < 0 || idx >= n) {
                errorat(lx->pos, "malformed macro index: %s", s->name);
                goto Bad;
            }
            strcpy(dst, arg[idx]);
            dst += strlen(arg[idx]);
            break;

        case PPstr:
            idx = *it++ - 'a';
            if (idx < 0 || idx >= n) {
                errorat(lx->pos, "malformed macro index: %s", s->name);
                goto Bad;
            }
            *dst++ = '"';
            strcpy(dst, arg[idx]);
            dst += strlen(arg[idx]);
            *dst++ = '"';
            break;

        case PPcat:
            continue;

        case '\0':
            goto End;

        default:
            *dst++ = b;
            continue;
        }
    }
End:
    *dst = '\0';
    return dst - start;

Nospace:
    errorat(lx->pos, "out of memory during macro expansion %s", s->name);
Bad:
    ppend(lx);
    lx->b = lx->buf;
    errorat(lx->pos, "failed to expand macro %s", s->name);
    return -1;
}

/*
 * #include
 *
 * Reads a "file" or <file> name, searches the include directories in
 * C.inc.dir (skipping entry 0 for the <file> form) and pushes the first
 * readable match onto the lexer's input stack.
 *
 * Returns 0 on success, 1 on error.
 */
static error
ppinc(Lexer *lx)
{
    int i, n;
    byte b, end;
    string s;
    Io *io;

    b = getnsbyte(lx);
    if (b != '"') {
        end = b;
        if (b != '<') {
            errorat(lx->pos, "unrecognized token '%c' in include directive", b);
            goto Bad;
        }
        end = '>';
    } else
        end = '"';

    lx->b = lx->buf;
    for (;;) {
        b = getbyte(lx);
        if (b == end)
            break;
        if (b == '\n') {
            errorat(lx->pos, "hit end of line before include directive completed");
            goto Bad;
        }
        /* leave room for the terminator */
        if (lx->b >= lx->buf + arrlen(lx->buf) - 1) {
            errorat(lx->pos, "include path too long");
            goto Bad;
        }
        *lx->b++ = b;
    }
    *lx->b = '\0';
    s = lx->buf;
    intern(&s); // NOTE: we could use this to see if we already have the file

    lx->b = lx->buf;
    for (i = 0; i < C.inc.len; i++) {
        if (i == 0 && end == '>')
            continue;

        /* the directory "." is searched without a "./" prefix */
        if (strcmp(C.inc.dir[i], ".") == 0)
            n = snprintf(lx->buf, arrlen(lx->buf), "%s", s);
        else
            n = snprintf(lx->buf, arrlen(lx->buf), "%s/%s", C.inc.dir[i], s);

        if (n < 0 || n >= (int)arrlen(lx->buf)) {
            errorat(lx->pos, "include path too long");
            goto Bad;
        }

        if (os·exists(lx->buf, ReadOK)) {
            break;
        }
    }
    if (i == C.inc.len) {
        errorat(lx->pos, "could not find file '%s' on standard include search path", s);
        goto Bad;
    }

    io = openio(lx, lx->buf);
    if (io != nil) {
        pushio(lx, io);
    }

    return 0;

Bad:
    ungetbyte(lx);
    lx->b = lx->buf;
    errorat(lx->pos, "failed include");
    ppend(lx);
    return 1;
}

/*
 * #pragma -- only `once` is recognised; anything else is reported as an
 * error and the rest of the line is discarded.
 */
static error
ppprag(Lexer *lx)
{
    string s;

    s = ident(lx);
    /* ident reports failure with an empty string, not nil */
    if (s == nil || *s == '\0') {
        errorat(lx->pos, "failed to parse pragma identifier");
        goto Bad;
    }
    lx->b = lx->buf;
    if (strcmp(s, "once") == 0) {
        pushomit(lx, lx->io->path);
        return 0;
    }
Bad:
    lx->b = lx->buf;
    errorat(lx->pos, "unrecognized pragma '%s'", s);
    ppend(lx);
    return 1;
}

/*
 * all #if statements
 *
 * f selects the directive:
 *   0  #if / #elif  evaluate an expression
 *   1  #else        reached from an active branch: skip to the matching #endif
 *   2  #ifdef
 *   3  #ifndef
 *
 * When the condition holds the function returns and lexing continues with
 * the branch body. Otherwise input is skipped (tracking nested
 * conditionals) until a matching #elif that evaluates true, a matching
 * #else, or the matching #endif.
 *
 * Returns 0 on success, 1 on error.
 */
static error
ppif(Lexer *lx, int f)
{
    Sym *sym;
    string s;
    int c, l, b;

Eval:
    if (f == 0) {
        /* compare before narrowing: the vlong result may not fit in an int */
        b = evalmacro(lx, 1) != 0;
        if (b) {
            ppend(lx);
            return 0;
        }
        goto Skip;
    }

    if (f == 1)
        goto Skip;

    s = ident(lx);
    /* ident reports failure with an empty string, not nil */
    if (s == nil || *s == '\0') {
        errorat(lx->pos, "failed to parse preprocessor identifier");
        goto Bad;
    }
    intern(&s);
    lx->b = lx->buf;

    sym = lookup(&lx->sym, s);
    if ((!sym && (f == 3)) || (sym && (f == 2)))
        return 0;

Skip:
    b = 1; /* non-zero while only whitespace has been seen on the current line */
    l = 0; /* nesting depth of conditionals opened inside the skipped region */
    for (;;) {
        c = getbyte(lx);
        if (c != '#') {
            if (!isspace(c))
                b = 0;

            if (c == '\n') {
                lx->pos.line++;
                b = 1;
            }

            if (c == EOF) {
                errorat(lx->pos, "EOF hit while skipping if block. Missing endif");
                goto Bad;
            }
            continue;
        }

        /* a '#' that is not the first token on its line is not a directive */
        if (!b)
            continue;

        s = ident(lx);
        lx->b = lx->buf;
        if (!s || *s == '\0')
            continue;

        /* once a branch has been taken (f == 1) later #elif's stay skipped,
         * mirroring the `else` test below */
        if (l == 0 && f != 1 && (strcmp(s, "elif") == 0)) {
            f = 0;
            goto Eval;
        }

        if (strcmp(s, "endif") == 0) {
            if (l) {
                l--;
                continue;
            }
            ppend(lx);
            return 0;
        }

        if (strcmp(s, "if") == 0 || strcmp(s, "ifdef") == 0 || strcmp(s, "ifndef") == 0) {
            l++;
            continue;
        }

        if (l == 0 && f != 1 && strcmp(s, "else") == 0) {
            return 0;
        }
    }

Bad:
    lx->b = lx->buf;
    errorat(lx->pos, "bad syntax in preprocessor conditional directive");
    ppend(lx);
    return 1;
}

/* #if */
static error
ppif0(Lexer *lx)
{
    return ppif(lx, 0);
}

/* #else */
static error
ppif1(Lexer *lx)
{
    return ppif(lx, 1);
}

/* #ifdef */
static error
ppif2(Lexer *lx)
{
    return ppif(lx, 2);
}

/* #ifndef */
static error
ppif3(Lexer *lx)
{
    return ppif(lx, 3);
}

// -----------------------------------------------------------------------
// dispatch function

/* handler table, built from the third field of every DIRECTIVES entry */
#define DIRECTIVE(a, b, c) c,
error (*macros[NUM_DIRECTIVES])(Lexer*) = { DIRECTIVES };
#undef DIRECTIVE

/*
 * Reads the directive name that follows '#', and runs the handler whose
 * entry in `directives` is the same interned string.
 *
 * Returns the handler's result, or 1 for an unrecognized directive.
 */
error
domacro(Lexer *lx)
{
    int n;
    error err;
    string s;

    s = ident(lx);
    intern(&s);
    lx->b = lx->buf;
    for (n = 0; n < NUM_DIRECTIVES; n++) {
        /* interned strings are compared by address */
        if ((uintptr)s == (uintptr)directives[n]) {
            goto Do;
        }
    }
    errorat(lx->pos, "unrecognized directive name '%s'", s);
    return 1;
Do:
    err = macros[n](lx);
    return err;
}

/*
 * Defines an object-like macro from a "NAME" or "NAME=VALUE" string
 * (presumably a command line definition -- confirm against the caller).
 * "NAME" alone defines a macro with an empty body.
 *
 * The body is stored in the same encoding ppdef produces (parameter count
 * 0, then PPbeg, then the text) so that expandmacro accepts it.
 *
 * Returns 0 on success, 1 if the definition is too long or the symbol
 * already exists.
 */
error
dodefine(Lexer *lx, string s)
{
    byte *c;
    string name, macro;
    Sym *sym;

    if (strlen(s) >= arrlen(lx->buf)) {
        errorf("macro definition too long: '%s'", s);
        return 1;
    }
    strcpy(lx->buf, s);

    c = strchr(lx->buf, '=');
    if (c)
        *c++ = '\0';

    /* lx->buf is scratch space: intern the name before storing it */
    name = lx->buf;
    intern(&name);

    sym = lookup(&lx->sym, name);
    if (sym) {
        errorf("redefinition of symbol '%s'", sym->name);
        return 1;
    }

    macro = str·makef("%c%c", 0, PPbeg);
    if (c)
        str·append(&macro, c);

    defmacro(lx, name, macro);
    return 0;
}