From c92c89280d036240a75ff122084dd58cde473394 Mon Sep 17 00:00:00 2001 From: Nicholas Noll Date: Sun, 17 May 2020 15:49:08 -0700 Subject: feat: prototype of preprocessor --- sys/cmd/cc/pp.c | 633 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 633 insertions(+) create mode 100644 sys/cmd/cc/pp.c (limited to 'sys/cmd/cc') diff --git a/sys/cmd/cc/pp.c b/sys/cmd/cc/pp.c new file mode 100644 index 0000000..abef3c6 --- /dev/null +++ b/sys/cmd/cc/pp.c @@ -0,0 +1,633 @@ +#include "cc.h" + +// ----------------------------------------------------------------------- +// helper functions + +static +string +ident(Lexer *lx) +{ + byte b; + + lx->b = lx->buf; + for (b = getnsbyte(lx); !isspace(b); b = getbyte(lx)) { + *lx->b++ = b; + } + *lx->b = '\0'; + + return lx->buf; +} + +static +string +identdots(Lexer *lx, int *dots) +{ + int c; + string s; + + s = ident(lx); + if (s != nil) + return s; + + c = getnsbyte(lx); + if (c != '.') { + ungetbyte(lx); + return s; + } + + if (getbyte(lx) != '.' || getbyte(lx) != '.') + errorat(lx->pos, "incorrect '...' token in macro"); + + *dots = 1; + // TODO: should only run intern once... + s = "__VA_ARGS__"; + intern(&s); + return s; +} + +static +Sym* +defmacro(Lexer *lx, string name, string macro) +{ + Sym *mac; + + mac = define(lx->sym, name, Smacro); + mac->macro = macro; + + return mac; +} + +// ----------------------------------------------------------------------- +// preprocessor definitions + +enum +{ + PPbeg = 0x02, + PParg = 0x03, + PPtok = 0x04, + PPstr = 0x05, + PPvar = 0x80, + + PPnarg = 30, +}; + +// ----------------------------------------------------------------------- +// preprocessor functions + +/* #undef */ +error +ppund(Lexer *lx) +{ + Sym *sym; + string s; + + s = ident(lx); + intern(&s); + + sym = lookup(lx->sym, s); + if (!sym) { + errorat(lx->pos, "attempting to undefine unrecognized symbol '%s'", s); + } + return 0; +} + +/* #define */ +error +ppdef(Lexer *lx) +{ + byte b; + Sym *sym; + int i, n, dot; + string s, a, buf, args[PPnarg]; + + s = ident(lx); + if (!s) { + errorat(lx->pos, "failed to parse defined identifer"); + goto Bad; + } + intern(&s); + + sym = lookup(lx->sym, s); + if (sym) { + errorat(lx->pos, "macro redefined: '%s'", sym->name); + goto Bad; + } + + n = 0; + b = getbyte(lx); + if (b == '(') { + b = getnsbyte(lx); + if (b != ')') { + ungetbyte(lx); + for (;;) { + a = identdots(lx, &dot); + if (a == nil) { + errorat(lx->pos, "macro syntax error: improper argument"); + goto Bad; + } + if (n >= PPnarg) { + errorat(lx->pos, "macro syntax error: too many arguments: %d > %d", n, PPnarg); + goto Bad; + } + + args[n++] = a; + b = getnsbyte(lx); + + if (b == ')') + break; + if (b != ',') { + errorat(lx->pos, "macro syntax error: bad token in argument '%b'", b); + goto Bad; + } + } + } + b = getbyte(lx); + } + + if (isspace(b)) + if (b != '\n') + b = getnsbyte(lx); + + buf = str·makef("%d%c", n, PPbeg); + for (;;) { + if (isalpha(b) || b == '_') { + lx->b = lx->buf; + *lx->b++ = b; + + b = getbyte(lx); + while (isalnum(b) || b == '_') { + *lx->b++ = b; + b = getbyte(lx); + } + *lx->b = '\0'; + + for (i = 0; i < n; i++) { + if (strcmp(lx->buf, args[i]) == 0) { + goto Arg; + } + } + str·appendlen(&buf, (lx->b - lx->buf), lx->buf); + continue; + Arg: + str·appendbyte(&buf, PParg); + str·appendbyte(&buf, 'a' + i); + continue; + } + if (b == '/') { + b = getbyte(lx); + if (b == '/') { + while (b = getbyte(lx), b != '\n'); + continue; + } + if (b == '*') { + b = getbyte(lx); + for (;;) { + if (b == '*') { + b = getbyte(lx); + if (b != '/') + continue; + b = getbyte(lx); + break; + } + if (b == '\n') { + errorat(lx->pos, "comment and newline found in define statement of %s", s); + break; + } + b = getbyte(lx); + } + continue; + } + str·appendbyte(&buf, '/'); + continue; + } + if (b == '\\') { + b = getbyte(lx); + /* unix */ + if (b == '\n') { + b = getbyte(lx); + continue; + } + /* windows */ + if (b == '\r') { + b = getbyte(lx); + if (b == '\n') { + b = getbyte(lx); + continue; + } + } + str·appendbyte(&buf, '\\'); + } + if (b == '\n') + break; + + if (b == '#' && n > 0) { + panicf("needs implementation"); + } + + str·appendbyte(&buf, b); + b = getbyte(lx); + if (b == '\n') + lx->pos.line++; + if (b == EOF) { + errorat(lx->pos, "eof found in macro '%s'", s); + goto Bad; + } + } + if (dot) + *buf |= PPvar; + + sym = defmacro(lx, s, buf); + return 0; +Bad: + errorat(lx->pos, "failed parse of #define macro '%s'", s); + return 1; +} + +/* macro expansion */ +int +expandmacro(Lexer *lx, Sym *s, byte *dst) +{ + int n, lv, nargs, dots; + byte b, *it, *e, *arg[PPnarg]; + + /* not a function macro */ + if (*s->macro == '\0') { + strcpy(dst, s->macro + 1); + return 0; + } + + dots = *s->macro & PPvar; + nargs = (*s->macro & ~PPvar) - 1; + + b = getnsbyte(lx); + if (b != '(') + goto Bad; + n = 0; + b = getbyte(lx); + if (b != ')') { + ungetbyte(lx); + lv = 0; + lx->b = lx->buf; + e = lx->buf + arrlen(lx->buf) - 4; + arg[n++] = lx->buf; + for (;;) { + if (lx->b >= e) + goto Nospace; + b = getbyte(lx); + if (b == '"') + for (;;) { + if (lx->b >= e) + goto Nospace; + *lx->b++ = b; + b = getbyte(lx); + if (b == '\\') { + *lx->b++ = b; + b = getbyte(lx); + continue; + } + if (b == '\n') { + errorat(lx->pos, "newline found in arguments: macro '%s'", s->name); + goto Bad; + } + if (b == '"') + break; + } + if (b == '\'') + for (;;) { + if (lx->b >= e) + goto Nospace; + *lx->b++ = b; + b = getbyte(lx); + if (b == '\\') { + *lx->b++ = b; + b = getbyte(lx); + continue; + } + if (b == '\n') { + errorat(lx->pos, "newline found in arguments: macro '%s'", s->name); + goto Bad; + } + if (b == '"') + break; + } + if (b == '/') { + b = getbyte(lx); + switch(b) { + case '*': + for (;;) { + b = getbyte(lx); + if (b == '*') { + b = getbyte(lx); + if (b == '/') + break; + } + } + *lx->b++ = ' '; + continue; + case '/': + while ((b = getbyte(lx)) != '\n') + ; + break; + + default: + ungetbyte(lx); + b = '/'; + } + } + if (lv == 0) { + if (b == ',') { + if (n == nargs && dots) { + *lx->b++ = ','; + continue; + } + *lx->b++ = '\0'; + arg[n++] = lx->b; + if (n > nargs) + break; + continue; + } + if (b == ')') + break; + } + if (b == '\n') + b = ' '; + *lx->b++ = b; + if (b == '(') + lv++; + if (b == ')') + lx--; + } + *lx->b = '\0'; + } + + if (n != nargs) { + errorat(lx->pos, "number of arguments don't match macro definition: %s", s->name); + *dst = '\0'; + goto Bad; + } + + if (s->macro[1] & PPbeg) { + errorat(lx->pos, "corrupted macro buffer: %s", s->name); + *dst = '\0'; + goto Bad; + } + + it = s->macro+2; + e = dst; + for (;;) { + b = *it++; + if (b == '\n') + b = ' '; + switch (b) { + case PParg: + b = *it++; + b -= 'a'; + if (b < 0 && b > n) { + errorat(lx->pos, "malformed macro index: %s", s->name); + goto Bad; + } + strcpy(dst, arg[b]); + dst += strlen(arg[b]); + break; + + case PPtok: + case PPstr: + panicf("haven't implemented"); + break; + + case '\0': + goto End; + + default: + *dst++ = b; + continue; + } + } +End: + *dst = '\0'; + return dst - e; +Nospace: + errorf("out of memory during macro expansion %s", s->name); +Bad: + errorf("failed to expand macro %s", s->name); + return -1; +} + +/* #include */ +error +ppinc(Lexer *lx) +{ + int i; + byte b, end; + string s; + + Stream *f; + Io *io; + + b = getnsbyte(lx); + if (b != '"') { + end = b; + if (b != '<') { + errorat(lx->pos, "unrecognized token '%c' in include directive", b); + goto Bad; + } + end = '>'; + } else + end = '"'; + + lx->b = lx->buf; + for (;;) { + b = getbyte(lx); + if (b == end) + break; + if (b == '\n') { + errorat(lx->pos, "hit end of line before include directive completed"); + goto Bad; + } + *lx->b++ = b; + } + *lx->b = '\0'; + s = lx->buf; + intern(&s); // NOTE: we could use this to see if we already have the file + + lx->b = lx->buf; + for (i = 0; i < C.inc.len; i++) { + if (i == 0 && end == '>') + continue; + + strcpy(lx->buf, C.inc.dir[i]); + strcat(lx->buf, "/"); + if (strcmp(lx->buf, "./") == 0) + lx->buf[0] = '\0'; + strcat(lx->buf, s); + + if (io·exists(lx->buf, ReadOK)) { + break; + } + } + if (i == C.inc.len) { + errorat(lx->pos, "could not find included file '%s' on given search paths", s); + goto Bad; + } + + io = makeio(lx->buf); + if (io != nil) { + pushio(lx, io); + } + + return 0; + +Bad: + ungetbyte(lx); + errorf("failed include"); + return 1; +} + +/* #pragma */ +error +ppprag(Lexer *lx) +{ + string s; + + s = ident(lx); + if (s == nil) { + errorat(lx->pos, "failed to parse pragma identifier"); + goto Bad; + } + if (strcmp(s, "once")) { + lx->io->kind |= IOonce; + return 0; + } +Bad: + errorat(lx->pos, "unrecognized pragma '%s'", s); + return 1; +} + +/* all #if statements */ +static +error +ppif(Lexer *lx, int f) +{ + Sym *sym; + string s; + + if (f == 1) + goto Skip; + + s = ident(lx); + if (s == nil) { + errorat(lx->pos, "failed to parse preprocessor identifier"); + goto Bad; + } + intern(&s); + + sym = lookup(lx->sym, s); + if ((!sym && f == 3) || sym && (f == 2)) + return 1; + + +Skip: + +Bad: + errorat(lx->pos, "bad syntax in preprocessor if directive"); + return 0; +} + +/* #if */ +error +ppif0(Lexer *lx) +{ + return ppif(lx, 0); +} + +/* #else */ +error +ppif1(Lexer *lx) +{ + return ppif(lx, 1); +} + +/* #ifdef */ +error +ppif2(Lexer *lx) +{ + return ppif(lx, 2); +} + +/* #ifndef */ +error +ppif3(Lexer *lx) +{ + return ppif(lx, 3); +} + +/* #endif */ +error +ppend(Lexer *lx) +{ + return 0; +} + +// ----------------------------------------------------------------------- +// dispatch function + +#define DIRECTIVE(a, b, c) c, +error (*macros[NUM_DIRECTIVES])(Lexer*) = { DIRECTIVES }; +#undef DIRECTIVE + +/* reads an identifier into the lexer's buffer */ +/* caller must intern */ + +error +domacro(Lexer *lx) +{ + int n; + error err; + string s; + + s = ident(lx); + intern(&s); + for (n = 0; n < NUM_DIRECTIVES; n++) { + if ((uintptr)s == (uintptr)directives[n]) { + goto Do; + } + } + errorat(lx->pos, "unrecognized directive name '%s'", s); + return 1; +Do: + err = macros[n](lx); + return err; +} + +error +dodefine(Lexer *lx, string s) +{ + int n; + byte *c, *def; + Sym *sym; + + strcpy(lx->buf, s); + c = strchr(lx->buf, '='); + if (c) { + *c++ = '\0'; + sym = lookup(lx->sym, lx->buf); + if (sym) { + errorf("redefinition of symbol '%s'", sym->name); + return 1; + } + sym = define(lx->sym, lx->buf, Smacro); + n = strlen(c) + 2; + sym->macro = str·makelen("", n); + str·appendbyte(&sym->macro, '\0'); + str·append(&sym->macro, c); + } else { + sym = lookup(lx->sym, lx->buf); + if (sym) { + errorf("redefinition of symbol '%s'", sym->name); + return 1; + } + sym->macro = "\001"; + } + + return 0; +} -- cgit v1.2.1