aboutsummaryrefslogtreecommitdiff
path: root/sys/cmd/cc
diff options
context:
space:
mode:
authorNicholas Noll <nbnoll@eml.cc>2020-05-17 15:49:08 -0700
committerNicholas Noll <nbnoll@eml.cc>2020-05-17 15:49:08 -0700
commitc92c89280d036240a75ff122084dd58cde473394 (patch)
treefd0465930cfcafcec9888bfe64bacce74386edb8 /sys/cmd/cc
parent2104b91ab1d7ef585f01578406b0bed3da348460 (diff)
feat: prototype of preprocessor
Diffstat (limited to 'sys/cmd/cc')
-rw-r--r--sys/cmd/cc/pp.c633
1 files changed, 633 insertions, 0 deletions
diff --git a/sys/cmd/cc/pp.c b/sys/cmd/cc/pp.c
new file mode 100644
index 0000000..abef3c6
--- /dev/null
+++ b/sys/cmd/cc/pp.c
@@ -0,0 +1,633 @@
+#include "cc.h"
+
+// -----------------------------------------------------------------------
+// helper functions
+
+/*
+ * Reads one C identifier ([A-Za-z_][A-Za-z0-9_]*) into lx->buf.
+ *
+ * Leading whitespace is skipped with getnsbyte. The byte that terminates
+ * the identifier is pushed back with ungetbyte so the caller can inspect
+ * it (ppdef relies on this to see the '(' of a function-like macro and the
+ * newline of an empty definition).
+ *
+ * Returns lx->buf (NUL terminated, lx->b pointing at the terminator) or
+ * nil when the next token is not an identifier, the input is exhausted, or
+ * the identifier does not fit in lx->buf. The result lives in the lexer's
+ * scratch buffer: callers must intern it before the buffer is reused.
+ */
+static
+string
+ident(Lexer *lx)
+{
+    byte b, *end;
+
+    /* keep one slot free for the terminator */
+    end = lx->buf + arrlen(lx->buf) - 1;
+
+    lx->b  = lx->buf;
+    *lx->b = '\0';
+
+    b = getnsbyte(lx);
+    if (b == EOF)
+        return nil;
+    if (!isalpha((unsigned char)b) && b != '_') {
+        /* not ours: leave it for the caller (e.g. '.' in identdots) */
+        ungetbyte(lx);
+        return nil;
+    }
+
+    do {
+        if (lx->b >= end) {
+            errorat(lx->pos, "identifier too long");
+            *lx->buf = '\0';
+            return nil;
+        }
+        *lx->b++ = b;
+        b = getbyte(lx);
+    } while (b != EOF && (isalnum((unsigned char)b) || b == '_'));
+
+    /* NOTE(review): assumes ungetbyte backs up exactly one getbyte;
+     * nothing is pushed back at EOF */
+    if (b != EOF)
+        ungetbyte(lx);
+    *lx->b = '\0';
+
+    return lx->buf;
+}
+
+/*
+ * Reads one macro parameter: either an identifier or the '...' token.
+ *
+ * For an identifier the result of ident() is returned unchanged (it still
+ * points into lx->buf). For '...' *dots is set to 1 and the interned
+ * string "__VA_ARGS__" is returned. *dots is never cleared here; the
+ * caller must initialise it.
+ *
+ * Returns nil when neither form is present or when the '...' token is
+ * malformed (after reporting it).
+ */
+static
+string
+identdots(Lexer *lx, int *dots)
+{
+    int c;
+    string s;
+
+    s = ident(lx);
+    if (s != nil)
+        return s;
+
+    c = getnsbyte(lx);
+    if (c != '.') {
+        ungetbyte(lx);
+        return nil;
+    }
+
+    if (getbyte(lx) != '.' || getbyte(lx) != '.') {
+        errorat(lx->pos, "incorrect '...' token in macro");
+        /* do not hand a broken token back as a valid variadic parameter */
+        return nil;
+    }
+
+    *dots = 1;
+    // TODO: should only run intern once...
+    s = "__VA_ARGS__";
+    intern(&s);
+    return s;
+}
+
+/*
+ * Registers 'name' as an Smacro symbol in the lexer's symbol table and
+ * attaches the encoded replacement buffer 'macro' to it.
+ * Returns the symbol produced by define().
+ */
+static
+Sym*
+defmacro(Lexer *lx, string name, string macro)
+{
+    Sym *sym = define(lx->sym, name, Smacro);
+
+    sym->macro = macro;
+    return sym;
+}
+
+// -----------------------------------------------------------------------
+// preprocessor definitions
+
+enum /* control bytes and limits used in the encoded macro buffer (Sym.macro) */
+{
+ PPbeg = 0x02, /* written by ppdef right after the argument count; expandmacro tests macro[1] against it */
+ PParg = 0x03, /* ppdef emits this followed by 'a' + parameter index; expandmacro replaces the pair with the argument text */
+ PPtok = 0x04, /* expandmacro panics on it ("haven't implemented"); presumably token pasting - TODO confirm */
+ PPstr = 0x05, /* expandmacro panics on it ("haven't implemented"); presumably stringification - TODO confirm */
+ PPvar = 0x80, /* OR'd into the first buffer byte by ppdef when the parameter list contained '...' */
+
+ PPnarg = 30, /* capacity of the parameter/argument arrays in ppdef and expandmacro */
+};
+
+// -----------------------------------------------------------------------
+// preprocessor functions
+
+/* #undef */
+/*
+ * Parses the identifier of an '#undef' directive.
+ *
+ * Returns 1 when no identifier could be read, 0 otherwise. Naming a symbol
+ * that is not defined is not an error: C11 6.10.3.5 says the directive is
+ * ignored in that case.
+ *
+ * NOTE(review): nothing is removed from the symbol table yet. No removal
+ * routine is visible from this file, so a defined macro stays defined.
+ */
+error
+ppund(Lexer *lx)
+{
+    string s;
+
+    s = ident(lx);
+    if (s == nil) {
+        errorat(lx->pos, "failed to parse identifier in undef directive");
+        return 1;
+    }
+    intern(&s);
+
+    if (lookup(lx->sym, s) == nil)
+        return 0;
+
+    /* TODO: drop the symbol once the table exposes a way to do so */
+    return 0;
+}
+
+/* #define */
+/*
+ * Parses the rest of a '#define' line and registers the macro.
+ *
+ * The replacement list is stored in the layout expandmacro decodes:
+ *   object-like:   '\0' body...
+ *   function-like: (nparams + 1) [| PPvar if '...'], PPbeg, body...
+ * Inside a function-like body each use of a parameter is replaced by the
+ * pair PParg, 'a' + index. Comments are dropped and backslash-newline
+ * (also backslash-CR-LF) joins lines.
+ *
+ * Returns 0 on success, 1 after reporting an error.
+ *
+ * NOTE(review): on an error after the buffer was created the buffer is
+ * not released; no free routine for 'string' is visible from here.
+ */
+error
+ppdef(Lexer *lx)
+{
+    byte b, *lim;
+    Sym *sym;
+    int i, n, dot, func;
+    string s, a, buf, args[PPnarg];
+
+    dot  = 0;   /* set by identdots when '...' is seen */
+    func = 0;   /* set when a parameter list is present */
+
+    s = ident(lx);
+    if (!s) {
+        errorat(lx->pos, "failed to parse defined identifer");
+        goto Bad;
+    }
+    intern(&s);
+
+    sym = lookup(lx->sym, s);
+    if (sym) {
+        errorat(lx->pos, "macro redefined: '%s'", sym->name);
+        goto Bad;
+    }
+
+    n = 0;
+    b = getbyte(lx);
+    if (b == '(') {
+        func = 1;
+        b = getnsbyte(lx);
+        if (b != ')') {
+            ungetbyte(lx);
+            for (;;) {
+                a = identdots(lx, &dot);
+                if (a == nil) {
+                    errorat(lx->pos, "macro syntax error: improper argument");
+                    goto Bad;
+                }
+                if (n >= PPnarg) {
+                    errorat(lx->pos, "macro syntax error: too many arguments: %d > %d", n, PPnarg);
+                    goto Bad;
+                }
+
+                /* identifiers come back pointing into lx->buf, which the
+                 * next read overwrites: keep a stable copy */
+                intern(&a);
+                args[n++] = a;
+                b = getnsbyte(lx);
+
+                if (b == ')')
+                    break;
+                if (b != ',') {
+                    errorat(lx->pos, "macro syntax error: bad token in argument '%c'", b);
+                    goto Bad;
+                }
+            }
+        }
+        b = getbyte(lx);
+    }
+
+    /* skip blanks before the body, but never step over the newline */
+    while (b != EOF && b != '\n' && isspace((unsigned char)b))
+        b = getbyte(lx);
+
+    if (func) {
+        buf = str·makef("%c%c", n + 1, PPbeg);
+    } else {
+        buf = str·makelen("", 0);
+        str·appendbyte(&buf, '\0');
+    }
+
+    lim = lx->buf + arrlen(lx->buf) - 1;
+    for (;;) {
+        if (b == EOF) {
+            errorat(lx->pos, "eof found in macro '%s'", s);
+            goto Bad;
+        }
+        if (isalpha((unsigned char)b) || b == '_') {
+            lx->b = lx->buf;
+            *lx->b++ = b;
+
+            b = getbyte(lx);
+            while (b != EOF && (isalnum((unsigned char)b) || b == '_')) {
+                if (lx->b >= lim) {
+                    errorat(lx->pos, "identifier too long in macro '%s'", s);
+                    goto Bad;
+                }
+                *lx->b++ = b;
+                b = getbyte(lx);
+            }
+            *lx->b = '\0';
+
+            for (i = 0; i < n; i++) {
+                if (strcmp(lx->buf, args[i]) == 0)
+                    break;
+            }
+            if (i < n) {
+                /* parameter reference */
+                str·appendbyte(&buf, PParg);
+                str·appendbyte(&buf, 'a' + i);
+            } else {
+                str·appendlen(&buf, (lx->b - lx->buf), lx->buf);
+            }
+            continue;
+        }
+        if (b == '/') {
+            b = getbyte(lx);
+            if (b == '/') {
+                /* line comment: ends the definition at the newline */
+                do {
+                    b = getbyte(lx);
+                } while (b != '\n' && b != EOF);
+                continue;
+            }
+            if (b == '*') {
+                b = getbyte(lx);
+                for (;;) {
+                    if (b == EOF)
+                        break;  /* reported at the top of the loop */
+                    if (b == '*') {
+                        b = getbyte(lx);
+                        if (b != '/')
+                            continue;
+                        b = getbyte(lx);
+                        break;
+                    }
+                    if (b == '\n') {
+                        errorat(lx->pos, "comment and newline found in define statement of %s", s);
+                        break;
+                    }
+                    b = getbyte(lx);
+                }
+                continue;
+            }
+            str·appendbyte(&buf, '/');
+            continue;
+        }
+        if (b == '\\') {
+            b = getbyte(lx);
+            /* unix */
+            if (b == '\n') {
+                b = getbyte(lx);
+                continue;
+            }
+            /* windows */
+            if (b == '\r') {
+                b = getbyte(lx);
+                if (b == '\n') {
+                    b = getbyte(lx);
+                    continue;
+                }
+            }
+            str·appendbyte(&buf, '\\');
+            if (b == EOF)
+                continue;
+        }
+        if (b == '\n')
+            break;
+
+        if (b == '#' && n > 0) {
+            panicf("needs implementation");
+        }
+
+        str·appendbyte(&buf, b);
+        b = getbyte(lx);
+        if (b == '\n')
+            lx->pos.line++;
+    }
+    if (dot)
+        *buf |= PPvar;
+
+    defmacro(lx, s, buf);
+    return 0;
+Bad:
+    errorat(lx->pos, "failed parse of #define macro '%s'", s ? s : "");
+    return 1;
+}
+
+/* macro expansion */
+/*
+ * Expands macro 's' into 'dst'.
+ *
+ * Buffer layout read from s->macro:
+ *   object-like:   '\0' body...          -> body is copied verbatim
+ *   function-like: (nparams + 1) [| PPvar], PPbeg, body...
+ * For a function-like macro the call's arguments are read from the lexer
+ * into lx->buf (split on top-level commas, comments replaced by a blank,
+ * newlines turned into blanks) and every PParg pair in the body is
+ * replaced by the matching argument.
+ *
+ * Returns the number of bytes written for a function-like macro, 0 for an
+ * object-like macro, and -1 after reporting an error.
+ *
+ * NOTE(review): the object-like path returns 0 rather than the copied
+ * length; kept as is because callers are not visible from here.
+ * NOTE(review): 'dst' has no size parameter, so the copies into it cannot
+ * be bounds checked from this function.
+ */
+int
+expandmacro(Lexer *lx, Sym *s, byte *dst)
+{
+    int n, lv, idx, nargs, dots;
+    byte b, q, *it, *e, *arg[PPnarg];
+
+    /* not a function macro */
+    if (*s->macro == '\0') {
+        strcpy(dst, s->macro + 1);
+        return 0;
+    }
+
+    /* mask with the low seven bits so a signed 'byte' decodes correctly */
+    dots  = *s->macro & PPvar;
+    nargs = (*s->macro & (PPvar - 1)) - 1;
+
+    if (nargs < 0 || nargs > PPnarg || s->macro[1] != PPbeg) {
+        errorat(lx->pos, "corrupted macro buffer: %s", s->name);
+        *dst = '\0';
+        goto Bad;
+    }
+
+    b = getnsbyte(lx);
+    if (b != '(')
+        goto Bad;
+    n = 0;
+    b = getbyte(lx);
+    if (b != ')') {
+        ungetbyte(lx);
+        lv = 0;
+        lx->b = lx->buf;
+        /* slack for the few unchecked single-byte stores below */
+        e = lx->buf + arrlen(lx->buf) - 4;
+        arg[n++] = lx->buf;
+        for (;;) {
+            if (lx->b >= e)
+                goto Nospace;
+            b = getbyte(lx);
+            if (b == EOF) {
+                errorat(lx->pos, "eof found in arguments: macro '%s'", s->name);
+                goto Bad;
+            }
+            /* string and character literals: copy through to the closing
+             * quote so commas and parentheses inside are not interpreted */
+            if (b == '"' || b == '\'') {
+                q = b;
+                for (;;) {
+                    if (lx->b >= e)
+                        goto Nospace;
+                    *lx->b++ = b;
+                    b = getbyte(lx);
+                    if (b == '\\') {
+                        *lx->b++ = b;
+                        b = getbyte(lx);
+                        continue;
+                    }
+                    if (b == '\n') {
+                        errorat(lx->pos, "newline found in arguments: macro '%s'", s->name);
+                        goto Bad;
+                    }
+                    if (b == EOF) {
+                        errorat(lx->pos, "eof found in arguments: macro '%s'", s->name);
+                        goto Bad;
+                    }
+                    if (b == q)
+                        break;
+                }
+            }
+            if (b == '/') {
+                b = getbyte(lx);
+                if (b == '*') {
+                    b = getbyte(lx);
+                    for (;;) {
+                        if (b == EOF) {
+                            errorat(lx->pos, "eof found in arguments: macro '%s'", s->name);
+                            goto Bad;
+                        }
+                        if (b == '*') {
+                            b = getbyte(lx);
+                            if (b == '/')
+                                break;
+                            /* re-test this byte: it may start the real end */
+                            continue;
+                        }
+                        b = getbyte(lx);
+                    }
+                    *lx->b++ = ' ';
+                    continue;
+                } else if (b == '/') {
+                    do {
+                        b = getbyte(lx);
+                    } while (b != '\n' && b != EOF);
+                    if (b == EOF) {
+                        errorat(lx->pos, "eof found in arguments: macro '%s'", s->name);
+                        goto Bad;
+                    }
+                } else {
+                    ungetbyte(lx);
+                    b = '/';
+                }
+            }
+            if (lv == 0) {
+                if (b == ',') {
+                    if (n == nargs && dots) {
+                        /* commas past the last named parameter belong to
+                         * the variadic argument */
+                        *lx->b++ = ',';
+                        continue;
+                    }
+                    *lx->b++ = '\0';
+                    if (n >= nargs) {
+                        /* too many arguments: count it, store nothing */
+                        n++;
+                        break;
+                    }
+                    arg[n++] = lx->b;
+                    continue;
+                }
+                if (b == ')')
+                    break;
+            }
+            if (b == '\n')
+                b = ' ';
+            *lx->b++ = b;
+            if (b == '(')
+                lv++;
+            if (b == ')')
+                lv--;
+        }
+        *lx->b = '\0';
+    }
+
+    if (n != nargs) {
+        errorat(lx->pos, "number of arguments don't match macro definition: %s", s->name);
+        *dst = '\0';
+        goto Bad;
+    }
+
+    it = s->macro+2;
+    e = dst;
+    for (;;) {
+        b = *it++;
+        if (b == '\n')
+            b = ' ';
+        switch (b) {
+        case PParg:
+            idx = *it++ - 'a';
+            if (idx < 0 || idx >= n) {
+                errorat(lx->pos, "malformed macro index: %s", s->name);
+                goto Bad;
+            }
+            strcpy(dst, arg[idx]);
+            dst += strlen(arg[idx]);
+            break;
+
+        case PPtok:
+        case PPstr:
+            panicf("haven't implemented");
+            break;
+
+        case '\0':
+            goto End;
+
+        default:
+            *dst++ = b;
+            continue;
+        }
+    }
+End:
+    *dst = '\0';
+    return dst - e;
+Nospace:
+    errorf("out of memory during macro expansion %s", s->name);
+    /* fall through: also report the failed expansion */
+Bad:
+    errorf("failed to expand macro %s", s->name);
+    return -1;
+}
+
+/* #include */
+/*
+ * Handles '#include "file"' and '#include <file>'.
+ *
+ * The file name is read into lx->buf and interned, then each directory in
+ * C.inc.dir is tried in order; entry 0 is skipped for the <...> form. The
+ * first readable candidate is opened with makeio and pushed onto the
+ * lexer with pushio.
+ *
+ * Returns 0 on success, 1 after reporting an error.
+ */
+error
+ppinc(Lexer *lx)
+{
+    int i;
+    byte b, end, *lim;
+    string s;
+    Io *io;
+
+    b = getnsbyte(lx);
+    if (b == '"') {
+        end = '"';
+    } else if (b == '<') {
+        end = '>';
+    } else {
+        errorat(lx->pos, "unrecognized token '%c' in include directive", b);
+        goto Unget;
+    }
+
+    lim = lx->buf + arrlen(lx->buf) - 1;
+    lx->b = lx->buf;
+    for (;;) {
+        b = getbyte(lx);
+        if (b == end)
+            break;
+        if (b == '\n') {
+            errorat(lx->pos, "hit end of line before include directive completed");
+            goto Unget;
+        }
+        if (b == EOF) {
+            errorat(lx->pos, "hit end of file before include directive completed");
+            goto Bad;
+        }
+        if (lx->b >= lim) {
+            errorat(lx->pos, "include path too long");
+            goto Bad;
+        }
+        *lx->b++ = b;
+    }
+    *lx->b = '\0';
+    s = lx->buf;
+    intern(&s); // NOTE: we could use this to see if we already have the file
+
+    for (i = 0; i < C.inc.len; i++) {
+        if (i == 0 && end == '>')
+            continue;
+
+        /* dir + '/' + name + NUL must fit in the scratch buffer */
+        if (strlen(C.inc.dir[i]) + strlen(s) + 2 > arrlen(lx->buf))
+            continue;
+
+        strcpy(lx->buf, C.inc.dir[i]);
+        strcat(lx->buf, "/");
+        if (strcmp(lx->buf, "./") == 0)
+            lx->buf[0] = '\0';
+        strcat(lx->buf, s);
+
+        if (io·exists(lx->buf, ReadOK)) {
+            break;
+        }
+    }
+    if (i == C.inc.len) {
+        /* the closing delimiter was consumed on purpose: do not un-read it */
+        errorat(lx->pos, "could not find included file '%s' on given search paths", s);
+        goto Bad;
+    }
+
+    /* NOTE(review): a nil result is skipped silently, as before; it may be
+     * how an already-included '#pragma once' file is signalled - confirm */
+    io = makeio(lx->buf);
+    if (io != nil) {
+        pushio(lx, io);
+    }
+
+    return 0;
+
+Unget:
+    /* hand the offending byte back to the lexer */
+    ungetbyte(lx);
+Bad:
+    errorf("failed include");
+    return 1;
+}
+
+/* #pragma */
+/*
+ * Handles '#pragma <name>'. Only 'once' is recognised: it sets the IOonce
+ * bit on the current input (lx->io->kind).
+ *
+ * Returns 0 for a recognised pragma, 1 after reporting an error.
+ */
+error
+ppprag(Lexer *lx)
+{
+    string s;
+
+    s = ident(lx);
+    if (s == nil) {
+        errorat(lx->pos, "failed to parse pragma identifier");
+        return 1;
+    }
+    /* strcmp returns 0 on a match */
+    if (strcmp(s, "once") == 0) {
+        lx->io->kind |= IOonce;
+        return 0;
+    }
+
+    errorat(lx->pos, "unrecognized pragma '%s'", s);
+    return 1;
+}
+
+/* all #if statements */
+/*
+ * Shared worker for the conditional directives. 'f' selects the mode used
+ * by the wrappers in this file: 0 (#if), 1 (#else), 2 (#ifdef),
+ * 3 (#ifndef).
+ *
+ * Returns 1 when mode 2 finds the symbol defined or mode 3 finds it
+ * undefined. Every other path reports "bad syntax in preprocessor if
+ * directive" and returns 0.
+ *
+ * NOTE(review): skipping of inactive branches is not implemented; mode 1
+ * and a false condition both end in the diagnostic above.
+ */
+static
+error
+ppif(Lexer *lx, int f)
+{
+    Sym *sym;
+    string s;
+
+    if (f != 1) {
+        s = ident(lx);
+        if (s == nil) {
+            errorat(lx->pos, "failed to parse preprocessor identifier");
+        } else {
+            intern(&s);
+            sym = lookup(lx->sym, s);
+
+            switch (f) {
+            case 2:
+                if (sym)
+                    return 1;
+                break;
+            case 3:
+                if (!sym)
+                    return 1;
+                break;
+            default:
+                break;
+            }
+        }
+    }
+
+    errorat(lx->pos, "bad syntax in preprocessor if directive");
+    return 0;
+}
+
+/* #if: runs ppif in mode 0 */
+error
+ppif0(Lexer *lx)
+{
+    error err;
+
+    err = ppif(lx, 0);
+    return err;
+}
+
+/* #else: runs ppif in mode 1 */
+error
+ppif1(Lexer *lx)
+{
+    error err;
+
+    err = ppif(lx, 1);
+    return err;
+}
+
+/* #ifdef: runs ppif in mode 2 */
+error
+ppif2(Lexer *lx)
+{
+    error err;
+
+    err = ppif(lx, 2);
+    return err;
+}
+
+/* #ifndef: runs ppif in mode 3 */
+error
+ppif3(Lexer *lx)
+{
+    error err;
+
+    err = ppif(lx, 3);
+    return err;
+}
+
+/* #endif: nothing to do yet; always reports success */
+error
+ppend(Lexer *lx)
+{
+    (void)lx;   /* unused */
+
+    return 0;
+}
+
+// -----------------------------------------------------------------------
+// dispatch function
+
+#define DIRECTIVE(a, b, c) c, /* keep only the third field of each DIRECTIVES entry */
+error (*macros[NUM_DIRECTIVES])(Lexer*) = { DIRECTIVES }; /* handler table; domacro indexes it with the position found in directives[]. DIRECTIVES and NUM_DIRECTIVES are not defined in this file (presumably cc.h) */
+#undef DIRECTIVE
+
+/*
+ * Dispatches one preprocessor directive.
+ *
+ * Reads the directive name with ident(), interns it and compares the
+ * interned pointer against each entry of directives[]; the handler at the
+ * same index in macros[] is then run on the rest of the line.
+ * NOTE(review): the pointer comparison only works if the entries of
+ * directives[] are interned strings - set up outside this file.
+ *
+ * Returns the handler's result, or 1 after reporting a missing or unknown
+ * directive name.
+ */
+error
+domacro(Lexer *lx)
+{
+    int n;
+    string s;
+
+    s = ident(lx);
+    if (s == nil) {
+        errorat(lx->pos, "failed to parse directive name");
+        return 1;
+    }
+    intern(&s);
+
+    for (n = 0; n < NUM_DIRECTIVES; n++) {
+        if ((uintptr)s == (uintptr)directives[n])
+            return macros[n](lx);
+    }
+
+    errorat(lx->pos, "unrecognized directive name '%s'", s);
+    return 1;
+}
+
+/*
+ * Defines an object-like macro from a "NAME" or "NAME=VALUE" string
+ * (presumably a command-line definition - the caller is not visible here).
+ *
+ * The macro buffer uses the object-like layout expandmacro expects: a
+ * leading '\0' followed by the replacement text. A bare NAME is given the
+ * replacement text "1".
+ *
+ * Returns 0 on success, 1 after reporting an error.
+ */
+error
+dodefine(Lexer *lx, string s)
+{
+    byte *c;
+    string name;
+    Sym *sym;
+
+    if (strlen(s) >= arrlen(lx->buf)) {
+        errorf("definition too long: '%s'", s);
+        return 1;
+    }
+    strcpy(lx->buf, s);
+
+    /* split NAME=VALUE in place */
+    c = strchr(lx->buf, '=');
+    if (c)
+        *c++ = '\0';
+
+    if (lx->buf[0] == '\0') {
+        errorf("missing macro name in definition '%s'", s);
+        return 1;
+    }
+
+    /* lx->buf is scratch space: the symbol table needs the interned name,
+     * as in ppdef */
+    name = lx->buf;
+    intern(&name);
+
+    sym = lookup(lx->sym, name);
+    if (sym) {
+        errorf("redefinition of symbol '%s'", sym->name);
+        return 1;
+    }
+
+    sym = define(lx->sym, name, Smacro);
+    if (c) {
+        sym->macro = str·makelen("", 0);
+        str·appendbyte(&sym->macro, '\0');
+        str·append(&sym->macro, c);
+    } else {
+        /* two literals on purpose: "\001" would be the single byte 0x01 */
+        sym->macro = "\0" "1";
+    }
+
+    return 0;
+}