aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- sys/cmd/cc/cc.c | 1
-rw-r--r-- sys/cmd/cc/cc.h | 16
-rw-r--r-- sys/cmd/cc/lex.c | 42
-rw-r--r-- sys/cmd/cc/pp.c | 438
-rw-r--r-- sys/libn/bufio.c | 4
-rw-r--r-- sys/libn/string.c | 8
6 files changed, 460 insertions, 49 deletions
diff --git a/sys/cmd/cc/cc.c b/sys/cmd/cc/cc.c
index 3dae0fd..6bc363c 100644
--- a/sys/cmd/cc/cc.c
+++ b/sys/cmd/cc/cc.c
@@ -114,6 +114,7 @@ openio(byte *path)
return it;
}
}
+ printf("OPENING PATH %s\n", path);
if ((C.io - C.iostk) >= arrlen(C.iostk)-1)
panicf("out of I/O space!");
diff --git a/sys/cmd/cc/cc.h b/sys/cmd/cc/cc.h
index 9871e99..84f173f 100644
--- a/sys/cmd/cc/cc.h
+++ b/sys/cmd/cc/cc.h
@@ -100,6 +100,7 @@ void errorat(Pos x, byte *fmt, ...);
DIRECTIVE(Ddefine,"define", ppdef) \
DIRECTIVE(Dundef,"undef", ppund) \
DIRECTIVE(Dif,"if", ppif0) \
+ DIRECTIVE(Delif,"elif", ppif1) \
DIRECTIVE(Delse, "else", ppif1) \
DIRECTIVE(Difdef,"ifdef", ppif2) \
DIRECTIVE(Difndef,"ifndef", ppif3) \
@@ -184,6 +185,8 @@ enum
Vusgn = iota(12),
Vfloat = iota(13),
Vstr = iota(14),
+
+ Vmask = Vchar - 1,
};
#undef TOK
@@ -221,6 +224,17 @@ struct Sym
};
};
+/*
+ * Symbol table mapping identifier strings to Sym pointers.
+ * NOTE(review): this spells out the fields that lex.c previously obtained
+ * from MAP_STRUCT_BODY(string, Sym*); the layout presumably has to stay in
+ * sync with the map macros that operate on it -- confirm before reordering.
+ */
+struct SymTab
+{
+    /* bookkeeping counters; exact meaning is defined by the map macros */
+    int32 n_buckets, size, n_occupied, upper_bound;
+    int32 *flags;
+    string *keys;
+    /* define() stores the new symbol at vals[i], i being putsym's result */
+    Sym **vals;
+};
+
Sym *lookup(SymTab *tab, string ident);
Sym *define(SymTab *tab, string ident, int kind);
@@ -228,7 +242,7 @@ struct Lexer
{
Pos pos;
Io *io;
- SymTab *sym;
+ SymTab sym;
byte *b;
byte buf[2*1024];
};
diff --git a/sys/cmd/cc/lex.c b/sys/cmd/cc/lex.c
index 6d3da59..90f282a 100644
--- a/sys/cmd/cc/lex.c
+++ b/sys/cmd/cc/lex.c
@@ -26,23 +26,32 @@ puttok(Token tok)
// simple wrappers
byte
-getbyte(Lexer *l)
+getbyte(Lexer *lx)
{
- return bufio·getbyte(&l->io->buf);
+ return bufio·getbyte(&lx->io->buf);
}
byte
-getnsbyte(Lexer *l)
+getnsbyte(Lexer *lx)
{
- byte b;
- while (b = bufio·getbyte(&l->io->buf), isspace(b));
+ int b;
+ b = getbyte(lx);
+ for (;;) {
+ if (b >= RuneSelf || !isspace(b))
+ return b;
+ if (b == '\n') {
+ lx->pos.line++;
+ return b;
+ }
+ b = getbyte(lx);
+ }
return b;
}
rune
-getrune(Lexer *l)
+getrune(Lexer *lx)
{
- return bufio·getrune(&l->io->buf);
+ return bufio·getrune(&lx->io->buf);
}
byte
@@ -242,7 +251,7 @@ GetByte:
Dispatch:
tok.pos.beg = lx->pos;
- if (b >= RuneSelf || isalpha(b))
+ if (b >= RuneSelf || isalpha(b) || b == '_')
goto TAlpha;
if (isdigit(b))
goto TNum;
@@ -434,6 +443,7 @@ Dispatch:
r = b;
n = 10;
s = lx->buf;
+ ungetbyte(lx);
if (*s == '0') {
b = *++s;
switch (b) {
@@ -510,6 +520,7 @@ Dispatch:
}
*lx->b = '\0';
d = strtod(lx->buf, nil);
+ ungetbyte(lx);
tok.kind = Alit | Vfloat;
tok.val.f = d;
@@ -539,6 +550,8 @@ Dispatch:
u = getbyte(lx);
}
*s = '\0';
+ ungetbyte(lx);
+
tok.kind = Aident;
tok.val.s = lx->buf;
@@ -547,7 +560,7 @@ Dispatch:
tok.kind = Akeywd;
}
- sym = lookup(lx->sym, tok.val.s);
+ sym = lookup(&lx->sym, tok.val.s);
if (sym) {
io = makeio();
io->buf.end += expandmacro(lx, sym, io->b);
@@ -606,11 +619,6 @@ popio(Lexer *lx)
#define PTR_HASH(p) (uintptr)(p)
#define PTR_EQUAL(p1, p2) ((uintptr)(p1) == (uintptr)(p2))
-struct SymTab
-{
- MAP_STRUCT_BODY(string, Sym*);
-};
-
Sym*
lookup(SymTab *tab, string ident)
{
@@ -640,14 +648,16 @@ putsym(SymTab *tab, Sym *sym, error *err)
Sym*
define(SymTab *tab, string name, int kind)
{
- Sym *sym;
+ int i;
+ Sym *sym;
error err;
sym = mem·arenaalloc(C.heap, 1, sizeof(*sym));
sym->name = name;
sym->kind = kind;
- putsym(tab, sym, &err);
+ i = putsym(tab, sym, &err);
+ tab->vals[i] = sym;
return sym;
}
diff --git a/sys/cmd/cc/pp.c b/sys/cmd/cc/pp.c
index d1a5d83..ceb9d66 100644
--- a/sys/cmd/cc/pp.c
+++ b/sys/cmd/cc/pp.c
@@ -7,13 +7,24 @@ static
string
ident(Lexer *lx)
{
- byte b;
+ int b;
+ byte *s;
- lx->b = lx->buf;
- for (b = getnsbyte(lx); !isspace(b); b = getbyte(lx)) {
- *lx->b++ = b;
+ b = getnsbyte(lx);
+ if (!isalpha(b) && b != '_' && b < RuneSelf) {
+ ungetbyte(lx);
+ return "";
}
- *lx->b = '\0';
+
+ for (s = lx->buf;;) {
+ *s++ = b;
+ b = getbyte(lx);
+ if (isalnum(b) || b == '_' || b >= RuneSelf)
+ continue;
+ ungetbyte(lx);
+ break;
+ }
+ *s = '\0';
return lx->buf;
}
@@ -51,14 +62,320 @@ defmacro(Lexer *lx, string name, string macro)
{
Sym *mac;
- mac = define(lx->sym, name, Smacro);
+ printf("DEFINING MACRO %s\n", name);
+ mac = define(&lx->sym, name, Smacro);
mac->macro = macro;
return mac;
}
+static vlong evalmacro(Lexer *lx, byte prec);
+
+// parses and evaluates a single operand of a conditional expression:
+//   - Aneg / Anot applied to a following operand (evaluated with ~ and !)
+//   - a parenthesized subexpression
+//   - an integer or character literal
+//   - the defined operator, written as `defined NAME` or `defined(NAME)`
+// any other identifier evaluates to 0.
+// errors are reported through errorat and the operand evaluates to 0.
+static
+vlong
+opand(Lexer *lx)
+{
+    int b, n, max;
+    vlong v;
+    string s;
+    Token tok;
+    Sym *sym;
+
+    b = getnsbyte(lx);
+    if (b == '\n') {
+        errorat(lx->pos, "new line in macro expression");
+        return 0;
+    }
+    ungetbyte(lx);
+
+    tok = lex(lx);
+
+    switch (tok.kind & Vmask) {
+    case Aneg:
+        return ~opand(lx);
+
+    case Anot:
+        return !opand(lx);
+
+    case Alparen:
+        v = evalmacro(lx, 1);
+        tok = lex(lx);
+        // compare the masked kind: a bitwise test against Arparen would
+        // accept any token kind that merely shares a bit with it
+        if ((tok.kind & Vmask) != Arparen) {
+            errorat(lx->pos, "unbalanced parenthesis in macro expression");
+            return 0;
+        }
+        return v;
+
+    case Alit:
+        switch (tok.kind & ~Vmask) {
+        case Vint:
+            return tok.val.i;
+        case Vchar:
+            return tok.val.c;
+        default:
+            errorat(lx->pos, "invalid literal of type '%d' in conditional macro", tok.kind & ~Vmask);
+            return 0;
+        }
+
+    case Aident:
+        sym = lookup(&lx->sym, tok.val.s);
+        if (sym) {
+            panicf("unreachable");
+            return 1;
+        }
+        if (strcmp(tok.val.s, "defined") != 0)
+            return 0;
+
+        /* calling lex directly would expand the operand here
+         * manually lex the result
+         */
+        b = getnsbyte(lx);
+        if (b == '\n') {
+            errorat(lx->pos, "new line in defined operand");
+            return 0;
+        }
+
+        s   = lx->buf;
+        n   = 0;
+        max = (int)sizeof(lx->buf) - 1; // keep room for the terminating NUL
+        if (b == '(') {
+            b = getnsbyte(lx);
+            while (b != ')') {
+                if (b == '\n') {
+                    errorat(lx->pos, "new line inside defined operand");
+                    return 0;
+                }
+                if (b == '(') {
+                    errorat(lx->pos, "nested parens not allowed inside defined operator");
+                    return 0;
+                }
+                if (!isspace(b)) {
+                    if (n >= max) {
+                        errorat(lx->pos, "defined operand too long");
+                        return 0;
+                    }
+                    *s++ = b;
+                    n++;
+                }
+                b = getbyte(lx);
+            }
+        } else {
+            while (!isspace(b)) {
+                if (n >= max) {
+                    errorat(lx->pos, "defined operand too long");
+                    return 0;
+                }
+                *s++ = b;
+                n++;
+                b = getbyte(lx);
+            }
+            // the name may legitimately be the last thing on the line:
+            // push the terminator back so the caller still sees the
+            // newline that ends the directive
+            ungetbyte(lx);
+        }
+        *s = '\0';
+        return lookup(&lx->sym, lx->buf) != nil;
+
+    default:
+        errorat(lx->pos, "opand: invalid token found in macro conditional: '%s'", tokens[tok.kind & Vmask]);
+        return 0;
+    }
+}
+
+// returns the precedence of binary operator `kind` inside a conditional
+// expression (higher binds tighter), or 0 if `kind` is not an allowed
+// binary operator. *width receives the number of source bytes the operator
+// occupies, i.e. how many bytes must be ungotten to push the token back.
+static
+int
+binprec(int kind, int *width)
+{
+    *width = 1;
+    switch (kind) {
+    case Astar: case Adiv: case Amod:
+        return 10;
+    case Aadd: case Asub:
+        return 9;
+    case Alsft: case Arsft:
+        *width = 2;
+        return 8;
+    case Alt: case Agt:
+        return 7;
+    case Agteq: case Alteq:
+        *width = 2;
+        return 7;
+    case Aeq: case Aneq:
+        *width = 2;
+        return 6;
+    case Aand:
+        return 5;
+    case Axor:
+        return 4;
+    case Aor:
+        return 3;
+    case Aandand:
+        *width = 2;
+        return 2;
+    case Aoror:
+        *width = 2;
+        return 1;
+    default:
+        return 0;
+    }
+}
+
+// applies binary operator `kind` to already evaluated operands l and r.
+// division or modulo by zero is reported and evaluates to 0.
+static
+vlong
+binapply(Lexer *lx, int kind, vlong l, vlong r)
+{
+    switch (kind) {
+    case Astar:   return l * r;
+    case Adiv:
+    case Amod:
+        if (r == 0) {
+            errorat(lx->pos, "division by zero in macro conditional");
+            return 0;
+        }
+        return (kind == Adiv) ? l / r : l % r;
+    case Aadd:    return l + r;
+    case Asub:    return l - r;
+    case Alsft:   return l << r;
+    case Arsft:   return l >> r;
+    case Alt:     return l < r;
+    case Agt:     return l > r;
+    case Agteq:   return l >= r;
+    case Alteq:   return l <= r;
+    case Aeq:     return l == r;
+    case Aneq:    return l != r;
+    case Aand:    return l & r;
+    case Axor:    return l ^ r;
+    case Aor:     return l | r;
+    case Aandand: return l && r;
+    case Aoror:   return l || r;
+    default:      return 0;
+    }
+}
+
+// recursively evaluates the expression of a conditional directive
+// (precedence climbing); only a reduced set of operators is allowed here.
+// `prec` is the lowest operator precedence this call may consume: an
+// operator binding less tightly is pushed back and left to the caller.
+// evaluation stops at the end of the line (the newline is left unread) or
+// at a closing parenthesis (left unread for opand to consume).
+// both operands of && and || are always evaluated.
+static
+vlong
+evalmacro(Lexer *lx, byte prec)
+{
+    int b, kind, p, width;
+    vlong l, r;
+    Token tok;
+
+    l = opand(lx);
+    for (;;) {
+        // peek at the next non-space byte
+        b = getnsbyte(lx);
+        ungetbyte(lx);
+        if (b == '\n')
+            break;
+
+        tok  = lex(lx);
+        kind = tok.kind & Vmask;
+
+        // end of a parenthesized subexpression: give the ')' back so
+        // that opand can verify the parentheses balance
+        if (kind == Arparen) {
+            ungetbyte(lx);
+            return l;
+        }
+
+        p = binprec(kind, &width);
+        if (p == 0) {
+            errorat(lx->pos, "eval: invalid token found in macro conditional '%s'", tokens[kind]);
+            abort();
+            return 0;
+        }
+
+        // operator belongs to an enclosing call: replace the token
+        if (prec > p) {
+            while (width-- > 0)
+                ungetbyte(lx);
+            return l;
+        }
+
+        r = evalmacro(lx, p + 1);
+        l = binapply(lx, kind, l, r);
+    }
+
+    return l;
+}
+
// -----------------------------------------------------------------------
-// preprocessor definitions
+// preprocessor magic numbers
enum
{
@@ -74,7 +391,21 @@ enum
// -----------------------------------------------------------------------
// preprocessor functions
+// discards the remainder of the current directive line: reads bytes with
+// getnsbyte until a newline or a byte value <= 0 is returned.
+// always returns 0.
+static
+error
+ppend(Lexer *lx)
+{
+    int c;
+
+    for (;;) {
+        c = getnsbyte(lx);
+        if (c <= 0 || c == '\n')
+            break;
+    }
+    return 0;
+}
+
+
/* #undef */
+static
error
ppund(Lexer *lx)
{
@@ -84,14 +415,16 @@ ppund(Lexer *lx)
s = ident(lx);
intern(&s);
- sym = lookup(lx->sym, s);
+ sym = lookup(&lx->sym, s);
if (!sym) {
errorat(lx->pos, "attempting to undefine unrecognized symbol '%s'", s);
}
+ ppend(lx);
return 0;
}
/* #define */
+static
error
ppdef(Lexer *lx)
{
@@ -107,7 +440,7 @@ ppdef(Lexer *lx)
}
intern(&s);
- sym = lookup(lx->sym, s);
+ sym = lookup(&lx->sym, s);
if (sym) {
errorat(lx->pos, "macro redefined: '%s'", sym->name);
goto Bad;
@@ -240,6 +573,7 @@ ppdef(Lexer *lx)
return 0;
Bad:
errorat(lx->pos, "failed parse of #define macro '%s'", s);
+ ppend(lx);
return 1;
}
@@ -409,11 +743,13 @@ End:
Nospace:
errorf("out of memory during macro expansion %s", s->name);
Bad:
+ ppend(lx);
errorf("failed to expand macro %s", s->name);
return -1;
}
/* #include */
+static
error
ppinc(Lexer *lx)
{
@@ -479,11 +815,13 @@ ppinc(Lexer *lx)
Bad:
ungetbyte(lx);
- errorf("failed include");
+ errorat(lx->pos, "failed include");
+ ppend(lx);
return 1;
}
/* #pragma */
+static
error
ppprag(Lexer *lx)
{
@@ -500,6 +838,7 @@ ppprag(Lexer *lx)
}
Bad:
errorat(lx->pos, "unrecognized pragma '%s'", s);
+ ppend(lx);
return 1;
}
@@ -510,6 +849,16 @@ ppif(Lexer *lx, int f)
{
Sym *sym;
string s;
+ int c, l, b;
+
+ if (f == 0) {
+ b = evalmacro(lx, 1);
+ if (b) {
+ ppend(lx);
+ return 0;
+ }
+ goto Skip;
+ }
if (f == 1)
goto Skip;
@@ -521,19 +870,60 @@ ppif(Lexer *lx, int f)
}
intern(&s);
- sym = lookup(lx->sym, s);
- if ((!sym && f == 3) || sym && (f == 2))
- return 1;
-
+ sym = lookup(&lx->sym, s);
+ if ((!sym && (f == 3)) || (sym && (f == 2)))
+ return 0;
Skip:
+ b = 1;
+ l = 0;
+ for (;;) {
+ c = getbyte(lx);
+ if (c != '#') {
+ if (!isspace(c))
+ b = 0;
+ if (c == '\n')
+ b = 1;
+ continue;
+ }
+ if (!b)
+ continue;
+ s = ident(lx);
+ if (!s)
+ continue;
+
+ if ((strcmp(s, "elif") == 0) && l == 0)
+ ppif(lx, 0);
+
+ if (strcmp(s, "endif") == 0) {
+ if (l) {
+ l--;
+ continue;
+ }
+ ppend(lx);
+ return 0;
+ }
+ if (strcmp(s, "if") == 0 ||
+ strcmp(s, "elif") == 0 ||
+ strcmp(s, "ifdef") == 0 ||
+ strcmp(s, "ifndef") == 0) {
+ l++;
+ continue;
+ }
+
+ if (l == 0 && f != 1 && strcmp(s, "else") == 0) {
+ return 0;
+ }
+ }
Bad:
- errorat(lx->pos, "bad syntax in preprocessor if directive");
- return 0;
+ errorat(lx->pos, "bad syntax in preprocessor conditional directive");
+ ppend(lx);
+ return 1;
}
/* #if */
+static
error
ppif0(Lexer *lx)
{
@@ -541,6 +931,7 @@ ppif0(Lexer *lx)
}
/* #else */
+static
error
ppif1(Lexer *lx)
{
@@ -548,6 +939,7 @@ ppif1(Lexer *lx)
}
/* #ifdef */
+static
error
ppif2(Lexer *lx)
{
@@ -555,19 +947,13 @@ ppif2(Lexer *lx)
}
/* #ifndef */
+static
error
ppif3(Lexer *lx)
{
return ppif(lx, 3);
}
-/* #endif */
-error
-ppend(Lexer *lx)
-{
- return 0;
-}
-
// -----------------------------------------------------------------------
// dispatch function
@@ -610,18 +996,18 @@ dodefine(Lexer *lx, string s)
c = strchr(lx->buf, '=');
if (c) {
*c++ = '\0';
- sym = lookup(lx->sym, lx->buf);
+ sym = lookup(&lx->sym, lx->buf);
if (sym) {
errorf("redefinition of symbol '%s'", sym->name);
return 1;
}
- sym = define(lx->sym, lx->buf, Smacro);
+ sym = define(&lx->sym, lx->buf, Smacro);
n = strlen(c) + 2;
sym->macro = str·makelen("", n);
str·appendbyte(&sym->macro, '\0');
str·append(&sym->macro, c);
} else {
- sym = lookup(lx->sym, lx->buf);
+ sym = lookup(&lx->sym, lx->buf);
if (sym) {
errorf("redefinition of symbol '%s'", sym->name);
return 1;
diff --git a/sys/libn/bufio.c b/sys/libn/bufio.c
index 05b6068..cde56b7 100644
--- a/sys/libn/bufio.c
+++ b/sys/libn/bufio.c
@@ -75,8 +75,8 @@ getbyte:
error
bufio·ungetbyte(io·Buffer *buf, byte c)
{
- buf->state ^= bufio·end;
- if (buf->state & bufio·rdr) {
+ buf->state &= ~bufio·end;
+ if (!(buf->state & bufio·rdr)) {
errorf("attempted to unget on non-active reader");
return bufio·err;
}
diff --git a/sys/libn/string.c b/sys/libn/string.c
index fb92a04..4c8c903 100644
--- a/sys/libn/string.c
+++ b/sys/libn/string.c
@@ -355,11 +355,11 @@ str·appendlen(string *s, vlong n, const byte* b)
str·grow(s, n);
if (*s == nil) return;
- Hdr* h = (Hdr*)(s - sizeof(Hdr));
+ Hdr* h = (Hdr*)(*s - sizeof(Hdr));
memcpy(*s + str·len(*s), b, n);
h->len += n;
- *s[h->len] = '\0';
+ (*s)[h->len] = '\0';
}
// Append will append the given null terminated C string to the string data
@@ -379,11 +379,11 @@ str·appendbyte(string *s, const byte b)
str·grow(s, 1);
if (*s == nil) return;
- Hdr* h = (Hdr*)(s - sizeof(Hdr));
+ Hdr* h = (Hdr*)(*s - sizeof(Hdr));
*(*s + str·len(*s)) = b;
h->len++;
- *s[h->len] = '\0'; // NOTE: I don't think an explicit zero is required..?
+ (*s)[h->len] = '\0'; // NOTE: I don't think an explicit zero is required..?
}
/*