aboutsummaryrefslogtreecommitdiff
path: root/sys/cmd/cc
diff options
context:
space:
mode:
authorNicholas Noll <nbnoll@eml.cc>2020-05-17 16:19:17 -0700
committerNicholas Noll <nbnoll@eml.cc>2020-05-17 16:19:17 -0700
commit9ec5bed6a7d715ffa69851569485a685dd69db2e (patch)
treec25eaef1000f52caacf25ee398e54f63067b8feb /sys/cmd/cc
parentc92c89280d036240a75ff122084dd58cde473394 (diff)
prototype of front end cli
Diffstat (limited to 'sys/cmd/cc')
-rw-r--r--sys/cmd/cc/cc.c237
-rw-r--r--sys/cmd/cc/cc.h189
-rw-r--r--sys/cmd/cc/lex.c649
-rw-r--r--sys/cmd/cc/rules.mk4
-rw-r--r--sys/cmd/cc/sym.c32
5 files changed, 997 insertions, 114 deletions
diff --git a/sys/cmd/cc/cc.c b/sys/cmd/cc/cc.c
index cddea01..39ad5f2 100644
--- a/sys/cmd/cc/cc.c
+++ b/sys/cmd/cc/cc.c
@@ -1,6 +1,9 @@
#include "cc.h"
#include <libn/macro/map.h>
+// -----------------------------------------------------------------------
+// string interning
+
/* jenkins' one at a time hash */
static
int32
@@ -24,8 +27,22 @@ hash_string(byte* s)
return h;
}
+static
+int
+streq(byte *s, byte *t)
+{
+ if (s == nil) {
+ if (t == nil)
+ return 1;
+ else
+ return 0;
+ }
+
+ return (t == nil) ? 0 : strcmp(s, t) == 0;
+}
+
#define HASH(s) hash_string(s)
-#define EQUAL(s, t) (strcmp(s, t) == 0)
+#define EQUAL(s, t) (streq(s, t))
static
int
getstr(string key, int *ok)
@@ -74,20 +91,226 @@ END:
return C.strs.vals[i];
}
+// -----------------------------------------------------------------------
+// io buffer management
+
+#define asrdr(x) (io·Reader){(int (*)(void *, int, int, void *))x}
+// path should be absolute
+Io*
+openio(byte *path)
+{
+ Io *it;
+ Stream *f;
+
+ intern(&path);
+
+ // See if we have already opened file;
+ // If so, and it hasn't been flagged return it
+ for (it = C.iostk; it != C.io + 1; ++it) {
+ if ((uintptr)it->path == (uintptr)path) {
+ if (it->kind & IOonce) {
+ return nil;
+ }
+ return it;
+ }
+ }
+
+ if ((C.io - C.iostk) >= arrlen(C.iostk)-1)
+ panicf("out of I/O space!");
+
+ C.io->f = io·open(path, "r");
+ C.io->path = path;
+ bufio·initreader(&C.io->buf, asrdr(io·read), C.io->f);
+
+ return C.io++;
+}
+
+Io*
+makeio()
+{
+ if ((C.io - C.iostk) >= arrlen(C.iostk)-1)
+ panicf("out of I/O space!");
+
+ C.io->path = "<buffer>";
+ C.io->buf = (io·Buffer) {
+ .state = bufio·rdr | bufio·end,
+ .runesize = 0,
+ .h = nil,
+ .size = bufio·size,
+ .beg = C.io->buf.buf + bufio·ungets,
+ .pos = C.io->buf.buf + bufio·ungets,
+ .end = C.io->buf.buf + bufio·ungets,
+ };
+ C.io->b = C.io->buf.buf;
+
+ return C.io++;
+}
+#undef asrdr
+
+// TODO: Think about if this is always at the _end_ of the stack.
+// Right now we don't have access to it.
+void
+freeio(Io *io)
+{
+ if (io->kind & ~IOmac) {
+ free(io->b);
+ } else {
+ io·close(io->f);
+ }
+ io->link = nil;
+ io->path = nil;
+ io->store = (Pos){ 0 };
+}
+
+// -----------------------------------------------------------------------
+// universal compiler builtins
+
+#define KEYWORD(a, b) b,
+byte *keywords[NUM_KEYWORDS] = { KEYWORDS };
+#undef KEYWORD
+
+#define DIRECTIVE(a, b, c) b,
+byte *directives[NUM_DIRECTIVES] = { DIRECTIVES };
+#undef DIRECTIVE
+
+struct Compiler C = { 0 };
+
+// -----------------------------------------------------------------------
+// flag handlers
+
+void
+pushinclude(byte *dirs)
+{
+ string d, s, *it, *end;
+
+ while (*dirs != 0) {
+ d = strchr(dirs, ' ');
+ if (d != nil)
+ *d = '\0';
+
+ s = d;
+ intern(&s);
+ for (it = C.inc.dir, end = it + C.inc.len; it != end; ++it) {
+ if ((uintptr)s == (uintptr)(*it))
+ goto Nextdir;
+ }
+
+ if (C.inc.len == C.inc.cap) {
+ C.inc.cap += 20;
+ C.inc.dir = realloc(C.inc.dir, C.inc.cap*sizeof(*C.inc.dir));
+ C.inc.dir[C.inc.len++] = s;
+ }
+
+Nextdir:
+ if (d == nil)
+ break;
+ dirs = d + 1;
+ }
+
+}
+
+// -----------------------------------------------------------------------
+// main point of entry
+
void
-init()
+init(void)
{
- int i, n;
+ int i;
for (i = 0; i < arrlen(keywords); i++) {
intern(&keywords[i]);
- printf("keyword %d: %s", i, keywords[i]);
}
+
+ for (i = 0; i < arrlen(directives); i++) {
+ intern(&directives[i]);
+ }
+
+ C.heap = mem·makearena(mem·sys, nil);
+
+ C.inc.len = 0;
+ C.inc.cap = 100;
+ C.inc.dir = calloc(C.inc.cap, sizeof(*C.inc.dir));
+ C.inc.dir[C.inc.len++] = ".";
+
+ C.outfile = nil;
+ C.lxr = (Lexer){ 0 };
}
-int
-main()
+error
+compile(byte *path)
+{
+ Io *io;
+ Token tok;
+ byte *p, file[400];
+
+ strcpy(file, path);
+ p = utf8·findrrune(file, '/');
+ if (p)
+ *p++ = '\0';
+ else
+ p = file;
+
+ if (!C.outfile) {
+ C.outfile = p;
+ if (C.outfile) {
+ if ((p = utf8·findrrune(C.outfile, '.'))) {
+ p[0] = '.';
+ p[1] = 'o';
+ p[2] = '\0';
+ }
+ } else {
+ C.outfile = "/dev/null";
+ }
+ }
+
+ C.lxr.io = openio(file);
+ while (tok = lex(&C.lxr), tok.kind > Aeof) {
+ ;
+ }
+ freeio(C.lxr.io);
+
+ return tok.kind != Anil;
+}
+
+error
+main(int argc, byte *argv[])
{
+ byte *a, *src;
+ int err;
+
init();
- return 0;
+
+ ARGBEGIN {
+ case 'o':
+ C.outfile = ARGF();
+ break;
+
+ case 'D':
+ a = ARGF();
+ if (a) {
+ intern(&a);
+ dodefine(&C.lxr, a);
+ }
+ break;
+
+ case 'I':
+ a = ARGF();
+ if (a)
+ pushinclude(a);
+ break;
+ } ARGEND
+
+ if (argc < 1 && C.outfile == nil) {
+ printf("usage: cc [-options] files\n");
+ exit(1);
+ }
+
+ src = (argc == 0) ? "<stdin>" : argv[0];
+ intern(&src);
+
+ if ((err = compile(src)), err) {
+ exit(2);
+ }
+
+ exit(0);
}
diff --git a/sys/cmd/cc/cc.h b/sys/cmd/cc/cc.h
index 3228890..5488f3c 100644
--- a/sys/cmd/cc/cc.h
+++ b/sys/cmd/cc/cc.h
@@ -8,8 +8,11 @@
/* core types */
typedef struct Io Io;
typedef struct Pos Pos;
+typedef struct Range Range;
typedef struct Token Token;
+typedef struct Lexer Lexer;
+
typedef struct Sym Sym;
typedef struct Type Type;
@@ -22,33 +25,9 @@ typedef struct Expr Expr;
typedef struct SymTab SymTab;
typedef struct StrTab StrTab;
-// -----------------------------------------------------------------------
-// lexing: byte stream -> tokens
-// pre-processor built in
-
-struct Pos
-{
- int col;
- int line;
- string path;
-};
-
-#define DIRECTIVES \
- DIRECTIVE(Dpragma,"pragma") \
- DIRECTIVE(Dinclude,"include") \
- DIRECTIVE(Dif,"if") \
- DIRECTIVE(Ddefine,"define") \
- DIRECTIVE(Difdef,"ifdef") \
- DIRECTIVE(Difndef,"ifndef")
-
-#define DIRECTIVE(a, b) a,
-enum { DIRECTIVES };
-#undef DIRECTIVE
-
-#define DIRECTIVE(a, b) b,
-static byte *directives[] = { DIRECTIVES };
-#undef DIRECTIVE
+typedef struct Compiler Compiler;
+/* keywords of language */
#define KEYWORDS \
KEYWORD(Kauto,"auto") \
KEYWORD(Kregister,"register") \
@@ -88,15 +67,57 @@ static byte *directives[] = { DIRECTIVES };
KEYWORD(Kalignof,"alignof")
#define KEYWORD(a, b) a,
-enum { KEYWORDS };
+enum { KEYWORDS NUM_KEYWORDS };
#undef KEYWORD
-#define KEYWORD(a, b) b,
-static byte *keywords[] = { KEYWORDS };
-#undef KEYWORD
+extern byte *keywords[NUM_KEYWORDS];
+
+// -----------------------------------------------------------------------
+// lexing: byte stream -> tokens
+// pre-processor built in
+
+/* source position: error reporting */
+struct Pos
+{
+ int col;
+ int line;
+ string path;
+};
+
+
+struct Range
+{
+ Pos beg;
+ Pos end;
+};
+
+void errorat(Pos x, byte *fmt, ...);
+
+/* pre-processor */
+#define DIRECTIVES \
+ DIRECTIVE(Dpragma,"pragma", ppprag) \
+ DIRECTIVE(Dinclude,"include", ppinc) \
+ DIRECTIVE(Ddefine,"define", ppdef) \
+ DIRECTIVE(Dundef,"undef", ppund) \
+ DIRECTIVE(Dif,"if", ppif0) \
+ DIRECTIVE(Delse, "else", ppif1) \
+ DIRECTIVE(Difdef,"ifdef", ppif2) \
+ DIRECTIVE(Difndef,"ifndef", ppif3) \
+ DIRECTIVE(Dendif,"endif", ppend)
+
+#define DIRECTIVE(a, b, c) a,
+enum { DIRECTIVES NUM_DIRECTIVES };
+#undef DIRECTIVE
+
+extern byte *directives[NUM_DIRECTIVES];
-#undef KEYWORDS
+error domacro(Lexer*);
+error dodefine(Lexer *lx, string s);
+int expandmacro(Lexer *lx, Sym *s, byte *dst);
+extern error (*macros[NUM_DIRECTIVES])(Lexer*);
+
+/* tokenization of byte stream */
#define TOKENS \
TOK(Anil,"nil") \
TOK(Aeof,"eof") \
@@ -140,8 +161,8 @@ static byte *keywords[] = { KEYWORDS };
TOK(Arparen,")") \
TOK(Albrace,"{") \
TOK(Arbrace,"}") \
- TOK(Albrkt,"[") \
- TOK(Arbrkt,"]") \
+ TOK(Albrakt,"[") \
+ TOK(Arbrakt,"]") \
TOK(Adot,".") \
TOK(Aarrow,"->") \
TOK(Aqmark,"?") \
@@ -154,47 +175,76 @@ static byte *keywords[] = { KEYWORDS };
enum
{
TOKENS
+ NUM_TOKENS,
+
+ Vchar = iota(8),
+ Vint = iota(9),
+ Vlong = iota(10),
+ Vvlong = iota(11),
+ Vusgn = iota(12),
+ Vfloat = iota(13),
+ Vstr = iota(14),
};
#undef TOK
-#define TOK(a, b) b,
-static byte *tokens[] = { TOKENS };
-#undef TOK
-#undef TOKENS
+extern byte *tokens[NUM_TOKENS];
/* TODO: store literals in a big val */
struct Token
{
uint32 kind;
- struct Pos pos;
+ Range pos;
union {
- string str;
+ byte *s;
double f;
vlong i;
- };
+ uvlong ui;
+ byte c;
+ ubyte uc;
+ } val;
};
enum
{
- Svar,
- Sfunc,
- Smacro,
+ Svar = 1 << 0,
+ Sfunc = 1 << 1,
+ Smacro = 1 << 2,
};
struct Sym
{
uint32 kind;
string name;
+ union {
+ string macro;
+ /*Func *func;*/
+ };
};
+Sym *lookup(SymTab *tab, string ident);
+Sym *define(SymTab *tab, string ident, int kind);
+
struct Lexer
{
- Token tok;
+ Pos pos;
Io *io;
SymTab *sym;
- byte buf[1024];
+ byte *b;
+ byte buf[2*1024];
};
+/* lex.c functions */
+Token lex(Lexer *);
+
+byte getbyte(Lexer *);
+byte getnsbyte(Lexer *l);
+rune getrune(Lexer *);
+byte ungetbyte(Lexer *);
+rune ungetrune(Lexer *, rune r);
+
+void pushio(Lexer *lx, Io *new);
+void popio(Lexer *lx);
+
// -----------------------------------------------------------------------
// parsing & type resolution
// tokens -> ast
@@ -294,35 +344,66 @@ struct Decl
// -----------------------------------------------------------------------
// compiler
+enum
+{
+ IOnil = iota(0),
+ IOonce = iota(1),
+ IOmac = iota(2),
+};
+
struct Io
{
- io·Buffer b;
+ io·Buffer buf;
string path;
- uint32 flag;
+ uint32 kind;
+ union {
+ Stream *f;
+ byte *b;
+ };
+
+ Pos store;
struct Io *link;
};
+Io* openio(byte *path);
+Io* makeio();
+void freeio(Io *io);
+
struct StrTab
{
- int32 n_buckets, size, n_occupied, upper_bound;
+ int32 n_buckets;
+ int32 size;
+ int32 n_occupied;
+ int32 upper_bound;
int32 *flags;
string *keys;
int32 *vals;
};
-static struct
+int32 intern(byte **str);
+string internview(byte* beg, byte *end);
+
+/* main data */
+struct Compiler
{
mem·Arena *heap;
StrTab strs;
- string *include;
+ struct {
+ int cap;
+ int len;
+ string *dir;
+ } inc;
+
Io *io;
Io iostk[100];
-} C;
-void init();
+ string outfile;
-int32 intern(byte **str);
-string internview(byte* beg, byte *end);
+ Lexer lxr;
+};
+extern Compiler C;
+
+void init();
#undef iota
diff --git a/sys/cmd/cc/lex.c b/sys/cmd/cc/lex.c
index af3bbf3..6b85d8c 100644
--- a/sys/cmd/cc/lex.c
+++ b/sys/cmd/cc/lex.c
@@ -1,38 +1,649 @@
#include "cc.h"
-static
-void
-errorat(Pos x, byte *fmt, ...)
+#include <libn/macro/map.h>
+
+// -----------------------------------------------------------------------
+// simple wrappers
+
+byte
+getbyte(Lexer *l)
{
- va_list args;
- va_start(args, fmt);
- printf("error %d:", x.line);
- vprintf(fmt, args);
- va_end(args);
+ return bufio·getbyte(&l->io->buf);
+}
+
+byte
+getnsbyte(Lexer *l)
+{
+ byte b;
+ while (b = bufio·getbyte(&l->io->buf), isspace(b));
+ return b;
+}
+
+rune
+getrune(Lexer *l)
+{
+ return bufio·getrune(&l->io->buf);
}
-static
byte
-getbyte(struct Lexer *lex)
+ungetbyte(Lexer *lx)
{
- return bufio·getbyte(&lex->buf);
+ byte b;
+ return bufio·ungetbyte(&lx->io->buf, b);
}
+rune
+ungetrune(Lexer *l, rune r)
+{
+ return bufio·ungetrune(&l->io->buf, r);
+}
+
+// -----------------------------------------------------------------------
+// main lexer
+
+#define TOK(a, b) b,
+byte *tokens[NUM_TOKENS] = { TOKENS };
+#undef TOK
+
+static uint8 Atoi[256] =
+{
+ ['0'] = 0, ['1'] = 1, ['2'] = 2, ['3'] = 3, ['4'] = 4, ['5'] = 5,
+ ['6'] = 6, ['7'] = 7, ['8'] = 8, ['9'] = 9, ['a'] = 10, ['A'] = 10,
+ ['b'] = 11, ['B'] = 11, ['c'] = 12, ['C'] = 12, ['d'] = 13, ['D'] = 13,
+ ['e'] = 14, ['E'] = 14, ['f'] = 15, ['F'] = 15,
+};
+
static
error
-ungetbyte(struct Lexer *lex, byte b)
+escape(Lexer *lx, int x, int *flag, vlong *val)
{
- return bufio·ungetbyte(&lex->buf, b);
+ int i, u, c;
+ vlong l;
+
+ c = getrune(lx);
+
+ switch (c) {
+ case EOF:
+ errorat(lx->pos, "EOF in string");
+ return 1;
+ case '\n':
+ errorat(lx->pos, "newline in string");
+ return 1;
+ case '\\':
+ break;
+ default:
+ if (c == x)
+ return 1;
+ *val = c;
+ return 0;
+ }
+
+ u = 0;
+ c = getrune(lx);
+
+ switch(c) {
+ case 'x':
+ i = 2;
+ *flag = 1;
+ goto hex;
+
+ case 'u':
+ i = 4;
+ u = 1;
+ goto hex;
+
+ case 'U':
+ i = 8;
+ u = 1;
+ goto hex;
+
+ case '0': case '1': case '2': case '3':
+ case '4': case '5': case '6': case '7':
+ *flag = 1;
+ goto oct;
+
+ case 'a': c = '\a'; break;
+ case 'b': c = '\b'; break;
+ case 'f': c = '\f'; break;
+ case 'n': c = '\n'; break;
+ case 'r': c = '\r'; break;
+ case 't': c = '\t'; break;
+ case 'v': c = '\v'; break;
+ case '\\': c = '\\'; break;
+
+ default:
+ if(c != x) errorat(lx->pos, "unknown escape sequence: %c", c);
+ }
+ *val = c;
+ return 0;
+
+hex:
+ l = 0;
+ for(; i > 0; i--) {
+ c = getbyte(lx);
+ if (c >= '0' && c <= '9') {
+ l = l*16 + c-'0';
+ continue;
+ }
+ if (c >= 'a' && c <= 'f') {
+ l = l*16 + c-'a' + 10;
+ continue;
+ }
+ if (c >= 'A' && c <= 'F') {
+ l = l*16 + c-'A' + 10;
+ continue;
+ }
+ errorat(lx->pos, "non-hex character in escape sequence: %c", c);
+ ungetbyte(lx);
+ break;
+ }
+ if (u && (l > RuneMax || (0xd800 <= l && l < 0xe000))) {
+ errorat(lx->pos, "invalid unicode code point in escape sequence: %#llx", l);
+ l = RuneErr;
+ }
+ *val = l;
+ return 0;
+
+oct:
+ l = c - '0';
+ for (i = 2; i > 0; i--) {
+ c = getbyte(lx);
+ if (c >= '0' && c <= '7') {
+ l = l*8 + c-'0';
+ continue;
+ }
+ errorat(lx->pos, "non-octal character in escape sequence: %c", c);
+ ungetbyte(lx);
+ }
+ if (l > 255) errorat(lx->pos, "octal escape value > 255: %d", l);
+
+ *val = l;
+ return 0;
}
-void
-lex(struct Lexer *lex)
+#define CASE1(stmt1, kind1) \
+ case stmt1: \
+ tok.kind = kind1; \
+ break;
+
+#define CASE2(stmt1, kind1, b1, kind2) \
+ case stmt1: \
+ tok.kind = kind1; \
+ b = getbyte(lx); \
+ if (b == b1) \
+ tok.kind = kind2; \
+ else \
+ ungetbyte(lx); \
+ break;
+
+#define CASE3(stmt1, kind1, b1, kind2, b2, kind3) \
+ case stmt1: \
+ tok.kind = kind1; \
+ b = getbyte(lx); \
+ if (b == b1) \
+ tok.kind = kind2; \
+ else if (b == b2) \
+ tok.kind = kind3; \
+ else \
+ ungetbyte(lx); \
+ break;
+
+#define CASE4(stmt1, kind1, b1, kind2, b2, kind3, b3, type4) \
+ case stmt1: \
+ tok.kind = kind1; \
+ b = getbyte(lx); \
+ if (b == b1) \
+ tok.kind = kind2; \
+ else if (b == b2) \
+ tok.kind = kind3; \
+ else if (b == b3) \
+ tok.kind = type4; \
+ else \
+ ungetbyte(lx); \
+ break;
+
+
+Token
+lex(Lexer *lx)
{
- int b;
+ int b, n, f;
+ vlong v;
+ uint u;
+ rune r;
+ string s;
+ double d;
+ byte *e;
+ Token tok;
+ Sym *sym;
+ Io *io;
+
+GetByte:
+ b = getbyte(lx);
+Dispatch:
+ tok.pos.beg = lx->pos;
+
+ if (b >= RuneSelf || isalpha(b))
+ goto TAlpha;
+ if (isdigit(b))
+ goto TNum;
- b = getbyte(lex);
-TOP:
switch (b) {
+ case ' ': case '\n': case '\r': case '\t': case '\v': case '\f':
+ while (b = getbyte(lx), isspace(b))
+ if (b == '\n') lx->pos.line++;
+ goto Dispatch;
+
+ case '\'':
+ if (escape(lx, '\'', &f, &v)) {
+ errorat(lx->pos, "empty literal or escaped ' in char literal");
+ v = '\'';
+ }
+ if (!escape(lx, '\'', &f, &v)) {
+ errorat(lx->pos, "missing '");
+ ungetbyte(lx);
+ }
+
+ if (v > 0xff) {
+ errorat(lx->pos, "overflowed character literal");
+ v = 0;
+ }
+ tok.kind = Alit | Vchar;
+ tok.val.c = v;
+ break;
+
+ case '"':
+ s = str·makecap("", 0, 8);
+ for (;;) {
+ if (escape(lx, '"', &f, &v))
+ break;
+
+ if (v < RuneSelf || f)
+ str·appendbyte(&s, v);
+ else {
+ r = v;
+ b = utf8·runelen(r);
+ utf8·runetochar(lx->buf, &r);
+ str·appendlen(&s, b, lx->buf);
+ }
+ }
+ tok.kind = Alit | Vstr;
+ tok.val.s = s;
+ intern(&tok.val.s);
+
+ str·free(s);
+ break;
+ case '.':
+ tok.kind = Adot;
+ b = getbyte(lx);
+
+ if (isdigit(b)) {
+ // *lx->b++ = b;
+ goto TFlt;
+ } else if (b == '.') {
+ b = getbyte(lx);
+ if (b != '.') {
+ errorat(lx->pos, "invalid token '..'");
+ tok.kind = Aellip;
+ break;
+ }
+ }
+ ungetbyte(lx);
+ break;
+
+ case '<':
+ tok.kind = Alt;
+ b = getbyte(lx);
+
+ if (b == '<') {
+ tok.kind = Alsft;
+ b = getbyte(lx);
+ if (b == '=')
+ tok.kind = Alsftasn;
+ else
+ ungetbyte(lx);
+ } else if (b == '=')
+ tok.kind = Alteq;
+ else
+ ungetbyte(lx);
+ break;
+
+ case '>':
+ tok.kind = Agt;
+ b = getbyte(lx);
+
+ if (b == '>') {
+ tok.kind = Arsft;
+ b = getbyte(lx);
+ if (b == '=')
+ tok.kind = Arsftasn;
+ else
+ ungetbyte(lx);
+ } else if (b == '=')
+ tok.kind = Agteq;
+ else
+ ungetbyte(lx);
+ break;
+
+ case '/':
+ tok.kind = Adiv;
+ b = getbyte(lx);
+
+ if (b == '=')
+ tok.kind = Adivasn;
+ else if (b == '/') {
+ while (b != EOF && b != '\n')
+ b = getbyte(lx);
+ lx->pos.line++;
+ goto Dispatch;
+ } else if (b == '*') {
+ int level = 1;
+ b = getbyte(lx);
+ while (b != EOF && level > 0) {
+ if (b == '/') {
+ b = getbyte(lx);
+ if (b == '*')
+ level++;
+ } else if (b == '*') {
+ b = getbyte(lx);
+ if (b == '/')
+ level--;
+ }
+ if (b == '\n') lx->pos.line++;
+ b = getbyte(lx);
+ }
+ goto Dispatch;
+ } else
+ ungetbyte(lx);
+ break;
+
+ case '#':
+ if (domacro(lx)) {
+ tok.kind = Anil;
+ errorat(lx->pos, "failed to perform preprocessor directive");
+ return tok;
+ }
+ goto GetByte;
+ break;
+
+ case EOF:
+ panicf("need to implement popio");
+
+ CASE1('(', Alparen)
+ CASE1(')', Arparen)
+ CASE1('{', Albrace)
+ CASE1('}', Arbrace)
+ CASE1('[', Albrakt)
+ CASE1(']', Arbrakt)
+ CASE1(',', Acomma)
+ CASE1('?', Aqmark)
+ CASE1(';', Asemi)
+ CASE1('~', Aneg)
+ CASE1(':', Acolon)
+ CASE2('^', Axor, '=', Axorasn)
+ CASE2('!', Anot, '=', Aneq)
+ CASE2('*', Astar,'=', Amulasn)
+ CASE2('=', Aasn, '=', Aeq)
+ CASE2('%', Amod, '=', Amodasn)
+ CASE3('+', Aadd, '=', Aaddasn, '+', Ainc)
+ CASE3('&', Aand, '=', Aandasn, '&', Aandand)
+ CASE3('|', Aor, '=', Aorasn, '|', Aoror)
+ CASE4('-', Asub, '=', Asubasn, '-', Adec, '>', Aarrow)
+
+ default:
+ tok.kind = Anil;
+ errorat(lx->pos, "invalid token, crashing");
+ abort();
}
-}
+
+ goto Return;
+
+ TNum:
+ e = lx->buf + arrlen(lx->buf);
+ do {
+ if (lx->b >= e) {
+ errorat(lx->pos, "number overflows lexer buffer");
+ goto Nospace;
+ }
+ *lx->b++ = b;
+ } while (b = getbyte(lx), isdigit(b) || b == '_');
+
+ if (b == '.' || tolower(b) == 'e')
+ goto TFlt;
+ TInt:
+ r = b;
+ n = 10;
+ s = lx->buf;
+ if (*s == '0') {
+ b = *++s;
+ switch (b) {
+ case 'x': n = 16; break;
+ case 'b': n = 2; break;
+ case 'o': n = 8; break;
+ default: --s;
+ }
+ if (s >= e) {
+ errorat(lx->pos, "number overflows lexer buffer");
+ goto Nospace;
+ }
+ }
+
+ v = 0;
+ for (; s != lx->b ; s++) {
+ b = *s;
+ if (b == '_') continue;
+
+ f = Atoi[b];
+ if (f == 0 && b != '0')
+ break;
+
+ if (f >= n) {
+ errorat(lx->pos, "digit '%c' out of range for base %d", b, n);
+ f = 0;
+ }
+
+ if (v > (UINT64_MAX - f) / n) {
+ errorat(lx->pos, "integer literal overflow");
+ v = 0;
+ break;
+ }
+
+ v = v * n + f;
+ }
+ b = r;
+ tok.kind = Alit | Vint;
+ tok.val.i = v;
+ /* TODO: Suffixes!
+ if (tolower(b) == 'u') {
+ tok.kind |= Vusgn;
+ b = getbyte(lx);
+ }
+ */
+ goto Return;
+
+ TFlt:
+ if (b == '.') {
+ *lx->b++ = b;
+ b = getbyte(lx);
+ }
+
+ while (isdigit(b)) {
+ *lx->b++ = b;
+
+ if (lx->b >= e) {
+ errorat(lx->pos, "number overflows lexer buffer");
+ goto Nospace;
+ }
+ }
+
+ if (tolower(b) == 'e') {
+ b = getbyte(lx);
+ if (b == '-' || b == '+')
+ b = getbyte(lx);
+
+ if (!isdigit(b))
+ errorat(lx->pos, "expected number after exponent, found %c", b);
+
+ do {
+ *lx->b++ = b;
+ } while (b = getbyte(lx), isdigit(b));
+ }
+ *lx->b = '\0';
+ d = strtod(lx->buf, nil);
+
+ tok.kind = Alit | Vfloat;
+ tok.val.f = d;
+
+ goto Return;
+
+ TAlpha:
+ u = b;
+ s = lx->buf;
+ e = lx->buf + arrlen(lx->buf);
+ for (;;) {
+ if (s >= e) {
+ errorat(lx->pos, "identifier too long for buffer: %s", s);
+ goto Nospace;
+ }
+ if (u >= RuneSelf) {
+ ungetbyte(lx);
+ r = getrune(lx);
+ if (!utf8·isletter(r) && !utf8·isdigit(r) && r != 0xb7) {
+ errorat(lx->pos, "invalid identifier character %d", r);
+ }
+ s += utf8·runetochar(s, &r);
+ } else if (!isalnum(u) && u != '_')
+ break;
+ else
+ *s++ = u;
+ u = getbyte(lx);
+ }
+ *s = '\0';
+ tok.kind = Aident;
+ tok.val.s = lx->buf;
+
+ n = intern(&tok.val.s);
+ if (n < arrlen(keywords)) {
+ tok.kind = Akeywd;
+ }
+
+ sym = lookup(lx->sym, tok.val.s);
+ if (sym) {
+ io = makeio();
+ io->buf.end += expandmacro(lx, sym, io->b);
+ pushio(lx, io);
+ goto GetByte;
+ }
+
+Return:
+ lx->b = lx->buf;
+ tok.pos.end = lx->pos;
+ return tok;
+
+Nospace:
+ panicf("aborting compilation");
+}
+
+#undef CASE4
+#undef CASE3
+#undef CASE2
+#undef CASE1
+
+// -----------------------------------------------------------------------
+// push/pop io objects
+
+void
+pushio(Lexer *lx, Io *new)
+{
+ new->link = lx->io;
+ lx->io->store = lx->pos;
+ lx->io = new;
+
+ lx->pos = (Pos){
+ .line = 0,
+ .col = 0,
+ .path = new->path,
+ };
+}
+
+void
+popio(Lexer *lx)
+{
+ Io *prev;
+
+ prev = lx->io->link;
+ if (!prev) {
+ panicf("no buffer left");
+ }
+
+ lx->pos = prev->store;
+ lx->io = prev;
+}
+
+// -----------------------------------------------------------------------
+// symbol tables
+
+#define PTR_HASH(p) (uintptr)(p)
+#define PTR_EQUAL(p1, p2) ((uintptr)(p1) == (uintptr)(p2))
+
+struct SymTab
+{
+ MAP_STRUCT_BODY(string, Sym*);
+};
+
+Sym*
+lookup(SymTab *tab, string ident)
+{
+ int idx;
+ MAP_GET(idx, tab, ident, PTR_HASH, PTR_EQUAL);
+
+ if (idx < tab->n_buckets)
+ return tab->vals[idx];
+
+ return nil;
+}
+
+static
+int
+moresymtab(SymTab *tab, int n)
+{
+ MAP_GROW(tab, string, Sym*, n, PTR_HASH, mem·sys.alloc, mem·sys.free, nil);
+}
+
+static
+int
+putsym(SymTab *tab, Sym *sym, error *err)
+{
+ MAP_PUT(tab, sym->name, sym, PTR_HASH, PTR_EQUAL, moresymtab, err);
+}
+
+Sym*
+define(SymTab *tab, string name, int kind)
+{
+ Sym *sym;
+ error err;
+
+ sym = mem·arenaalloc(C.heap, 1, sizeof(*sym));
+ sym->name = name;
+ sym->kind = kind;
+
+ putsym(tab, sym, &err);
+
+ return sym;
+}
+
+// -----------------------------------------------------------------------
+// error reporting
+
+void
+errorat(Pos x, byte *fmt, ...)
+{
+ va_list args;
+ va_start(args, fmt);
+
+ printf("error %d: ", x.line);
+
+ vprintf(fmt, args);
+ va_end(args);
+}
+
diff --git a/sys/cmd/cc/rules.mk b/sys/cmd/cc/rules.mk
index fe30305..b32d5b6 100644
--- a/sys/cmd/cc/rules.mk
+++ b/sys/cmd/cc/rules.mk
@@ -3,13 +3,13 @@ include share/push.mk
# Local sources
SRCS_$(d) := \
+ $(d)/pp.c \
$(d)/lex.c \
- $(d)/sym.c \
$(d)/cc.c
LIBS_$(d) :=
BINS_$(d) := $(d)/cc
-TSTS_$(d) :=
+UNTS_$(d) :=
include share/paths.mk
diff --git a/sys/cmd/cc/sym.c b/sys/cmd/cc/sym.c
deleted file mode 100644
index ef40bce..0000000
--- a/sys/cmd/cc/sym.c
+++ /dev/null
@@ -1,32 +0,0 @@
-#include "cc.h"
-
-#include <libn/macro/map.h>
-
-#define PTR_HASH(p) (uintptr)(p)
-#define PTR_EQUAL(p1, p2) ((uintptr)(p1) == (uintptr)(p2))
-
-#if 0
-struct SymTab
-{
- MAP_STRUCT_BODY(string, Sym*);
-};
-
-Sym*
-getsym(SymTab *tab, string key)
-{
- MAP_GET(tab, key, PTR_HASH, PTR_EQUAL, nil);
-}
-
-static
-int
-moresymtab(SymTab *tab, int n)
-{
- MAP_GROW(tab, string, Sym*, n, PTR_HASH);
-}
-
-int
-putsym(SymTab *tab, Sym *sym, error *err)
-{
- MAP_PUT(tab, sym->name, sym, PTR_HASH, PTR_EQUAL, moresymtab, err);
-}
-#endif