prototype of front end cli

author: Nicholas Noll <nbnoll@eml.cc> 2020-05-17 16:19:17 -0700
committer: Nicholas Noll <nbnoll@eml.cc> 2020-05-17 16:19:17 -0700
commit: 9ec5bed6a7d715ffa69851569485a685dd69db2e (patch)
tree: c25eaef1000f52caacf25ee398e54f63067b8feb /sys
parent: c92c89280d036240a75ff122084dd58cde473394 (diff)
8 files changed, 1060 insertions, 132 deletions
diff --git a/sys/cmd/cc/cc.c b/sys/cmd/cc/cc.c
index cddea01..39ad5f2 100644
--- a/sys/cmd/cc/cc.c
+++ b/sys/cmd/cc/cc.c
@@ -1,6 +1,9 @@
 #include "cc.h"
 #include <libn/macro/map.h>
 
+// -----------------------------------------------------------------------
+// string interning
+
 /* jenkins' one at a time hash */
 static 
 int32
@@ -24,8 +27,22 @@ hash_string(byte* s)
     return h;
 }
 
+/* nil-safe string equality: nil equals only nil, otherwise compare contents */
+static
+int
+streq(byte *s, byte *t)
+{
+    if (s == nil || t == nil)
+        return s == t;
+
+    return strcmp(s, t) == 0;
+}
+
 #define HASH(s)      hash_string(s)
-#define EQUAL(s, t) (strcmp(s, t) == 0)
+#define EQUAL(s, t) (streq(s, t))
 static
 int
 getstr(string key, int *ok)
@@ -74,20 +91,226 @@ END:
     return C.strs.vals[i];
 }
 
+// -----------------------------------------------------------------------
+// io buffer management
+
+#define asrdr(x) (io·Reader){(int (*)(void *, int, int, void *))x}
+// openio returns the Io slot associated with path, opening the file for
+// reading the first time it is requested. The path is interned first so that
+// slots can be matched by pointer identity.
+// path should be absolute 
+//
+// returns nil if the file was opened before and is flagged IOonce, or if it
+// could not be opened; panics when the I/O stack is full.
+Io*
+openio(byte *path)
+{
+    Io *it;
+
+    intern(&path);
+
+    // C is zero-initialized, so the stack pointer may not have been set yet
+    if (C.io == nil)
+        C.io = C.iostk;
+
+    // See if we have already opened file;
+    // If so, and it hasn't been flagged return it
+    // Only slots below C.io are in use: C.io itself is the next free slot.
+    for (it = C.iostk; it < C.io; ++it) {
+        if ((uintptr)it->path == (uintptr)path) {
+            if (it->kind & IOonce) {
+                return nil;
+            }
+            return it;
+        }
+    }
+
+    if ((C.io - C.iostk) >= arrlen(C.iostk)-1) 
+        panicf("out of I/O space!");
+
+    // NOTE(review): assumes io·open reports failure by returning nil -- confirm
+    C.io->f = io·open(path, "r");
+    if (C.io->f == nil)
+        return nil;
+
+    C.io->path = path;
+    bufio·initreader(&C.io->buf, asrdr(io·read), C.io->f);
+
+    return C.io++;
+} 
+
+// makeio claims the next free slot of the C.iostk stack for an in-memory
+// buffer named "<buffer>" (no file is opened) and returns it.
+// The reader window starts out empty: beg, pos and end all point at
+// buf.buf + bufio·ungets. Panics when the I/O stack is full.
+//
+// NOTE(review): b is set to buf.buf, i.e. bufio·ungets bytes *before* the
+// reader window; lex() writes the macro expansion at b and then grows
+// buf.end, so the bytes written and the bytes read may not line up -- confirm.
+// NOTE(review): kind is not set here, so freeio cannot tell this slot apart
+// from a file-backed one -- confirm where IOmac is meant to be assigned.
+Io*
+makeio()
+{
+    if ((C.io - C.iostk) >= arrlen(C.iostk)-1) 
+        panicf("out of I/O space!");
+
+    C.io->path = "<buffer>";
+    C.io->buf = (io·Buffer) {
+        .state    = bufio·rdr | bufio·end, 
+        .runesize = 0,
+        .h        = nil,
+        .size     = bufio·size,
+        .beg      = C.io->buf.buf + bufio·ungets,
+        .pos      = C.io->buf.buf + bufio·ungets,
+        .end      = C.io->buf.buf + bufio·ungets,
+    };
+    C.io->b = C.io->buf.buf;
+
+    // hand out the slot and advance the stack pointer past it
+    return C.io++;
+} 
+#undef asrdr
+
+// freeio releases the resource held by an Io slot (either the buffer b or
+// the stream f, which share a union) and clears its link, path and stored
+// position. The slot is not popped from the I/O stack and kind is not reset.
+//
+// TODO: Think about if this is always at the _end_ of the stack.
+//       Right now we don't have access to it.
+//
+// NOTE(review): the test below is `kind & ~IOmac`, which is zero for a slot
+// whose kind is exactly IOmac -- such a slot would be closed as a stream.
+// `kind & IOmac` was presumably intended -- confirm.
+// NOTE(review): makeio points b into the embedded io·Buffer storage rather
+// than at malloc'd memory, so free(io->b) looks invalid for those slots.
+void
+freeio(Io *io)
+{
+    if (io->kind & ~IOmac) {
+        free(io->b);
+    } else {
+        io·close(io->f);
+    }
+    io->link  = nil;
+    io->path  = nil;
+    io->store = (Pos){ 0 };
+}
+
+// -----------------------------------------------------------------------
+// universal compiler builtins
+
+// spelling of every keyword, indexed by its K* enum value (the KEYWORDS
+// x-macro is expanded here with only the string column kept)
+#define KEYWORD(a, b) b,
+byte *keywords[NUM_KEYWORDS] = { KEYWORDS };
+#undef KEYWORD
+
+// spelling of every preprocessor directive, indexed by its D* enum value
+#define DIRECTIVE(a, b, c) b,
+byte *directives[NUM_DIRECTIVES] = { DIRECTIVES };
+#undef DIRECTIVE
+
+// the single global compiler state; starts out all-zero and is filled in by init()
+struct Compiler C = { 0 }; 
+
+// -----------------------------------------------------------------------
+// flag handlers
+
+// pushinclude adds every directory named in dirs (a space separated list,
+// as given to -I) to the include search path C.inc.
+// dirs is modified in place: each separating space is overwritten with NUL.
+// Directories are interned and compared by pointer, so a directory that is
+// already on the search path is not added twice.
+void
+pushinclude(byte *dirs)
+{
+    string d, s, *it, *end, *grown;
+
+    while (*dirs != 0) {
+        // cut the list at the next separator; d == nil marks the last entry
+        d = strchr(dirs, ' ');
+        if (d != nil)
+            *d = '\0';
+
+        // consecutive separators produce empty entries: skip them
+        if (*dirs == '\0')
+            goto Nextdir;
+
+        // intern the directory itself (not the separator position)
+        s = dirs;
+        intern(&s);
+        for (it = C.inc.dir, end = it + C.inc.len; it != end; ++it) {
+            if ((uintptr)s == (uintptr)(*it))
+                goto Nextdir;
+        }
+
+        // grow only when full, but always append
+        if (C.inc.len == C.inc.cap) {
+            grown = realloc(C.inc.dir, (C.inc.cap + 20)*sizeof(*C.inc.dir));
+            if (grown == nil)
+                panicf("out of memory");
+            C.inc.dir  = grown;
+            C.inc.cap += 20;
+        }
+        C.inc.dir[C.inc.len++] = s;
+
+Nextdir:
+        if (d == nil)
+            break;
+        dirs = d + 1;
+    }
+
+}
+
+// -----------------------------------------------------------------------
+// main point of entry
+
 void
-init()
+init(void)
 {
-    int i, n;
+    int i;
 
     for (i = 0; i < arrlen(keywords); i++) {
         intern(&keywords[i]);
-        printf("keyword %d: %s", i, keywords[i]);
     }
+
+    for (i = 0; i < arrlen(directives); i++) {
+        intern(&directives[i]);
+    }
+
+    C.heap    = mem·makearena(mem·sys, nil);
+
+    C.inc.len = 0;
+    C.inc.cap = 100;
+    C.inc.dir = calloc(C.inc.cap, sizeof(*C.inc.dir));
+    C.inc.dir[C.inc.len++] = ".";
+
+    C.outfile = nil;
+    C.lxr     = (Lexer){ 0 };
 }
 
-int
-main()
+// compile runs the lexer over the file at path until it stops producing
+// tokens.
+//
+// If no output file has been chosen yet, one is derived from the base name
+// of path by replacing its extension with ".o" (or appending ".o" if it has
+// none); a path with an empty base name maps to "/dev/null". The derived
+// name is interned so it stays valid after this function returns.
+//
+// returns 0 if the token stream ended with Aeof, non-zero if the file could
+// not be opened or lexing stopped on an Anil (error) token.
+error
+compile(byte *path)
+{
+    Token tok;
+    byte *base, *ext, out[400];
+
+    if (!C.outfile) {
+        // locate the base name without modifying path: it is opened below
+        base = utf8·findrrune(path, '/');
+        base = (base != nil) ? base + 1 : path;
+
+        if (*base == '\0') {
+            C.outfile = "/dev/null";
+        } else {
+            // room for the name, a possibly appended ".o" and the NUL
+            if (strlen(base) + 3 > arrlen(out))
+                panicf("file name too long");
+            strcpy(out, base);
+
+            ext = utf8·findrrune(out, '.');
+            if (ext == nil)
+                ext = out + strlen(out);
+            ext[0] = '.';
+            ext[1] = 'o';
+            ext[2] = '\0';
+
+            // out lives on the stack: keep the interned copy instead
+            C.outfile = out;
+            intern(&C.outfile);
+        }
+    }
+
+    C.lxr.io = openio(path);
+    if (C.lxr.io == nil) {
+        printf("cc: could not open %s\n", path);
+        return 1;
+    }
+
+    while (tok = lex(&C.lxr), tok.kind > Aeof) {
+        ;
+    }
+    freeio(C.lxr.io);
+    C.lxr.io = nil;
+
+    // the loop stops on Aeof (clean end of input) or Anil (lexer error)
+    return tok.kind != Aeof;
+}
+
+// main: command line entry point.
+//   -o file   set the output file name
+//   -D arg    intern arg and hand it to dodefine
+//   -I dirs   add the space separated directories to the include path
+// The first remaining argument is compiled; the process exits with 1 on a
+// usage error, 2 if compile reports an error and 0 otherwise.
+error
+main(int argc, byte *argv[])
 {
+    byte *a, *src;
+    int   err;
+
     init();
-    return 0;
+
+    ARGBEGIN {
+    case 'o':
+        C.outfile = ARGF();
+        break;
+
+    case 'D':
+        a = ARGF();
+        if (a) {
+            intern(&a);
+            dodefine(&C.lxr, a);
+        }
+        break;
+
+    case 'I':
+        a = ARGF();
+        if (a)
+            pushinclude(a);
+        break;
+    } ARGEND
+
+    // usage is only printed when there is neither an input nor an output file
+    if (argc < 1 && C.outfile == nil) {
+        printf("usage: cc [-options] files\n");
+        exit(1);
+    }
+
+    // NOTE(review): "<stdin>" is passed to compile() like any other path;
+    // nothing visible here reads standard input -- confirm intent
+    src = (argc == 0) ? "<stdin>" : argv[0];
+    intern(&src);
+
+    if ((err = compile(src)), err) {
+        exit(2);
+    }
+
+    exit(0);
 }
diff --git a/sys/cmd/cc/cc.h b/sys/cmd/cc/cc.h
index 3228890..5488f3c 100644
--- a/sys/cmd/cc/cc.h
+++ b/sys/cmd/cc/cc.h
@@ -8,8 +8,11 @@
 /* core types */
 typedef struct Io    Io;
 typedef struct Pos   Pos;
+typedef struct Range Range;
 typedef struct Token Token;
 
+typedef struct Lexer Lexer;
+
 typedef struct Sym   Sym;
 typedef struct Type  Type;
 
@@ -22,33 +25,9 @@ typedef struct Expr  Expr;
 typedef struct SymTab SymTab;
 typedef struct StrTab StrTab;
 
-// -----------------------------------------------------------------------
-// lexing: byte stream -> tokens
-// pre-processor built in
-
-struct Pos
-{
-    int    col;
-    int    line;
-    string path;
-};
-
-#define DIRECTIVES                      \
-    DIRECTIVE(Dpragma,"pragma")         \
-    DIRECTIVE(Dinclude,"include")       \
-    DIRECTIVE(Dif,"if")                 \
-    DIRECTIVE(Ddefine,"define")         \
-    DIRECTIVE(Difdef,"ifdef")           \
-    DIRECTIVE(Difndef,"ifndef")         
-
-#define DIRECTIVE(a, b) a,
-enum { DIRECTIVES };
-#undef DIRECTIVE 
-
-#define DIRECTIVE(a, b) b,
-static byte *directives[] = { DIRECTIVES };
-#undef DIRECTIVE
+typedef struct Compiler Compiler;
 
+/* keywords of language */
 #define KEYWORDS                       \
     KEYWORD(Kauto,"auto")              \
     KEYWORD(Kregister,"register")      \
@@ -88,15 +67,57 @@ static byte *directives[] = { DIRECTIVES };
     KEYWORD(Kalignof,"alignof")        
 
 #define KEYWORD(a, b) a,
-enum { KEYWORDS };
+enum { KEYWORDS NUM_KEYWORDS };
 #undef KEYWORD
 
-#define KEYWORD(a, b) b,
-static byte *keywords[] = { KEYWORDS };
-#undef KEYWORD
+extern byte *keywords[NUM_KEYWORDS];
+
+// -----------------------------------------------------------------------
+// lexing: byte stream -> tokens
+// pre-processor built in
+
+/* source position: error reporting */
+struct Pos
+{
+    int    col;     /* column within the line */
+    int    line;    /* line number; the lexer increments it on newlines */
+    string path;    /* path of the io buffer the position refers to */
+};
+
+
+/* span of source text delimited by two positions; lex() records the lexer
+ * position before and after scanning a token in beg and end */
+struct Range
+{
+    Pos beg;
+    Pos end;
+};
+
+void errorat(Pos x, byte *fmt, ...);
+
+/* pre-processor */
+#define DIRECTIVES                          \
+    DIRECTIVE(Dpragma,"pragma", ppprag)     \
+    DIRECTIVE(Dinclude,"include", ppinc)    \
+    DIRECTIVE(Ddefine,"define", ppdef)      \
+    DIRECTIVE(Dundef,"undef", ppund)        \
+    DIRECTIVE(Dif,"if", ppif0)              \
+    DIRECTIVE(Delse, "else", ppif1)         \
+    DIRECTIVE(Difdef,"ifdef", ppif2)        \
+    DIRECTIVE(Difndef,"ifndef", ppif3)      \
+    DIRECTIVE(Dendif,"endif", ppend)        
+
+#define DIRECTIVE(a, b, c) a,
+enum { DIRECTIVES NUM_DIRECTIVES };
+#undef DIRECTIVE 
+
+extern byte *directives[NUM_DIRECTIVES]; 
 
-#undef KEYWORDS
+error  domacro(Lexer*);
+error  dodefine(Lexer *lx, string s);
+int    expandmacro(Lexer *lx, Sym *s, byte *dst);
 
+extern error (*macros[NUM_DIRECTIVES])(Lexer*); 
+
+/* tokenization of byte stream */
 #define TOKENS                   \
     TOK(Anil,"nil")              \
     TOK(Aeof,"eof")              \
@@ -140,8 +161,8 @@ static byte *keywords[] = { KEYWORDS };
     TOK(Arparen,")")             \
     TOK(Albrace,"{")             \
     TOK(Arbrace,"}")             \
-    TOK(Albrkt,"[")              \
-    TOK(Arbrkt,"]")              \
+    TOK(Albrakt,"[")             \
+    TOK(Arbrakt,"]")             \
     TOK(Adot,".")                \
     TOK(Aarrow,"->")             \
     TOK(Aqmark,"?")              \
@@ -154,47 +175,76 @@ static byte *keywords[] = { KEYWORDS };
 enum
 {
     TOKENS
+    NUM_TOKENS,
+
+    Vchar  = iota(8),
+    Vint   = iota(9),
+    Vlong  = iota(10),
+    Vvlong = iota(11),
+    Vusgn  = iota(12),
+    Vfloat = iota(13),
+    Vstr   = iota(14),
 };
 #undef TOK
 
-#define TOK(a, b) b,
-static byte *tokens[] = { TOKENS };
-#undef TOK
-#undef TOKENS
+extern byte *tokens[NUM_TOKENS];
 
 /* TODO: store literals in a big val */
+/*
+ * one lexed token.
+ * kind is one of the A* token ids; literals are tagged Alit or'd with one of
+ * the V* flags, which tells which member of val is meaningful.
+ */
 struct Token
 {
     uint32 kind;
-    struct Pos pos;
+    Range  pos;
     union {
-        string str;
+        byte  *s;
         double f;
         vlong  i;
-    };
+        uvlong ui;
+        byte   c;
+        ubyte  uc;
+    } val;
 };
 
 enum
 {
-    Svar,
-    Sfunc,
-    Smacro,
+    Svar   = 1 << 0,
+    Sfunc  = 1 << 1,
+    Smacro = 1 << 2,
 };
 
+/* a named entry of a symbol table; kind holds the S* flags given to define() */
 struct Sym
 {
     uint32 kind;
     string name;
+    /* per-kind payload; only the macro member exists so far */
+    union {
+        string macro;
+        /*Func   *func;*/
+    };
 };
 
+Sym  *lookup(SymTab *tab, string ident);
+Sym  *define(SymTab *tab, string ident, int kind);
+
+/*
+ * lexer state.
+ *   pos  current source position, saved and restored by pushio/popio
+ *   io   buffer currently being read
+ *   sym  table consulted for every identifier that is lexed
+ *   b    write cursor into buf, used while collecting numeric literals
+ *   buf  scratch space for the text of the token being scanned
+ */
 struct Lexer
 {
-    Token   tok;
+    Pos     pos;
     Io     *io;
     SymTab *sym;
-    byte    buf[1024];
+    byte   *b;
+    byte    buf[2*1024];
 };
 
+/* lex.c functions */
+Token lex(Lexer *);
+
+byte  getbyte(Lexer *);
+byte  getnsbyte(Lexer *l);
+rune  getrune(Lexer *);
+byte  ungetbyte(Lexer *);
+rune  ungetrune(Lexer *, rune r);
+
+void  pushio(Lexer *lx, Io *new);
+void  popio(Lexer *lx);
+
 // -----------------------------------------------------------------------
 // parsing & type resolution
 // tokens -> ast
@@ -294,35 +344,66 @@ struct Decl
 // -----------------------------------------------------------------------
 // compiler
 
+/* flags stored in Io.kind: openio refuses to hand out a slot flagged IOonce
+ * a second time, and freeio tests IOmac to choose how to release a slot */
+enum
+{
+    IOnil  = iota(0),
+    IOonce = iota(1),
+    IOmac  = iota(2),
+};
+
+/*
+ * one entry of the compiler's I/O stack (C.iostk).
+ *   buf    buffered reader the lexer pulls bytes from
+ *   path   interned file path, or "<buffer>" for slots made by makeio
+ *   kind   IO* flags
+ *   f / b  backing stream (openio) or raw buffer pointer (makeio)
+ *   store  lexer position saved by pushio while another buffer is read
+ *   link   buffer to return to on popio
+ */
 struct Io
 {
-    io·Buffer  b;
+    io·Buffer  buf;
     string     path;
-    uint32     flag;
+    uint32     kind;
+    union {
+        Stream  *f;
+        byte    *b;
+    };
+
+    Pos        store;
     struct Io *link;
 };
 
+Io*  openio(byte *path);
+Io*  makeio();
+void freeio(Io *io);
+
 struct StrTab
 {
-  int32  n_buckets, size, n_occupied, upper_bound;
+  int32  n_buckets;
+  int32  size;
+  int32  n_occupied; 
+  int32  upper_bound;
   int32  *flags;
   string *keys;
   int32  *vals;
 };
 
-static struct 
+int32  intern(byte **str);
+string internview(byte* beg, byte *end);
+
+/* main data */
+/*
+ * global compiler state; a single instance C is defined in cc.c.
+ *   heap     arena that symbols are allocated from
+ *   strs     table of interned strings
+ *   inc      include search path: len directories used, room for cap
+ *   io       next free slot of iostk
+ *   iostk    stack of open I/O buffers
+ *   outfile  output file name (nil until chosen)
+ *   lxr      the lexer
+ */
+struct Compiler
 {
     mem·Arena *heap;
     StrTab     strs;
 
-    string *include;
+    struct {
+        int     cap;
+        int     len;
+        string *dir;
+    } inc;
+
     Io     *io;
     Io      iostk[100];
-} C;
 
-void   init();
+    string outfile;
 
-int32  intern(byte **str);
-string internview(byte* beg, byte *end);
+    Lexer  lxr;
+}; 
+extern Compiler C;
+
+void   init();
 
 #undef iota
diff --git a/sys/cmd/cc/lex.c b/sys/cmd/cc/lex.c
index af3bbf3..6b85d8c 100644
--- a/sys/cmd/cc/lex.c
+++ b/sys/cmd/cc/lex.c
@@ -1,38 +1,649 @@
 #include "cc.h"
 
-static
-void
-errorat(Pos x, byte *fmt, ...)
+#include <libn/macro/map.h>
+
+// -----------------------------------------------------------------------
+// simple wrappers
+
+// getbyte reads the next byte from the lexer's current io buffer
+byte
+getbyte(Lexer *l)
 {
-    va_list args;
-    va_start(args, fmt);
-    printf("error %d:", x.line);
-    vprintf(fmt, args);
-    va_end(args);
+    return bufio·getbyte(&l->io->buf);
+}
+
+// getnsbyte reads bytes from the current io buffer until one is not
+// whitespace (per isspace) and returns that byte
+byte
+getnsbyte(Lexer *l)
+{
+    byte c;
+
+    do {
+        c = bufio·getbyte(&l->io->buf);
+    } while (isspace(c));
+
+    return c;
+}
+
+// getrune reads the next rune from the lexer's current io buffer
+rune
+getrune(Lexer *l)
+{
+    return bufio·getrune(&l->io->buf);
 }
 
-static
+// ungetbyte pushes a byte back onto the lexer's current io buffer and
+// returns whatever bufio·ungetbyte returns.
+// NOTE(review): b is passed to bufio·ungetbyte without ever being assigned;
+// if that function stores the byte it is given, the buffer receives an
+// indeterminate value. The caller cannot supply the byte through this
+// signature -- confirm against bufio·ungetbyte.
 byte
-getbyte(struct Lexer *lex)
+ungetbyte(Lexer *lx)
 {
-    return bufio·getbyte(&lex->buf);
+    byte b;
+    return bufio·ungetbyte(&lx->io->buf, b);
 }
 
+// ungetrune pushes rune r back onto the lexer's current io buffer
+rune
+ungetrune(Lexer *l, rune r)
+{
+    return bufio·ungetrune(&l->io->buf, r);
+}
+
+// -----------------------------------------------------------------------
+// main lexer 
+
+#define TOK(a, b) b,
+byte *tokens[NUM_TOKENS] = { TOKENS };
+#undef TOK
+
+// value of each ASCII digit character for bases up to 16; every byte that
+// is not listed maps to 0, so callers must tell a real '0' apart from an
+// invalid digit themselves
+static uint8 Atoi[256] = 
+{
+    ['0'] = 0,  ['1'] = 1,  ['2'] = 2,  ['3'] = 3,  ['4'] = 4,  ['5'] = 5,
+    ['6'] = 6,  ['7'] = 7,  ['8'] = 8,  ['9'] = 9,  ['a'] = 10, ['A'] = 10,
+    ['b'] = 11, ['B'] = 11, ['c'] = 12, ['C'] = 12, ['d'] = 13, ['D'] = 13,
+    ['e'] = 14, ['E'] = 14, ['f'] = 15, ['F'] = 15,
+};
+
 static
 error
-ungetbyte(struct Lexer *lex, byte b)
+escape(Lexer *lx, int x, int *flag, vlong *val)
 {
-    return bufio·ungetbyte(&lex->buf, b);
+    int i, u, c;
+    vlong l;
+
+    c = getrune(lx);
+
+    switch (c) {
+    case EOF: 
+        errorat(lx->pos, "EOF in string");
+        return 1;
+    case '\n':
+        errorat(lx->pos, "newline in string");
+        return 1;
+    case '\\':
+        break;
+    default:
+        if (c == x) 
+            return 1;
+        *val = c;
+        return 0;
+    }
+
+    u = 0;
+	c = getrune(lx);
+
+	switch(c) {
+	case 'x':
+		i = 2;
+		*flag = 1;	
+		goto hex;
+
+	case 'u':
+		i = 4;
+		u = 1;
+		goto hex;
+
+	case 'U':
+		i = 8;
+		u = 1;
+		goto hex;
+
+	case '0': case '1': case '2': case '3':
+	case '4': case '5': case '6': case '7':
+		*flag = 1;
+		goto oct;
+
+	case 'a':  c = '\a'; break;
+	case 'b':  c = '\b'; break;
+	case 'f':  c = '\f'; break;
+	case 'n':  c = '\n'; break;
+	case 'r':  c = '\r'; break;
+	case 't':  c = '\t'; break;
+	case 'v':  c = '\v'; break;
+	case '\\': c = '\\'; break;
+
+	default:
+		if(c != x) errorat(lx->pos, "unknown escape sequence: %c", c);
+	}
+	*val = c;
+	return 0;
+
+hex:
+	l = 0;
+	for(; i > 0; i--) {
+		c = getbyte(lx);
+		if (c >= '0' && c <= '9') {
+			l = l*16 + c-'0';
+			continue;
+		}
+		if (c >= 'a' && c <= 'f') {
+			l = l*16 + c-'a' + 10;
+			continue;
+		}
+		if (c >= 'A' && c <= 'F') {
+			l = l*16 + c-'A' + 10;
+			continue;
+		}
+		errorat(lx->pos, "non-hex character in escape sequence: %c", c);
+		ungetbyte(lx);
+		break;
+	}
+	if (u && (l > RuneMax || (0xd800 <= l && l < 0xe000))) {
+		errorat(lx->pos, "invalid unicode code point in escape sequence: %#llx", l);
+		l = RuneErr;
+	}
+	*val = l;
+	return 0;
+
+oct:
+	l = c - '0';
+	for (i = 2; i > 0; i--) {
+		c = getbyte(lx);
+		if (c >= '0' && c <= '7') {
+			l = l*8 + c-'0';
+			continue;
+		}
+		errorat(lx->pos, "non-octal character in escape sequence: %c", c);
+		ungetbyte(lx);
+	}
+	if (l > 255) errorat(lx->pos, "octal escape value > 255: %d", l);
+
+	*val = l;
+	return 0;
 }
 
-void
-lex(struct Lexer *lex)
+// helpers that generate the switch cases of lex() for operator tokens.
+// they expand inside lex() and use its locals `tok`, `b` and `lx`.
+//
+// CASEn(first, kind1, b1, kind2, ...): a token starting with byte `first`
+// is kind1 on its own; if the following byte is b1 the token is kind2
+// instead (and so on for b2, b3). A following byte that matches none of the
+// alternatives is pushed back with ungetbyte.
+#define CASE1(stmt1, kind1)                                         \
+    case stmt1:                                                     \
+        tok.kind = kind1;                                           \
+    break;
+
+#define CASE2(stmt1, kind1, b1, kind2)                              \
+    case stmt1:                                                     \
+        tok.kind = kind1;                                           \
+        b = getbyte(lx);                                            \
+        if (b == b1)                                                \
+            tok.kind = kind2;                                       \
+        else                                                        \
+            ungetbyte(lx);                                          \
+    break;
+
+#define CASE3(stmt1, kind1, b1, kind2, b2, kind3)                   \
+    case stmt1:                                                     \
+        tok.kind =  kind1;                                          \
+        b = getbyte(lx);                                            \
+        if (b == b1)                                                \
+            tok.kind = kind2;                                       \
+        else if (b == b2)                                           \
+            tok.kind = kind3;                                       \
+        else                                                        \
+            ungetbyte(lx);                                          \
+    break;
+
+#define CASE4(stmt1, kind1, b1, kind2, b2, kind3, b3, type4)        \
+    case stmt1:                                                     \
+        tok.kind =  kind1;                                          \
+        b = getbyte(lx);                                            \
+        if (b == b1)                                                \
+            tok.kind = kind2;                                       \
+        else if (b == b2)                                           \
+            tok.kind = kind3;                                       \
+        else if (b == b3)                                           \
+            tok.kind = type4;                                       \
+        else                                                        \
+            ungetbyte(lx);                                          \
+    break;
+
+
+/*
+ * putnum appends one byte of a numeric literal to the lexer scratch buffer.
+ * one slot is always kept free for the terminating NUL.
+ * returns 1 if the buffer is full (nothing is stored), 0 otherwise.
+ */
+static
+error
+putnum(Lexer *lx, int b)
+{
+    if (lx->b >= lx->buf + arrlen(lx->buf) - 1)
+        return 1;
+
+    *lx->b++ = b;
+    return 0;
+}
+
+/*
+ * lex scans and returns the next token of the lexer's current io buffer.
+ *
+ * whitespace and comments are skipped, '#' lines are handed to domacro and
+ * identifiers found in the symbol table are expanded into a fresh io buffer
+ * that is then lexed in place of the identifier.
+ *
+ * tok.pos holds the lexer position before and after the token. A token of
+ * kind Anil is returned if a preprocessor directive fails. The function
+ * panics on EOF (popio is not wired in yet) and when a token does not fit
+ * the scratch buffer, and aborts on a byte that starts no token.
+ */
+Token
+lex(Lexer *lx)
 {
-    int b;
+    int    b, n, f;
+    vlong  v;
+    uint   u;
+    rune   r;
+    string s;
+    double d;
+    byte  *e;
+    Token  tok;
+    Sym   *sym;
+    Io    *io;
+
+    // the scratch cursor is reset on the way out (see Return); reset it on
+    // the way in as well so the first call on a zeroed Lexer is valid
+    lx->b = lx->buf;
+
+GetByte:
+    b = getbyte(lx);
+Dispatch:
+    tok.pos.beg = lx->pos;
+
+    // NOTE(review): getbyte returns `byte`; if that type is signed, bytes
+    // >= 0x80 arrive here negative and never satisfy b >= RuneSelf -- confirm
+    if (b >= RuneSelf || isalpha(b) || b == '_')
+        goto TAlpha;
+    if (isdigit(b))
+        goto TNum;
 
-    b = getbyte(lex);
-TOP:
     switch (b) {
+    case ' ': case '\n': case '\r': case '\t': case '\v': case '\f':
+        // count the byte that got us here too: it may itself be a newline
+        do {
+            if (b == '\n') lx->pos.line++;
+        } while (b = getbyte(lx), isspace(b));
+        goto Dispatch;
+
+    case '\'':
+        if (escape(lx, '\'', &f, &v)) {
+            errorat(lx->pos, "empty literal or escaped ' in char literal");
+            v = '\'';
+        }
+        // the next character has to be the closing quote
+        if (!escape(lx, '\'', &f, &v)) {
+            errorat(lx->pos, "missing '");
+            ungetbyte(lx);
+        }
+
+        if (v > 0xff) {
+            errorat(lx->pos, "overflowed character literal");
+            v = 0;
+        }
+        tok.kind  = Alit | Vchar;
+        tok.val.c = v;
+    break;
+
+    case '"':
+        s = str·makecap("", 0, 8);
+        for (;;) {
+            // f reports whether v is a raw byte (\x or octal escape)
+            f = 0;
+            if (escape(lx, '"', &f, &v))
+                break;
+
+            if (v < RuneSelf || f)
+                str·appendbyte(&s, v);
+            else {
+                r = v;
+                b = utf8·runelen(r);
+                utf8·runetochar(lx->buf, &r);
+                str·appendlen(&s, b, lx->buf);
+            }
+        }
+        tok.kind  = Alit | Vstr;
+        tok.val.s = s; 
+        intern(&tok.val.s);
+
+        // the interned copy is what the token keeps
+        str·free(s);
+    break;
 
+    case '.':
+        tok.kind = Adot;
+        b = getbyte(lx);
+
+        if (isdigit(b)) {
+            // literal of the form .5: the dot is part of the number
+            if (putnum(lx, '.'))
+                goto Numfull;
+            goto TFlt;
+        } else if (b == '.') {
+            b = getbyte(lx);
+            if (b == '.') {
+                tok.kind = Aellip;
+                break;
+            }
+            errorat(lx->pos, "invalid token '..'");
+        }
+        ungetbyte(lx);
+    break;
+
+    case '<':
+        tok.kind = Alt;
+        b = getbyte(lx);
+
+        if (b == '<') {
+            tok.kind = Alsft;
+            b = getbyte(lx);
+            if (b == '=') 
+                tok.kind = Alsftasn;
+            else 
+                ungetbyte(lx);
+        } else if (b == '=') 
+            tok.kind = Alteq;
+        else 
+            ungetbyte(lx);
+    break;
+
+    case '>':
+        tok.kind = Agt;
+        b = getbyte(lx);
+
+        if (b == '>') {
+            tok.kind = Arsft;
+            b = getbyte(lx);
+            if (b == '=') 
+                tok.kind = Arsftasn;
+            else 
+                ungetbyte(lx);
+        } else if (b == '=') 
+            tok.kind = Agteq;
+        else 
+            ungetbyte(lx);
+    break;
+
+    case '/': 
+        tok.kind = Adiv;
+        b = getbyte(lx);
+
+        if (b == '=') 
+            tok.kind = Adivasn;
+        else if (b == '/') { 
+            // line comment: the terminating newline is counted by the
+            // whitespace case once we dispatch on it
+            while (b != EOF && b != '\n') 
+                b = getbyte(lx);
+            goto Dispatch;
+        } else if (b == '*') {
+            // block comment; nested openers raise the level
+            // NOTE(review): a closer preceded by an extra '*' (as in "**/")
+            // is not recognized, and the byte after a lone '/' is skipped
+            int level = 1;
+            b = getbyte(lx);
+            while (b != EOF && level > 0) {
+                if (b == '/') {
+                    b = getbyte(lx);
+                    if (b == '*') 
+                        level++;
+                } else if (b == '*') {
+                    b = getbyte(lx);
+                    if (b == '/') 
+                        level--;
+                } 
+                if (b == '\n') lx->pos.line++;
+                b = getbyte(lx);
+            }
+            goto Dispatch;
+        } else 
+            ungetbyte(lx);
+    break;
+
+    case '#':
+        if (domacro(lx)) {
+            tok.kind = Anil;
+            errorat(lx->pos, "failed to perform preprocessor directive");
+            return tok;
+        }
+        goto GetByte;
+    break;
+
+    case EOF:
+        panicf("need to implement popio");
+
+    CASE1('(', Alparen)
+    CASE1(')', Arparen)
+    CASE1('{', Albrace)
+    CASE1('}', Arbrace)
+    CASE1('[', Albrakt)
+    CASE1(']', Arbrakt)
+    CASE1(',', Acomma)
+    CASE1('?', Aqmark)
+    CASE1(';', Asemi)
+    CASE1('~', Aneg)
+    CASE1(':', Acolon)
+    CASE2('^', Axor, '=', Axorasn)
+    CASE2('!', Anot, '=', Aneq)
+    CASE2('*', Astar,'=', Amulasn)
+    CASE2('=', Aasn, '=', Aeq)
+    CASE2('%', Amod, '=', Amodasn)
+    CASE3('+', Aadd, '=', Aaddasn, '+', Ainc)
+    CASE3('&', Aand, '=', Aandasn, '&', Aandand)
+    CASE3('|', Aor,  '=', Aorasn,  '|', Aoror)
+    CASE4('-', Asub, '=', Asubasn, '-', Adec, '>', Aarrow)
+
+    default:
+        tok.kind = Anil;
+        errorat(lx->pos, "invalid token, crashing");
+        abort();
     }
-} 
+
+    goto Return;
+
+    TNum:
+        // collect the digits (and '_' separators) into the scratch buffer
+        do {
+            if (putnum(lx, b))
+                goto Numfull;
+        } while (b = getbyte(lx), isdigit(b) || b == '_');
+        
+        if (b == '.' || tolower(b) == 'e')
+            goto TFlt;
+    TInt:
+        // b is the first byte past the literal: hand it back
+        ungetbyte(lx);
+
+        n = 10;
+        s = lx->buf;
+        // NOTE(review): TNum only collects digits and '_', so a base prefix
+        // never reaches the buffer and 0x / 0b / 0o literals are not lexed
+        // as one token yet. Only peek at bytes that belong to this literal.
+        if (*s == '0' && s + 1 < lx->b) {
+            b = *++s;
+            switch (b) {
+            case 'x': n = 16; break;
+            case 'b': n = 2;  break;
+            case 'o': n = 8;  break;
+            default: --s;
+            }
+        }
+
+        v = 0;
+        for (; s != lx->b ; s++) {
+            b = *s;
+            if (b == '_') continue;
+
+            // Atoi maps every non-digit to 0 as well
+            f = Atoi[b];
+            if (f == 0 && b != '0') 
+                break;
+
+            if (f >= n) {
+                errorat(lx->pos, "digit '%c' out of range for base %d", b, n);
+                f = 0;
+            }
+
+            if (v > (UINT64_MAX - f) / n) {
+                errorat(lx->pos, "integer literal overflow");
+                v = 0;
+                break;
+            }
+
+            v = v * n + f;
+        }
+        tok.kind  = Alit | Vint;
+        tok.val.i = v;
+        /* TODO: Suffixes!
+        if (tolower(b) == 'u') {
+            tok.kind |= Vusgn;
+            b = getbyte(lx);
+        }
+        */
+    goto Return;
+
+    TFlt:
+        // on entry b is the first byte that has not been stored yet
+        if (b == '.') {
+            if (putnum(lx, b))
+                goto Numfull;
+            b = getbyte(lx);
+        }
+
+        while (isdigit(b)) {
+            if (putnum(lx, b))
+                goto Numfull;
+            b = getbyte(lx);
+        }
+
+        if (tolower(b) == 'e') {
+            // keep the exponent marker and sign: strtod needs them
+            if (putnum(lx, b))
+                goto Numfull;
+            b = getbyte(lx);
+            if (b == '-' || b == '+') {
+                if (putnum(lx, b))
+                    goto Numfull;
+                b = getbyte(lx);
+            }
+
+            if (!isdigit(b)) 
+                errorat(lx->pos, "expected number after exponent, found %c", b);
+
+            while (isdigit(b)) {
+                if (putnum(lx, b))
+                    goto Numfull;
+                b = getbyte(lx);
+            }
+        }
+        // b is the first byte past the literal: hand it back
+        ungetbyte(lx);
+
+        *lx->b = '\0';
+        d      = strtod(lx->buf, nil);
+
+        tok.kind  = Alit | Vfloat;
+        tok.val.f = d;
+
+    goto Return;
+
+    TAlpha:
+        u = b; 
+        s = lx->buf;
+        e = lx->buf + arrlen(lx->buf);
+        for (;;) {
+            // keep room for one more rune (at most 4 bytes of UTF-8) and
+            // the terminating NUL
+            if (s + 4 >= e) {
+                *s = '\0';
+                errorat(lx->pos, "identifier too long for buffer: %s", lx->buf);
+                goto Nospace;
+            }
+            if (u >= RuneSelf) {
+                // multi-byte sequence: re-read it as a whole rune
+                ungetbyte(lx);
+                r = getrune(lx);
+                if (!utf8·isletter(r) && !utf8·isdigit(r) && r != 0xb7) {
+                    errorat(lx->pos, "invalid identifier character %d", r);
+                }
+                s += utf8·runetochar(s, &r);
+            } else if (!isalnum(u) && u != '_') 
+                break;
+            else 
+                *s++ = u;
+            u = getbyte(lx);
+        }
+        // u is the first byte past the identifier: hand it back
+        ungetbyte(lx);
+
+        *s = '\0';
+        tok.kind  = Aident;
+        tok.val.s = lx->buf;
+
+        // keywords are interned first (see init), so they own the lowest ids
+        n = intern(&tok.val.s);
+        if (n < arrlen(keywords)) {
+            tok.kind = Akeywd;
+        } 
+
+        // a defined symbol is expanded into a fresh buffer and lexed instead
+        sym = lookup(lx->sym, tok.val.s);
+        if (sym) {
+            io = makeio();
+            io->buf.end += expandmacro(lx, sym, io->b);
+            pushio(lx, io);
+            goto GetByte;
+        }
+
+Return:
+    lx->b       = lx->buf;
+    tok.pos.end = lx->pos;
+    return tok;
+
+Numfull:
+    errorat(lx->pos, "number overflows lexer buffer");
+Nospace:
+    panicf("aborting compilation");
+}
+
+#undef CASE4
+#undef CASE3
+#undef CASE2
+#undef CASE1
+
+// -----------------------------------------------------------------------
+// push/pop io objects
+
+// pushio makes `new` the buffer the lexer reads from. The current position
+// is saved in the buffer being suspended, which becomes new's link, and the
+// lexer position restarts at line 0, column 0 of new's path.
+void
+pushio(Lexer *lx, Io *new)
+{
+    Io *cur;
+
+    cur        = lx->io;
+    new->link  = cur;
+    cur->store = lx->pos;
+    lx->io     = new;
+
+    lx->pos.line = 0;
+    lx->pos.col  = 0;
+    lx->pos.path = new->path;
+}
+
+// popio resumes the buffer the current one is linked to and restores the
+// position saved in it; panics if there is no buffer to return to.
+void
+popio(Lexer *lx)
+{
+    Io *resume = lx->io->link;
+
+    if (resume == nil) {
+        panicf("no buffer left");
+    }
+
+    lx->io  = resume;
+    lx->pos = resume->store;
+}
+
+// -----------------------------------------------------------------------
+// symbol tables
+
+// identifiers are interned before they reach the symbol table, so the
+// pointer value itself serves as hash and as equality test
+#define PTR_HASH(p)        (uintptr)(p)
+#define PTR_EQUAL(p1, p2) ((uintptr)(p1) == (uintptr)(p2))
+
+// map from interned identifier to symbol; the fields come from the
+// MAP_STRUCT_BODY macro of libn/macro/map.h
+struct SymTab
+{
+    MAP_STRUCT_BODY(string, Sym*);
+};
+
+// lookup returns the symbol stored in tab under the interned name ident,
+// or nil if there is none. A nil table is treated as empty: the lexer calls
+// this for every identifier, including before any table has been attached.
+Sym*
+lookup(SymTab *tab, string ident)
+{
+    int idx;
+
+    if (tab == nil)
+        return nil;
+
+    MAP_GET(idx, tab, ident, PTR_HASH, PTR_EQUAL);
+
+    // an index inside the bucket array is treated as a hit
+    if (idx < tab->n_buckets)
+        return tab->vals[idx];
+
+    return nil;
+}
+
+// moresymtab resizes tab to hold n buckets; passed to MAP_PUT as the resize
+// callback. The body, including the return value, comes from MAP_GROW.
+static
+int
+moresymtab(SymTab *tab, int n)
+{
+    MAP_GROW(tab, string, Sym*, n, PTR_HASH, mem·sys.alloc, mem·sys.free, nil);
+}
+
+// putsym stores sym in tab under sym->name. The body, including the return
+// value and how *err is reported, comes from MAP_PUT.
+static
+int
+putsym(SymTab *tab, Sym *sym, error *err)
+{
+    MAP_PUT(tab, sym->name, sym, PTR_HASH, PTR_EQUAL, moresymtab, err);
+}
+
+// define allocates a symbol with the given name and kind from the compiler
+// arena, inserts it into tab and returns it.
+// NOTE(review): err is neither initialized nor inspected after putsym, so a
+// failed insertion goes unnoticed; the allocation result is not checked
+// either -- confirm whether arena allocation can fail.
+Sym*
+define(SymTab *tab, string name, int kind)
+{
+    Sym *sym;
+    error err;
+
+    sym = mem·arenaalloc(C.heap, 1, sizeof(*sym));
+    sym->name = name;
+    sym->kind = kind;
+
+    putsym(tab, sym, &err);
+
+    return sym;
+}
+
+// -----------------------------------------------------------------------
+// error reporting
+
+// errorat prints a diagnostic for source position x to standard output: the
+// line number followed by the printf-style message built from fmt and its
+// arguments, terminated by a newline so consecutive diagnostics do not run
+// together. Only x.line is reported.
+void
+errorat(Pos x, byte *fmt, ...)
+{
+    va_list args;
+    va_start(args, fmt);
+
+    printf("error %d: ", x.line);
+
+    vprintf(fmt, args);
+    va_end(args);
+
+    // callers pass messages without a trailing newline
+    printf("\n");
+}
+
diff --git a/sys/cmd/cc/rules.mk b/sys/cmd/cc/rules.mk
index fe30305..b32d5b6 100644
--- a/sys/cmd/cc/rules.mk
+++ b/sys/cmd/cc/rules.mk
@@ -3,13 +3,13 @@ include share/push.mk
 
 # Local sources
 SRCS_$(d) := \
+	$(d)/pp.c  \
 	$(d)/lex.c \
-	$(d)/sym.c \
 	$(d)/cc.c
 
 LIBS_$(d) :=
 BINS_$(d) := $(d)/cc
-TSTS_$(d) := 
+UNTS_$(d) := 
 
 include share/paths.mk
 
diff --git a/sys/cmd/cc/sym.c b/sys/cmd/cc/sym.c
deleted file mode 100644
index ef40bce..0000000
--- a/sys/cmd/cc/sym.c
+++ /dev/null
@@ -1,32 +0,0 @@
-#include "cc.h"
-
-#include <libn/macro/map.h>
-
-#define PTR_HASH(p)        (uintptr)(p)
-#define PTR_EQUAL(p1, p2) ((uintptr)(p1) == (uintptr)(p2))
-
-#if 0
-struct SymTab
-{
-    MAP_STRUCT_BODY(string, Sym*);
-};
-
-Sym*
-getsym(SymTab *tab, string key)
-{
-    MAP_GET(tab, key, PTR_HASH, PTR_EQUAL, nil);
-}
-
-static
-int
-moresymtab(SymTab *tab, int n)
-{
-    MAP_GROW(tab, string, Sym*, n, PTR_HASH);
-}
-
-int
-putsym(SymTab *tab, Sym *sym, error *err)
-{
-    MAP_PUT(tab, sym->name, sym, PTR_HASH, PTR_EQUAL, moresymtab, err);
-}
-#endif
diff --git a/sys/libbio/io/newick.c b/sys/libbio/io/newick.c
index f9be2d2..da94ef2 100644
--- a/sys/libbio/io/newick.c
+++ b/sys/libbio/io/newick.c
@@ -236,7 +236,7 @@ parse(struct Parser *p)
                     errorf("incorrect format: unmatched comment bracket '['");
                     goto ERROR;
                 }
-                str·append(node->comment, tokstr(tok));
+                str·append(&node->comment, tokstr(tok));
             }
         break;
 
diff --git a/sys/libn/memory.c b/sys/libn/memory.c
index 4cf92b2..dce0c36 100644
--- a/sys/libn/memory.c
+++ b/sys/libn/memory.c
@@ -164,3 +164,8 @@ memset64(void *dst, uint64 val, uintptr size)
         ((byte*)dst)[i] = ((byte*)&val)[i&7];
     }
 }
+
+// -------------------------------------------------------------------------
+// First argument
+
+char *argv0;
diff --git a/sys/libn/string.c b/sys/libn/string.c
index 694cdea..fb92a04 100644
--- a/sys/libn/string.c
+++ b/sys/libn/string.c
@@ -150,6 +150,34 @@ utf8·findrune(byte* s, long c)
     return nil;
 }
 
+// findrrune returns a pointer to the last occurrence of the code point c in
+// the NUL terminated UTF-8 string s, or nil if it does not occur.
+byte*
+utf8·findrrune(byte* s, long c)
+{
+    long  n;
+    rune  r;
+    byte *last;
+
+    // code points below RuneSync are handed to strrchr
+    if (c < RuneSync) 
+        return strrchr(s, c);
+
+    last = nil;
+    while (*(ubyte*)s != 0) {
+        if (*(ubyte*)s < RuneSelf) {
+            // single byte character
+            if (*(ubyte*)s == c)
+                last = s;
+            s++;
+        } else {
+            // multi-byte sequence: decode it and step over all of it
+            n = utf8·chartorune(&r, s);
+            if (r == c) 
+                last = s;
+            s += n;
+        }
+    }
+
+    return last;
+}
+
 #undef Bit
 #undef Tbyte
 #undef RuneX
@@ -319,7 +347,7 @@ str·fit(string *s)
 // string to our buffer. The result is reallocated if not enough room is present
 // in the buffer.
 void
-str·appendcount(string *s, vlong n, const byte* b)
+str·appendlen(string *s, vlong n, const byte* b)
 {
     vlong bl = strlen(b);
     if (n > bl) panicf("attempted to make a substring longer than string");
@@ -339,7 +367,7 @@ str·appendcount(string *s, vlong n, const byte* b)
 void
 str·append(string *s, const byte* b)
 {
-    return str·appendcount(s, strlen(b), b);
+    return str·appendlen(s, strlen(b), b);
 }
 
 // AppendByte will append the given byte to our string.
@@ -358,24 +386,11 @@ str·appendbyte(string *s, const byte b)
     *s[h->len] = '\0'; // NOTE: I don't think an explicit zero is required..?
 }
 
-// Equals returns true if string s and t are equivalent.
-bool
-str·equals(const string s, const string t)
-{
-    vlong sL = str·len(s);
-    vlong tL = str·len(t);
-    if (sL != tL) return false;
-
-    return memcmp(s, t, sL) == 0;
-}
-
-//------------------------------------------------------------------------
-// Utility Methods 
-
 /*
  * Appendf will append the given formatted string to our buffer.
  * Returns the newly minted string
  */
+
 void
 str·appendf(string *s, const byte* fmt, ...)
 {
@@ -399,6 +414,31 @@ str·appendf(string *s, const byte* fmt, ...)
     h->len += n;
 }
 
+// Equals reports whether s and t have the same length and the same bytes.
+bool
+str·equals(const string s, const string t)
+{
+    vlong slen, tlen;
+
+    slen = str·len(s);
+    tlen = str·len(t);
+
+    return slen == tlen && memcmp(s, t, slen) == 0;
+}
+
+//------------------------------------------------------------------------
+// Utility Methods 
+
+// Read copies up to n items of size bytes each from the start of s into
+// buf, never more than the length of s, and returns the number of bytes
+// copied. A non-positive size or n copies nothing and returns 0.
+// Every call reads from the beginning of s: no position is kept.
+int
+str·read(string s, int size, int n, void *buf)
+{
+    vlong want, have;
+
+    if (size <= 0 || n <= 0)
+        return 0;
+
+    // multiply in the wide type so the request cannot overflow int
+    want = (vlong)n * size;
+    have = str·len(s);
+    if (have < want)
+        want = have;
+
+    // the result has to fit the int return type
+    if (want > 0x7fffffff)
+        want = 0x7fffffff;
+
+    memcpy(buf, s, want);
+
+    return want;
+}
+
 // Find will find the first occurence of
 // substr in the string Returns -1 if nothing was found.
 int
@@ -502,7 +542,7 @@ str·join(vlong len, byte** fields, const byte* sep)
     for (j = 0; j < len; j++) {
         str·append(&s, fields[j]);
         if (j < len - 1) 
-            str·appendcount(&s, 1, sep);
+            str·appendlen(&s, 1, sep);
     }
 
     return s;
author	Nicholas Noll <nbnoll@eml.cc>	2020-05-17 16:19:17 -0700
committer	Nicholas Noll <nbnoll@eml.cc>	2020-05-17 16:19:17 -0700
commit	9ec5bed6a7d715ffa69851569485a685dd69db2e (patch)
tree	c25eaef1000f52caacf25ee398e54f63067b8feb /sys
parent	c92c89280d036240a75ff122084dd58cde473394 (diff)