aboutsummaryrefslogtreecommitdiff
path: root/sys
diff options
context:
space:
mode:
authorNicholas Noll <nbnoll@eml.cc>2020-05-17 16:19:17 -0700
committerNicholas Noll <nbnoll@eml.cc>2020-05-17 16:19:17 -0700
commit9ec5bed6a7d715ffa69851569485a685dd69db2e (patch)
treec25eaef1000f52caacf25ee398e54f63067b8feb /sys
parentc92c89280d036240a75ff122084dd58cde473394 (diff)
prototype of front end cli
Diffstat (limited to 'sys')
-rw-r--r--sys/cmd/cc/cc.c237
-rw-r--r--sys/cmd/cc/cc.h189
-rw-r--r--sys/cmd/cc/lex.c649
-rw-r--r--sys/cmd/cc/rules.mk4
-rw-r--r--sys/cmd/cc/sym.c32
-rw-r--r--sys/libbio/io/newick.c2
-rw-r--r--sys/libn/memory.c5
-rw-r--r--sys/libn/string.c74
8 files changed, 1060 insertions, 132 deletions
diff --git a/sys/cmd/cc/cc.c b/sys/cmd/cc/cc.c
index cddea01..39ad5f2 100644
--- a/sys/cmd/cc/cc.c
+++ b/sys/cmd/cc/cc.c
@@ -1,6 +1,9 @@
#include "cc.h"
#include <libn/macro/map.h>
+// -----------------------------------------------------------------------
+// string interning
+
/* jenkins' one at a time hash */
static
int32
@@ -24,8 +27,22 @@ hash_string(byte* s)
return h;
}
+/*
+ * nil-aware string equality used by the intern table:
+ * two nil pointers are equal, nil never equals a real string,
+ * otherwise the contents are compared with strcmp.
+ */
+static
+int
+streq(byte *s, byte *t)
+{
+    if (s == nil || t == nil)
+        return s == t;
+
+    return strcmp(s, t) == 0;
+}
+
#define HASH(s) hash_string(s)
-#define EQUAL(s, t) (strcmp(s, t) == 0)
+#define EQUAL(s, t) (streq(s, t))
static
int
getstr(string key, int *ok)
@@ -74,20 +91,226 @@ END:
return C.strs.vals[i];
}
+// -----------------------------------------------------------------------
+// io buffer management
+
+#define asrdr(x) (io·Reader){(int (*)(void *, int, int, void *))x}
+// openio returns the I/O stack entry for the file at path, opening the
+// file into the next free slot if it is not on the stack yet.
+// path should be absolute. It is interned first so that entries can be
+// matched by pointer identity.
+// Returns nil when the file is already on the stack and flagged IOonce.
+Io*
+openio(byte *path)
+{
+    Io *it;
+
+    intern(&path);
+
+    // See if we have already opened file;
+    // If so, and it hasn't been flagged return it.
+    // C.io is the next free slot, so only the entries below it are live.
+    for (it = C.iostk; it != C.io; ++it) {
+        if ((uintptr)it->path == (uintptr)path) {
+            if (it->kind & IOonce) {
+                return nil;
+            }
+            return it;
+        }
+    }
+
+    if ((C.io - C.iostk) >= arrlen(C.iostk)-1)
+        panicf("out of I/O space!");
+
+    // NOTE(review): the result of io·open is used unchecked; confirm how a
+    // failed open is reported and whether it should be handled here.
+    C.io->f    = io·open(path, "r");
+    C.io->path = path;
+    bufio·initreader(&C.io->buf, asrdr(io·read), C.io->f);
+
+    return C.io++;
+}
+
+// makeio claims the next free slot of the I/O stack for an in-memory
+// buffer. The slot's path is set to "<buffer>", its reader state is reset
+// so that beg, pos and end all sit bufio·ungets bytes into the embedded
+// storage, and b is pointed at the start of that storage.
+// Panics when the stack is full.
+Io*
+makeio()
+{
+    Io *io;
+
+    if ((C.io - C.iostk) >= arrlen(C.iostk)-1)
+        panicf("out of I/O space!");
+
+    io = C.io++;
+
+    io->path = "<buffer>";
+    io->buf  = (io·Buffer) {
+        .state    = bufio·rdr | bufio·end,
+        .runesize = 0,
+        .h        = nil,
+        .size     = bufio·size,
+        .beg      = io->buf.buf + bufio·ungets,
+        .pos      = io->buf.buf + bufio·ungets,
+        .end      = io->buf.buf + bufio·ungets,
+    };
+    io->b = io->buf.buf;
+
+    return io;
+}
+#undef asrdr
+
+// TODO: Think about if this is always at the _end_ of the stack.
+// Right now we don't have access to it.
+//
+// freeio releases the resource held by io and clears its bookkeeping
+// fields (link, path, store). C.io is not touched, so the slot is not
+// popped off the I/O stack here.
+// NOTE(review): the test below frees io->b whenever any kind bit other
+// than IOmac is set and closes io->f otherwise; confirm that this is the
+// intended split between buffer-backed and file-backed entries.
+void
+freeio(Io *io)
+{
+    if ((io->kind & ~IOmac) == 0)
+        io·close(io->f);
+    else
+        free(io->b);
+
+    io->store = (Pos){ 0 };
+    io->path  = nil;
+    io->link  = nil;
+}
+
+// -----------------------------------------------------------------------
+// universal compiler builtins
+
+#define KEYWORD(a, b) b,
+byte *keywords[NUM_KEYWORDS] = { KEYWORDS };
+#undef KEYWORD
+
+#define DIRECTIVE(a, b, c) b,
+byte *directives[NUM_DIRECTIVES] = { DIRECTIVES };
+#undef DIRECTIVE
+
+struct Compiler C = { 0 };
+
+// -----------------------------------------------------------------------
+// flag handlers
+
+// pushinclude adds every directory named in dirs to the include search
+// list C.inc.
+// dirs is a space separated list; it is edited in place (each separator
+// is overwritten with NUL). Every name is interned and compared by
+// pointer against the entries already present, so duplicates are skipped.
+// Empty names (produced by a leading or doubled separator) are ignored.
+void
+pushinclude(byte *dirs)
+{
+    string d, s, *it, *end;
+
+    while (*dirs != 0) {
+        d = strchr(dirs, ' ');
+        if (d != nil)
+            *d = '\0';
+
+        if (*dirs == '\0')
+            goto Nextdir;
+
+        // the entry is the text in front of the separator, not the separator
+        s = dirs;
+        intern(&s);
+        for (it = C.inc.dir, end = it + C.inc.len; it != end; ++it) {
+            if ((uintptr)s == (uintptr)(*it))
+                goto Nextdir;
+        }
+
+        // grow only when full, but always record the new directory
+        if (C.inc.len == C.inc.cap) {
+            C.inc.cap += 20;
+            C.inc.dir = realloc(C.inc.dir, C.inc.cap*sizeof(*C.inc.dir));
+            if (C.inc.dir == nil)
+                panicf("out of memory");
+        }
+        C.inc.dir[C.inc.len++] = s;
+
+Nextdir:
+        if (d == nil)
+            break;
+        dirs = d + 1;
+    }
+}
+
+// -----------------------------------------------------------------------
+// main point of entry
+
void
-init()
+init(void)
{
- int i, n;
+ int i;
for (i = 0; i < arrlen(keywords); i++) {
intern(&keywords[i]);
- printf("keyword %d: %s", i, keywords[i]);
}
+
+ for (i = 0; i < arrlen(directives); i++) {
+ intern(&directives[i]);
+ }
+
+ C.heap = mem·makearena(mem·sys, nil);
+
+ C.inc.len = 0;
+ C.inc.cap = 100;
+ C.inc.dir = calloc(C.inc.cap, sizeof(*C.inc.dir));
+ C.inc.dir[C.inc.len++] = ".";
+
+ C.outfile = nil;
+ C.lxr = (Lexer){ 0 };
}
-int
-main()
+// compile runs the lexer over the source file at path until it yields
+// an end-of-file or nil token.
+// If no output file has been chosen yet, one is derived from the base
+// name of path by replacing its extension with ".o" ("/dev/null" when
+// path has no base name).
+// Returns nonzero on failure: the file could not be pushed on the I/O
+// stack, or lexing stopped on an Anil token.
+error
+compile(byte *path)
+{
+    Token tok;
+    byte *base, *ext, file[400];
+
+    // +2: the terminator, plus the one extra byte the ".o" rewrite needs
+    // when the name ends in a bare '.'
+    if (strlen(path) + 2 > sizeof(file))
+        panicf("source path too long");
+    strcpy(file, path);
+
+    // only the scratch copy is inspected and edited; path itself has to
+    // stay intact because it is what gets opened below
+    base = utf8·findrrune(file, '/');
+    base = (base != nil) ? base + 1 : file;
+
+    if (!C.outfile) {
+        if (*base == '\0') {
+            C.outfile = "/dev/null";
+        } else {
+            if ((ext = utf8·findrrune(base, '.'))) {
+                ext[0] = '.';
+                ext[1] = 'o';
+                ext[2] = '\0';
+            }
+            // file[] dies with this frame, so keep the interned copy;
+            // intern is relied on to store its own copy of the bytes, as
+            // the lexer does when it interns its scratch buffer
+            C.outfile = base;
+            intern(&C.outfile);
+        }
+    }
+
+    C.lxr.io = openio(path);
+    if (C.lxr.io == nil)
+        return 1;
+
+    while (tok = lex(&C.lxr), tok.kind > Aeof) {
+        ;
+    }
+    freeio(C.lxr.io);
+
+    // lex reports failure with an Anil token; the only other token that
+    // ends the loop is Aeof
+    return tok.kind == Anil;
+}
+
+// main is the entry point of the compiler front end: it initialises the
+// global state, handles the command line flags, lexes one source file
+// through compile() and exits with 0 on success, 1 on a usage error and
+// 2 when compile() reports an error.
+error
+main(int argc, byte *argv[])
{
+ byte *a, *src;
+ int err;
+
init();
- return 0;
+
+ // assumes ARGBEGIN/ARGEND consume the flags and leave argc/argv at the
+ // first non-flag argument — TODO confirm against the macro definition
+ ARGBEGIN {
+ // -o file: explicit output file
+ case 'o':
+ C.outfile = ARGF();
+ break;
+
+ // -D text: interned and handed to dodefine on the global lexer
+ case 'D':
+ a = ARGF();
+ if (a) {
+ intern(&a);
+ dodefine(&C.lxr, a);
+ }
+ break;
+
+ // -I dirs: space separated include directories
+ case 'I':
+ a = ARGF();
+ if (a)
+ pushinclude(a);
+ break;
+ } ARGEND
+
+ if (argc < 1 && C.outfile == nil) {
+ printf("usage: cc [-options] files\n");
+ exit(1);
+ }
+
+ // NOTE(review): "<stdin>" is passed to compile() like any other path,
+ // and compile() hands its argument to openio(); confirm that reading
+ // from standard input is actually handled somewhere.
+ src = (argc == 0) ? "<stdin>" : argv[0];
+ intern(&src);
+
+ if ((err = compile(src)), err) {
+ exit(2);
+ }
+
+ exit(0);
}
diff --git a/sys/cmd/cc/cc.h b/sys/cmd/cc/cc.h
index 3228890..5488f3c 100644
--- a/sys/cmd/cc/cc.h
+++ b/sys/cmd/cc/cc.h
@@ -8,8 +8,11 @@
/* core types */
typedef struct Io Io;
typedef struct Pos Pos;
+typedef struct Range Range;
typedef struct Token Token;
+typedef struct Lexer Lexer;
+
typedef struct Sym Sym;
typedef struct Type Type;
@@ -22,33 +25,9 @@ typedef struct Expr Expr;
typedef struct SymTab SymTab;
typedef struct StrTab StrTab;
-// -----------------------------------------------------------------------
-// lexing: byte stream -> tokens
-// pre-processor built in
-
-struct Pos
-{
- int col;
- int line;
- string path;
-};
-
-#define DIRECTIVES \
- DIRECTIVE(Dpragma,"pragma") \
- DIRECTIVE(Dinclude,"include") \
- DIRECTIVE(Dif,"if") \
- DIRECTIVE(Ddefine,"define") \
- DIRECTIVE(Difdef,"ifdef") \
- DIRECTIVE(Difndef,"ifndef")
-
-#define DIRECTIVE(a, b) a,
-enum { DIRECTIVES };
-#undef DIRECTIVE
-
-#define DIRECTIVE(a, b) b,
-static byte *directives[] = { DIRECTIVES };
-#undef DIRECTIVE
+typedef struct Compiler Compiler;
+/* keywords of language */
#define KEYWORDS \
KEYWORD(Kauto,"auto") \
KEYWORD(Kregister,"register") \
@@ -88,15 +67,57 @@ static byte *directives[] = { DIRECTIVES };
KEYWORD(Kalignof,"alignof")
#define KEYWORD(a, b) a,
-enum { KEYWORDS };
+enum { KEYWORDS NUM_KEYWORDS };
#undef KEYWORD
-#define KEYWORD(a, b) b,
-static byte *keywords[] = { KEYWORDS };
-#undef KEYWORD
+extern byte *keywords[NUM_KEYWORDS];
+
+// -----------------------------------------------------------------------
+// lexing: byte stream -> tokens
+// pre-processor built in
+
+/* source position: error reporting */
+struct Pos
+{
+ int col;
+ int line;
+ string path;
+};
+
+
+struct Range
+{
+ Pos beg;
+ Pos end;
+};
+
+void errorat(Pos x, byte *fmt, ...);
+
+/* pre-processor */
+#define DIRECTIVES \
+ DIRECTIVE(Dpragma,"pragma", ppprag) \
+ DIRECTIVE(Dinclude,"include", ppinc) \
+ DIRECTIVE(Ddefine,"define", ppdef) \
+ DIRECTIVE(Dundef,"undef", ppund) \
+ DIRECTIVE(Dif,"if", ppif0) \
+ DIRECTIVE(Delse, "else", ppif1) \
+ DIRECTIVE(Difdef,"ifdef", ppif2) \
+ DIRECTIVE(Difndef,"ifndef", ppif3) \
+ DIRECTIVE(Dendif,"endif", ppend)
+
+#define DIRECTIVE(a, b, c) a,
+enum { DIRECTIVES NUM_DIRECTIVES };
+#undef DIRECTIVE
+
+extern byte *directives[NUM_DIRECTIVES];
-#undef KEYWORDS
+error domacro(Lexer*);
+error dodefine(Lexer *lx, string s);
+int expandmacro(Lexer *lx, Sym *s, byte *dst);
+extern error (*macros[NUM_DIRECTIVES])(Lexer*);
+
+/* tokenization of byte stream */
#define TOKENS \
TOK(Anil,"nil") \
TOK(Aeof,"eof") \
@@ -140,8 +161,8 @@ static byte *keywords[] = { KEYWORDS };
TOK(Arparen,")") \
TOK(Albrace,"{") \
TOK(Arbrace,"}") \
- TOK(Albrkt,"[") \
- TOK(Arbrkt,"]") \
+ TOK(Albrakt,"[") \
+ TOK(Arbrakt,"]") \
TOK(Adot,".") \
TOK(Aarrow,"->") \
TOK(Aqmark,"?") \
@@ -154,47 +175,76 @@ static byte *keywords[] = { KEYWORDS };
enum
{
TOKENS
+ NUM_TOKENS,
+
+ Vchar = iota(8),
+ Vint = iota(9),
+ Vlong = iota(10),
+ Vvlong = iota(11),
+ Vusgn = iota(12),
+ Vfloat = iota(13),
+ Vstr = iota(14),
};
#undef TOK
-#define TOK(a, b) b,
-static byte *tokens[] = { TOKENS };
-#undef TOK
-#undef TOKENS
+extern byte *tokens[NUM_TOKENS];
/* TODO: store literals in a big val */
struct Token
{
uint32 kind;
- struct Pos pos;
+ Range pos;
union {
- string str;
+ byte *s;
double f;
vlong i;
- };
+ uvlong ui;
+ byte c;
+ ubyte uc;
+ } val;
};
enum
{
- Svar,
- Sfunc,
- Smacro,
+ Svar = 1 << 0,
+ Sfunc = 1 << 1,
+ Smacro = 1 << 2,
};
struct Sym
{
uint32 kind;
string name;
+ union {
+ string macro;
+ /*Func *func;*/
+ };
};
+Sym *lookup(SymTab *tab, string ident);
+Sym *define(SymTab *tab, string ident, int kind);
+
struct Lexer
{
- Token tok;
+ Pos pos;
Io *io;
SymTab *sym;
- byte buf[1024];
+ byte *b;
+ byte buf[2*1024];
};
+/* lex.c functions */
+Token lex(Lexer *);
+
+byte getbyte(Lexer *);
+byte getnsbyte(Lexer *l);
+rune getrune(Lexer *);
+byte ungetbyte(Lexer *);
+rune ungetrune(Lexer *, rune r);
+
+void pushio(Lexer *lx, Io *new);
+void popio(Lexer *lx);
+
// -----------------------------------------------------------------------
// parsing & type resolution
// tokens -> ast
@@ -294,35 +344,66 @@ struct Decl
// -----------------------------------------------------------------------
// compiler
+enum
+{
+ IOnil = iota(0),
+ IOonce = iota(1),
+ IOmac = iota(2),
+};
+
struct Io
{
- io·Buffer b;
+ io·Buffer buf;
string path;
- uint32 flag;
+ uint32 kind;
+ union {
+ Stream *f;
+ byte *b;
+ };
+
+ Pos store;
struct Io *link;
};
+Io* openio(byte *path);
+Io* makeio();
+void freeio(Io *io);
+
struct StrTab
{
- int32 n_buckets, size, n_occupied, upper_bound;
+ int32 n_buckets;
+ int32 size;
+ int32 n_occupied;
+ int32 upper_bound;
int32 *flags;
string *keys;
int32 *vals;
};
-static struct
+int32 intern(byte **str);
+string internview(byte* beg, byte *end);
+
+/* main data */
+struct Compiler
{
mem·Arena *heap;
StrTab strs;
- string *include;
+ struct {
+ int cap;
+ int len;
+ string *dir;
+ } inc;
+
Io *io;
Io iostk[100];
-} C;
-void init();
+ string outfile;
-int32 intern(byte **str);
-string internview(byte* beg, byte *end);
+ Lexer lxr;
+};
+extern Compiler C;
+
+void init();
#undef iota
diff --git a/sys/cmd/cc/lex.c b/sys/cmd/cc/lex.c
index af3bbf3..6b85d8c 100644
--- a/sys/cmd/cc/lex.c
+++ b/sys/cmd/cc/lex.c
@@ -1,38 +1,649 @@
#include "cc.h"
-static
-void
-errorat(Pos x, byte *fmt, ...)
+#include <libn/macro/map.h>
+
+// -----------------------------------------------------------------------
+// simple wrappers
+
+byte
+getbyte(Lexer *l)
{
- va_list args;
- va_start(args, fmt);
- printf("error %d:", x.line);
- vprintf(fmt, args);
- va_end(args);
+ return bufio·getbyte(&l->io->buf);
+}
+
+// getnsbyte reads bytes from the lexer's current io buffer until it
+// finds one for which isspace is false, and returns that byte.
+byte
+getnsbyte(Lexer *l)
+{
+    byte c;
+
+    do {
+        c = bufio·getbyte(&l->io->buf);
+    } while (isspace(c));
+
+    return c;
+}
+
+// getrune forwards to bufio·getrune on the lexer's current io buffer.
+rune
+getrune(Lexer *l)
+{
+    io·Buffer *src;
+
+    src = &l->io->buf;
+    return bufio·getrune(src);
+}
-static
+// ungetbyte forwards to bufio·ungetbyte on the lexer's current io buffer
+// and returns its result.
byte
-getbyte(struct Lexer *lex)
+ungetbyte(Lexer *lx)
{
- return bufio·getbyte(&lex->buf);
+ // NOTE(review): b is never assigned before it is passed on, so an
+ // indeterminate value reaches bufio·ungetbyte; confirm whether the
+ // callee uses this argument and, if so, pass the byte being returned.
+ byte b;
+ return bufio·ungetbyte(&lx->io->buf, b);
}
+// ungetrune forwards r to bufio·ungetrune on the lexer's current io
+// buffer and returns its result.
+rune
+ungetrune(Lexer *l, rune r)
+{
+    io·Buffer *src;
+
+    src = &l->io->buf;
+    return bufio·ungetrune(src, r);
+}
+
+// -----------------------------------------------------------------------
+// main lexer
+
+#define TOK(a, b) b,
+byte *tokens[NUM_TOKENS] = { TOKENS };
+#undef TOK
+
+static uint8 Atoi[256] =
+{
+ ['0'] = 0, ['1'] = 1, ['2'] = 2, ['3'] = 3, ['4'] = 4, ['5'] = 5,
+ ['6'] = 6, ['7'] = 7, ['8'] = 8, ['9'] = 9, ['a'] = 10, ['A'] = 10,
+ ['b'] = 11, ['B'] = 11, ['c'] = 12, ['C'] = 12, ['d'] = 13, ['D'] = 13,
+ ['e'] = 14, ['E'] = 14, ['f'] = 15, ['F'] = 15,
+};
+
static
error
-ungetbyte(struct Lexer *lex, byte b)
+escape(Lexer *lx, int x, int *flag, vlong *val)
{
- return bufio·ungetbyte(&lex->buf, b);
+ int i, u, c;
+ vlong l;
+
+ c = getrune(lx);
+
+ switch (c) {
+ case EOF:
+ errorat(lx->pos, "EOF in string");
+ return 1;
+ case '\n':
+ errorat(lx->pos, "newline in string");
+ return 1;
+ case '\\':
+ break;
+ default:
+ if (c == x)
+ return 1;
+ *val = c;
+ return 0;
+ }
+
+ u = 0;
+ c = getrune(lx);
+
+ switch(c) {
+ case 'x':
+ i = 2;
+ *flag = 1;
+ goto hex;
+
+ case 'u':
+ i = 4;
+ u = 1;
+ goto hex;
+
+ case 'U':
+ i = 8;
+ u = 1;
+ goto hex;
+
+ case '0': case '1': case '2': case '3':
+ case '4': case '5': case '6': case '7':
+ *flag = 1;
+ goto oct;
+
+ case 'a': c = '\a'; break;
+ case 'b': c = '\b'; break;
+ case 'f': c = '\f'; break;
+ case 'n': c = '\n'; break;
+ case 'r': c = '\r'; break;
+ case 't': c = '\t'; break;
+ case 'v': c = '\v'; break;
+ case '\\': c = '\\'; break;
+
+ default:
+ if(c != x) errorat(lx->pos, "unknown escape sequence: %c", c);
+ }
+ *val = c;
+ return 0;
+
+hex:
+ l = 0;
+ for(; i > 0; i--) {
+ c = getbyte(lx);
+ if (c >= '0' && c <= '9') {
+ l = l*16 + c-'0';
+ continue;
+ }
+ if (c >= 'a' && c <= 'f') {
+ l = l*16 + c-'a' + 10;
+ continue;
+ }
+ if (c >= 'A' && c <= 'F') {
+ l = l*16 + c-'A' + 10;
+ continue;
+ }
+ errorat(lx->pos, "non-hex character in escape sequence: %c", c);
+ ungetbyte(lx);
+ break;
+ }
+ if (u && (l > RuneMax || (0xd800 <= l && l < 0xe000))) {
+ errorat(lx->pos, "invalid unicode code point in escape sequence: %#llx", l);
+ l = RuneErr;
+ }
+ *val = l;
+ return 0;
+
+oct:
+ l = c - '0';
+ for (i = 2; i > 0; i--) {
+ c = getbyte(lx);
+ if (c >= '0' && c <= '7') {
+ l = l*8 + c-'0';
+ continue;
+ }
+ errorat(lx->pos, "non-octal character in escape sequence: %c", c);
+ ungetbyte(lx);
+ }
+ if (l > 255) errorat(lx->pos, "octal escape value > 255: %d", l);
+
+ *val = l;
+ return 0;
}
-void
-lex(struct Lexer *lex)
+#define CASE1(stmt1, kind1) \
+ case stmt1: \
+ tok.kind = kind1; \
+ break;
+
+#define CASE2(stmt1, kind1, b1, kind2) \
+ case stmt1: \
+ tok.kind = kind1; \
+ b = getbyte(lx); \
+ if (b == b1) \
+ tok.kind = kind2; \
+ else \
+ ungetbyte(lx); \
+ break;
+
+#define CASE3(stmt1, kind1, b1, kind2, b2, kind3) \
+ case stmt1: \
+ tok.kind = kind1; \
+ b = getbyte(lx); \
+ if (b == b1) \
+ tok.kind = kind2; \
+ else if (b == b2) \
+ tok.kind = kind3; \
+ else \
+ ungetbyte(lx); \
+ break;
+
+#define CASE4(stmt1, kind1, b1, kind2, b2, kind3, b3, type4) \
+ case stmt1: \
+ tok.kind = kind1; \
+ b = getbyte(lx); \
+ if (b == b1) \
+ tok.kind = kind2; \
+ else if (b == b2) \
+ tok.kind = kind3; \
+ else if (b == b3) \
+ tok.kind = type4; \
+ else \
+ ungetbyte(lx); \
+ break;
+
+
+Token
+lex(Lexer *lx)
{
- int b;
+ int b, n, f;
+ vlong v;
+ uint u;
+ rune r;
+ string s;
+ double d;
+ byte *e;
+ Token tok;
+ Sym *sym;
+ Io *io;
+
+GetByte:
+ b = getbyte(lx);
+Dispatch:
+ tok.pos.beg = lx->pos;
+
+ if (b >= RuneSelf || isalpha(b))
+ goto TAlpha;
+ if (isdigit(b))
+ goto TNum;
- b = getbyte(lex);
-TOP:
switch (b) {
+ case ' ': case '\n': case '\r': case '\t': case '\v': case '\f':
+ while (b = getbyte(lx), isspace(b))
+ if (b == '\n') lx->pos.line++;
+ goto Dispatch;
+
+ case '\'':
+ if (escape(lx, '\'', &f, &v)) {
+ errorat(lx->pos, "empty literal or escaped ' in char literal");
+ v = '\'';
+ }
+ if (!escape(lx, '\'', &f, &v)) {
+ errorat(lx->pos, "missing '");
+ ungetbyte(lx);
+ }
+
+ if (v > 0xff) {
+ errorat(lx->pos, "overflowed character literal");
+ v = 0;
+ }
+ tok.kind = Alit | Vchar;
+ tok.val.c = v;
+ break;
+
+ case '"':
+ s = str·makecap("", 0, 8);
+ for (;;) {
+ if (escape(lx, '"', &f, &v))
+ break;
+
+ if (v < RuneSelf || f)
+ str·appendbyte(&s, v);
+ else {
+ r = v;
+ b = utf8·runelen(r);
+ utf8·runetochar(lx->buf, &r);
+ str·appendlen(&s, b, lx->buf);
+ }
+ }
+ tok.kind = Alit | Vstr;
+ tok.val.s = s;
+ intern(&tok.val.s);
+
+ str·free(s);
+ break;
+ case '.':
+ tok.kind = Adot;
+ b = getbyte(lx);
+
+ if (isdigit(b)) {
+ // *lx->b++ = b;
+ goto TFlt;
+ } else if (b == '.') {
+ b = getbyte(lx);
+ if (b != '.') {
+ errorat(lx->pos, "invalid token '..'");
+ tok.kind = Aellip;
+ break;
+ }
+ }
+ ungetbyte(lx);
+ break;
+
+ case '<':
+ tok.kind = Alt;
+ b = getbyte(lx);
+
+ if (b == '<') {
+ tok.kind = Alsft;
+ b = getbyte(lx);
+ if (b == '=')
+ tok.kind = Alsftasn;
+ else
+ ungetbyte(lx);
+ } else if (b == '=')
+ tok.kind = Alteq;
+ else
+ ungetbyte(lx);
+ break;
+
+ case '>':
+ tok.kind = Agt;
+ b = getbyte(lx);
+
+ if (b == '>') {
+ tok.kind = Arsft;
+ b = getbyte(lx);
+ if (b == '=')
+ tok.kind = Arsftasn;
+ else
+ ungetbyte(lx);
+ } else if (b == '=')
+ tok.kind = Agteq;
+ else
+ ungetbyte(lx);
+ break;
+
+ case '/':
+ tok.kind = Adiv;
+ b = getbyte(lx);
+
+ if (b == '=')
+ tok.kind = Adivasn;
+ else if (b == '/') {
+ while (b != EOF && b != '\n')
+ b = getbyte(lx);
+ lx->pos.line++;
+ goto Dispatch;
+ } else if (b == '*') {
+ int level = 1;
+ b = getbyte(lx);
+ while (b != EOF && level > 0) {
+ if (b == '/') {
+ b = getbyte(lx);
+ if (b == '*')
+ level++;
+ } else if (b == '*') {
+ b = getbyte(lx);
+ if (b == '/')
+ level--;
+ }
+ if (b == '\n') lx->pos.line++;
+ b = getbyte(lx);
+ }
+ goto Dispatch;
+ } else
+ ungetbyte(lx);
+ break;
+
+ case '#':
+ if (domacro(lx)) {
+ tok.kind = Anil;
+ errorat(lx->pos, "failed to perform preprocessor directive");
+ return tok;
+ }
+ goto GetByte;
+ break;
+
+ case EOF:
+ panicf("need to implement popio");
+
+ CASE1('(', Alparen)
+ CASE1(')', Arparen)
+ CASE1('{', Albrace)
+ CASE1('}', Arbrace)
+ CASE1('[', Albrakt)
+ CASE1(']', Arbrakt)
+ CASE1(',', Acomma)
+ CASE1('?', Aqmark)
+ CASE1(';', Asemi)
+ CASE1('~', Aneg)
+ CASE1(':', Acolon)
+ CASE2('^', Axor, '=', Axorasn)
+ CASE2('!', Anot, '=', Aneq)
+ CASE2('*', Astar,'=', Amulasn)
+ CASE2('=', Aasn, '=', Aeq)
+ CASE2('%', Amod, '=', Amodasn)
+ CASE3('+', Aadd, '=', Aaddasn, '+', Ainc)
+ CASE3('&', Aand, '=', Aandasn, '&', Aandand)
+ CASE3('|', Aor, '=', Aorasn, '|', Aoror)
+ CASE4('-', Asub, '=', Asubasn, '-', Adec, '>', Aarrow)
+
+ default:
+ tok.kind = Anil;
+ errorat(lx->pos, "invalid token, crashing");
+ abort();
}
-}
+
+ goto Return;
+
+ TNum:
+ e = lx->buf + arrlen(lx->buf);
+ do {
+ if (lx->b >= e) {
+ errorat(lx->pos, "number overflows lexer buffer");
+ goto Nospace;
+ }
+ *lx->b++ = b;
+ } while (b = getbyte(lx), isdigit(b) || b == '_');
+
+ if (b == '.' || tolower(b) == 'e')
+ goto TFlt;
+ TInt:
+ r = b;
+ n = 10;
+ s = lx->buf;
+ if (*s == '0') {
+ b = *++s;
+ switch (b) {
+ case 'x': n = 16; break;
+ case 'b': n = 2; break;
+ case 'o': n = 8; break;
+ default: --s;
+ }
+ if (s >= e) {
+ errorat(lx->pos, "number overflows lexer buffer");
+ goto Nospace;
+ }
+ }
+
+ v = 0;
+ for (; s != lx->b ; s++) {
+ b = *s;
+ if (b == '_') continue;
+
+ f = Atoi[b];
+ if (f == 0 && b != '0')
+ break;
+
+ if (f >= n) {
+ errorat(lx->pos, "digit '%c' out of range for base %d", b, n);
+ f = 0;
+ }
+
+ if (v > (UINT64_MAX - f) / n) {
+ errorat(lx->pos, "integer literal overflow");
+ v = 0;
+ break;
+ }
+
+ v = v * n + f;
+ }
+ b = r;
+ tok.kind = Alit | Vint;
+ tok.val.i = v;
+ /* TODO: Suffixes!
+ if (tolower(b) == 'u') {
+ tok.kind |= Vusgn;
+ b = getbyte(lx);
+ }
+ */
+ goto Return;
+
+ TFlt:
+ if (b == '.') {
+ *lx->b++ = b;
+ b = getbyte(lx);
+ }
+
+ while (isdigit(b)) {
+ *lx->b++ = b;
+
+ if (lx->b >= e) {
+ errorat(lx->pos, "number overflows lexer buffer");
+ goto Nospace;
+ }
+ }
+
+ if (tolower(b) == 'e') {
+ b = getbyte(lx);
+ if (b == '-' || b == '+')
+ b = getbyte(lx);
+
+ if (!isdigit(b))
+ errorat(lx->pos, "expected number after exponent, found %c", b);
+
+ do {
+ *lx->b++ = b;
+ } while (b = getbyte(lx), isdigit(b));
+ }
+ *lx->b = '\0';
+ d = strtod(lx->buf, nil);
+
+ tok.kind = Alit | Vfloat;
+ tok.val.f = d;
+
+ goto Return;
+
+ TAlpha:
+ u = b;
+ s = lx->buf;
+ e = lx->buf + arrlen(lx->buf);
+ for (;;) {
+ if (s >= e) {
+ errorat(lx->pos, "identifier too long for buffer: %s", s);
+ goto Nospace;
+ }
+ if (u >= RuneSelf) {
+ ungetbyte(lx);
+ r = getrune(lx);
+ if (!utf8·isletter(r) && !utf8·isdigit(r) && r != 0xb7) {
+ errorat(lx->pos, "invalid identifier character %d", r);
+ }
+ s += utf8·runetochar(s, &r);
+ } else if (!isalnum(u) && u != '_')
+ break;
+ else
+ *s++ = u;
+ u = getbyte(lx);
+ }
+ *s = '\0';
+ tok.kind = Aident;
+ tok.val.s = lx->buf;
+
+ n = intern(&tok.val.s);
+ if (n < arrlen(keywords)) {
+ tok.kind = Akeywd;
+ }
+
+ sym = lookup(lx->sym, tok.val.s);
+ if (sym) {
+ io = makeio();
+ io->buf.end += expandmacro(lx, sym, io->b);
+ pushio(lx, io);
+ goto GetByte;
+ }
+
+Return:
+ lx->b = lx->buf;
+ tok.pos.end = lx->pos;
+ return tok;
+
+Nospace:
+ panicf("aborting compilation");
+}
+
+#undef CASE4
+#undef CASE3
+#undef CASE2
+#undef CASE1
+
+// -----------------------------------------------------------------------
+// push/pop io objects
+
+// pushio makes new the lexer's active io. The previously active io is
+// linked behind it and remembers the lexer position in its store field;
+// the lexer position restarts at line 0, column 0 of new's path.
+void
+pushio(Lexer *lx, Io *new)
+{
+    Io *cur;
+
+    cur = lx->io;
+
+    new->link  = cur;
+    cur->store = lx->pos;
+    lx->io     = new;
+
+    lx->pos = (Pos){
+        .path = new->path,
+        .col  = 0,
+        .line = 0,
+    };
+}
+
+// popio switches the lexer back to the io linked behind the active one
+// and restores the position that was stored on it.
+// Panics when the active io has no link.
+void
+popio(Lexer *lx)
+{
+    Io *below;
+
+    below = lx->io->link;
+    if (below == nil) {
+        panicf("no buffer left");
+    }
+
+    lx->pos = below->store;
+    lx->io  = below;
+}
+
+// -----------------------------------------------------------------------
+// symbol tables
+
+#define PTR_HASH(p) (uintptr)(p)
+#define PTR_EQUAL(p1, p2) ((uintptr)(p1) == (uintptr)(p2))
+
+struct SymTab
+{
+ MAP_STRUCT_BODY(string, Sym*);
+};
+
+// lookup returns the symbol stored under ident in tab, or nil when the
+// slot index produced by MAP_GET is not below tab->n_buckets.
+// Keys are hashed and compared by pointer value (PTR_HASH / PTR_EQUAL).
+// NOTE(review): tab is dereferenced unconditionally; confirm callers never
+// pass a nil table.
+Sym*
+lookup(SymTab *tab, string ident)
+{
+    int slot;
+
+    MAP_GET(slot, tab, ident, PTR_HASH, PTR_EQUAL);
+
+    return (slot < tab->n_buckets) ? tab->vals[slot] : nil;
+}
+
+// moresymtab is the growth hook handed to MAP_PUT by putsym.
+// Its whole body is the MAP_GROW macro, instantiated for string keys and
+// Sym* values with mem·sys.alloc / mem·sys.free.
+// NOTE(review): there is no explicit return; presumably MAP_GROW supplies
+// it — confirm against libn/macro/map.h.
+static
+int
+moresymtab(SymTab *tab, int n)
+{
+ MAP_GROW(tab, string, Sym*, n, PTR_HASH, mem·sys.alloc, mem·sys.free, nil);
+}
+
+// putsym stores sym in tab under sym->name through the MAP_PUT macro,
+// using pointer hashing/equality and moresymtab as the growth hook.
+// NOTE(review): there is no explicit return, and how err is written is
+// not visible here; presumably MAP_PUT handles both — confirm against
+// libn/macro/map.h.
+static
+int
+putsym(SymTab *tab, Sym *sym, error *err)
+{
+ MAP_PUT(tab, sym->name, sym, PTR_HASH, PTR_EQUAL, moresymtab, err);
+}
+
+// define allocates a symbol from the compiler arena, gives it the name
+// and kind passed in, stores it in tab and returns it.
+// NOTE(review): both the result of putsym and the err it is handed are
+// ignored, so a failed insertion goes unnoticed — confirm this is intended.
+Sym*
+define(SymTab *tab, string name, int kind)
+{
+    error err;
+    Sym *s;
+
+    s = mem·arenaalloc(C.heap, 1, sizeof(*s));
+    s->kind = kind;
+    s->name = name;
+
+    putsym(tab, s, &err);
+
+    return s;
+}
+
+// -----------------------------------------------------------------------
+// error reporting
+
+void
+errorat(Pos x, byte *fmt, ...)
+{
+ va_list args;
+ va_start(args, fmt);
+
+ printf("error %d: ", x.line);
+
+ vprintf(fmt, args);
+ va_end(args);
+}
+
diff --git a/sys/cmd/cc/rules.mk b/sys/cmd/cc/rules.mk
index fe30305..b32d5b6 100644
--- a/sys/cmd/cc/rules.mk
+++ b/sys/cmd/cc/rules.mk
@@ -3,13 +3,13 @@ include share/push.mk
# Local sources
SRCS_$(d) := \
+ $(d)/pp.c \
$(d)/lex.c \
- $(d)/sym.c \
$(d)/cc.c
LIBS_$(d) :=
BINS_$(d) := $(d)/cc
-TSTS_$(d) :=
+UNTS_$(d) :=
include share/paths.mk
diff --git a/sys/cmd/cc/sym.c b/sys/cmd/cc/sym.c
deleted file mode 100644
index ef40bce..0000000
--- a/sys/cmd/cc/sym.c
+++ /dev/null
@@ -1,32 +0,0 @@
-#include "cc.h"
-
-#include <libn/macro/map.h>
-
-#define PTR_HASH(p) (uintptr)(p)
-#define PTR_EQUAL(p1, p2) ((uintptr)(p1) == (uintptr)(p2))
-
-#if 0
-struct SymTab
-{
- MAP_STRUCT_BODY(string, Sym*);
-};
-
-Sym*
-getsym(SymTab *tab, string key)
-{
- MAP_GET(tab, key, PTR_HASH, PTR_EQUAL, nil);
-}
-
-static
-int
-moresymtab(SymTab *tab, int n)
-{
- MAP_GROW(tab, string, Sym*, n, PTR_HASH);
-}
-
-int
-putsym(SymTab *tab, Sym *sym, error *err)
-{
- MAP_PUT(tab, sym->name, sym, PTR_HASH, PTR_EQUAL, moresymtab, err);
-}
-#endif
diff --git a/sys/libbio/io/newick.c b/sys/libbio/io/newick.c
index f9be2d2..da94ef2 100644
--- a/sys/libbio/io/newick.c
+++ b/sys/libbio/io/newick.c
@@ -236,7 +236,7 @@ parse(struct Parser *p)
errorf("incorrect format: unmatched comment bracket '['");
goto ERROR;
}
- str·append(node->comment, tokstr(tok));
+ str·append(&node->comment, tokstr(tok));
}
break;
diff --git a/sys/libn/memory.c b/sys/libn/memory.c
index 4cf92b2..dce0c36 100644
--- a/sys/libn/memory.c
+++ b/sys/libn/memory.c
@@ -164,3 +164,8 @@ memset64(void *dst, uint64 val, uintptr size)
((byte*)dst)[i] = ((byte*)&val)[i&7];
}
}
+
+// -------------------------------------------------------------------------
+// First argument
+
+char *argv0;
diff --git a/sys/libn/string.c b/sys/libn/string.c
index 694cdea..fb92a04 100644
--- a/sys/libn/string.c
+++ b/sys/libn/string.c
@@ -150,6 +150,34 @@ utf8·findrune(byte* s, long c)
return nil;
}
+// findrrune returns a pointer to the last occurrence of rune c in the
+// NUL-terminated string s, or nil when there is none.
+// Values below RuneSync are delegated to strrchr; anything else is found
+// by walking the string and decoding multi-byte sequences with
+// utf8·chartorune.
+byte*
+utf8·findrrune(byte* s, long c)
+{
+    rune r;
+    long n;
+    byte *last;
+
+    if (c < RuneSync)
+        return strrchr(s, c);
+
+    for (last = nil; *(ubyte*)s != 0; s += n) {
+        if (*(ubyte*)s < RuneSelf) {
+            // single byte: advance by one
+            n = 1;
+            if (*(ubyte*)s == c)
+                last = s;
+        } else {
+            // multi-byte sequence: advance by its encoded width
+            n = utf8·chartorune(&r, s);
+            if (r == c)
+                last = s;
+        }
+    }
+
+    return last;
+}
+
#undef Bit
#undef Tbyte
#undef RuneX
@@ -319,7 +347,7 @@ str·fit(string *s)
// string to our buffer. The result is reallocated if not enough room is present
// in the buffer.
void
-str·appendcount(string *s, vlong n, const byte* b)
+str·appendlen(string *s, vlong n, const byte* b)
{
vlong bl = strlen(b);
if (n > bl) panicf("attempted to make a substring longer than string");
@@ -339,7 +367,7 @@ str·appendcount(string *s, vlong n, const byte* b)
void
str·append(string *s, const byte* b)
{
- return str·appendcount(s, strlen(b), b);
+ return str·appendlen(s, strlen(b), b);
}
// AppendByte will append the given byte to our string.
@@ -358,24 +386,11 @@ str·appendbyte(string *s, const byte b)
*s[h->len] = '\0'; // NOTE: I don't think an explicit zero is required..?
}
-// Equals returns true if string s and t are equivalent.
-bool
-str·equals(const string s, const string t)
-{
- vlong sL = str·len(s);
- vlong tL = str·len(t);
- if (sL != tL) return false;
-
- return memcmp(s, t, sL) == 0;
-}
-
-//------------------------------------------------------------------------
-// Utility Methods
-
/*
* Appendf will append the given formatted string to our buffer.
* Returns the newly minted string
*/
+
void
str·appendf(string *s, const byte* fmt, ...)
{
@@ -399,6 +414,31 @@ str·appendf(string *s, const byte* fmt, ...)
h->len += n;
}
+// Equals reports whether s and t have the same length (per str·len) and
+// identical contents over that length.
+bool
+str·equals(const string s, const string t)
+{
+    vlong ns, nt;
+
+    ns = str·len(s);
+    nt = str·len(t);
+
+    return ns == nt && memcmp(s, t, ns) == 0;
+}
+
+//------------------------------------------------------------------------
+// Utility Methods
+
+// Read copies bytes from the start of s into buf: either n*size bytes or
+// the length of s, whichever is smaller. Returns the number of bytes
+// copied.
+int
+str·read(string s, int size, int n, void *buf)
+{
+    int count = MIN(n * size, str·len(s));
+
+    memcpy(buf, s, count);
+    return count;
+}
+
// Find will find the first occurence of
// substr in the string Returns -1 if nothing was found.
int
@@ -502,7 +542,7 @@ str·join(vlong len, byte** fields, const byte* sep)
for (j = 0; j < len; j++) {
str·append(&s, fields[j]);
if (j < len - 1)
- str·appendcount(&s, 1, sep);
+ str·appendlen(&s, 1, sep);
}
return s;