From 9ec5bed6a7d715ffa69851569485a685dd69db2e Mon Sep 17 00:00:00 2001 From: Nicholas Noll Date: Sun, 17 May 2020 16:19:17 -0700 Subject: prototype of front end cli --- sys/cmd/cc/cc.c | 237 +++++++++++++++++- sys/cmd/cc/cc.h | 189 ++++++++++---- sys/cmd/cc/lex.c | 649 +++++++++++++++++++++++++++++++++++++++++++++++-- sys/cmd/cc/rules.mk | 4 +- sys/cmd/cc/sym.c | 32 --- sys/libbio/io/newick.c | 2 +- sys/libn/memory.c | 5 + sys/libn/string.c | 74 ++++-- 8 files changed, 1060 insertions(+), 132 deletions(-) delete mode 100644 sys/cmd/cc/sym.c (limited to 'sys') diff --git a/sys/cmd/cc/cc.c b/sys/cmd/cc/cc.c index cddea01..39ad5f2 100644 --- a/sys/cmd/cc/cc.c +++ b/sys/cmd/cc/cc.c @@ -1,6 +1,9 @@ #include "cc.h" #include +// ----------------------------------------------------------------------- +// string interning + /* jenkins' one at a time hash */ static int32 @@ -24,8 +27,22 @@ hash_string(byte* s) return h; } +static +int +streq(byte *s, byte *t) +{ + if (s == nil) { + if (t == nil) + return 1; + else + return 0; + } + + return (t == nil) ? 
0 : strcmp(s, t) == 0; +} + #define HASH(s) hash_string(s) -#define EQUAL(s, t) (strcmp(s, t) == 0) +#define EQUAL(s, t) (streq(s, t)) static int getstr(string key, int *ok) @@ -74,20 +91,226 @@ END: return C.strs.vals[i]; } +// ----------------------------------------------------------------------- +// io buffer management + +#define asrdr(x) (io·Reader){(int (*)(void *, int, int, void *))x} +// path should be absolute +Io* +openio(byte *path) +{ + Io *it; + Stream *f; + + intern(&path); + + // See if we have already opened file; + // If so, and it hasn't been flagged return it + for (it = C.iostk; it != C.io + 1; ++it) { + if ((uintptr)it->path == (uintptr)path) { + if (it->kind & IOonce) { + return nil; + } + return it; + } + } + + if ((C.io - C.iostk) >= arrlen(C.iostk)-1) + panicf("out of I/O space!"); + + C.io->f = io·open(path, "r"); + C.io->path = path; + bufio·initreader(&C.io->buf, asrdr(io·read), C.io->f); + + return C.io++; +} + +Io* +makeio() +{ + if ((C.io - C.iostk) >= arrlen(C.iostk)-1) + panicf("out of I/O space!"); + + C.io->path = ""; + C.io->buf = (io·Buffer) { + .state = bufio·rdr | bufio·end, + .runesize = 0, + .h = nil, + .size = bufio·size, + .beg = C.io->buf.buf + bufio·ungets, + .pos = C.io->buf.buf + bufio·ungets, + .end = C.io->buf.buf + bufio·ungets, + }; + C.io->b = C.io->buf.buf; + + return C.io++; +} +#undef asrdr + +// TODO: Think about if this is always at the _end_ of the stack. +// Right now we don't have access to it. 
+void
+freeio(Io *io)
+{
+    // Release whatever the slot refers to, then clear its bookkeeping fields.
+    // NOTE(review): neither openio() nor makeio() in this patch assigns
+    // io->kind, so it is presumably still 0 here and the io·close() branch is
+    // taken even for makeio() slots. The test also fires on any bit other
+    // than IOmac, which reads inverted if IOmac marks buffer-backed slots.
+    // Confirm the intended flag logic before relying on this.
+    if (io->kind & ~IOmac) {
+        free(io->b);
+    } else {
+        io·close(io->f);
+    }
+    io->link = nil;
+    io->path = nil;
+    io->store = (Pos){ 0 };
+}
+
+// -----------------------------------------------------------------------
+// universal compiler builtins
+
+#define KEYWORD(a, b) b,
+byte *keywords[NUM_KEYWORDS] = { KEYWORDS };
+#undef KEYWORD
+
+#define DIRECTIVE(a, b, c) b,
+byte *directives[NUM_DIRECTIVES] = { DIRECTIVES };
+#undef DIRECTIVE
+
+struct Compiler C = { 0 };
+
+// -----------------------------------------------------------------------
+// flag handlers
+
+// pushinclude records every space-separated directory in dirs on the include
+// search list C.inc, skipping directories that are already present.
+// dirs is modified in place: each separating space is overwritten with NUL.
+void
+pushinclude(byte *dirs)
+{
+    string d, s, *it, *end;
+
+    while (*dirs != 0) {
+        d = strchr(dirs, ' ');
+        if (d == dirs) {
+            // empty entry (leading or repeated space): nothing to record
+            dirs++;
+            continue;
+        }
+        if (d != nil)
+            *d = '\0';
+
+        // intern the directory name itself (not the separator position) so
+        // that duplicates can be detected by pointer comparison below
+        s = dirs;
+        intern(&s);
+        for (it = C.inc.dir, end = it + C.inc.len; it != end; ++it) {
+            if ((uintptr)s == (uintptr)(*it))
+                goto Nextdir;
+        }
+
+        // grow only when full, but always append the new entry
+        if (C.inc.len == C.inc.cap) {
+            C.inc.cap += 20;
+            C.inc.dir = realloc(C.inc.dir, C.inc.cap*sizeof(*C.inc.dir));
+            if (C.inc.dir == nil)
+                panicf("out of memory growing include path list");
+        }
+        C.inc.dir[C.inc.len++] = s;
+
+Nextdir:
+        if (d == nil)
+            break;
+        dirs = d + 1;
+    }
+
+}
+
+// -----------------------------------------------------------------------
+// main point of entry
+
+// init interns the keyword and directive tables (keywords first, so their
+// intern ids are the smallest) and resets the global compiler state C.
 void
-init()
+init(void)
 {
-    int i, n;
+    int i;
 
     for (i = 0; i < arrlen(keywords); i++) {
         intern(&keywords[i]);
-        printf("keyword %d: %s", i, keywords[i]);
     }
+
+    for (i = 0; i < arrlen(directives); i++) {
+        intern(&directives[i]);
+    }
+
+    C.heap = mem·makearena(mem·sys, nil);
+
+    // openio()/makeio() hand out slots through C.io, so it has to start at
+    // the bottom of the io stack rather than at nil
+    C.io = C.iostk;
+
+    C.inc.len = 0;
+    C.inc.cap = 100;
+    C.inc.dir = calloc(C.inc.cap, sizeof(*C.inc.dir));
+    C.inc.dir[C.inc.len++] = ".";
+
+    C.outfile = nil;
+    C.lxr = (Lexer){ 0 };
+    // lex() appends to *lx->b before it ever resets it, so aim the write
+    // cursor at the scratch buffer up front
+    C.lxr.b = C.lxr.buf;
 }
 
-int
-main()
+// compile lexes the file at path until end of input or error.
+error
+compile(byte *path)
+{
+    Io *io;
+    Token tok;
+    byte *p, file[400];
+
+    // file is a fixed-size scratch copy; refuse paths that do not fit
+    if (strlen(path) >= arrlen(file))
+        panicf("path too long: %s", path);
+    strcpy(file, path);
+    p = utf8·findrrune(file, '/');
+    if (p)
+        *p++ = '\0';
+    else
+        p = file;
+
+    // NOTE(review): C.outfile ends up pointing into the local array `file`,
+    // which stops existing when compile() returns — confirm nothing reads it later.
+    if (!C.outfile) {
+        C.outfile = p;
+        if (C.outfile) {
+            if ((p = utf8·findrrune(C.outfile, '.'))) {
+                p[0] = '.';
+                p[1] = 'o';
+                p[2] = '\0';
+            }
+        } else {
+
C.outfile = "/dev/null"; + } + } + + C.lxr.io = openio(file); + while (tok = lex(&C.lxr), tok.kind > Aeof) { + ; + } + freeio(C.lxr.io); + + return tok.kind != Anil; +} + +error +main(int argc, byte *argv[]) { + byte *a, *src; + int err; + init(); - return 0; + + ARGBEGIN { + case 'o': + C.outfile = ARGF(); + break; + + case 'D': + a = ARGF(); + if (a) { + intern(&a); + dodefine(&C.lxr, a); + } + break; + + case 'I': + a = ARGF(); + if (a) + pushinclude(a); + break; + } ARGEND + + if (argc < 1 && C.outfile == nil) { + printf("usage: cc [-options] files\n"); + exit(1); + } + + src = (argc == 0) ? "" : argv[0]; + intern(&src); + + if ((err = compile(src)), err) { + exit(2); + } + + exit(0); } diff --git a/sys/cmd/cc/cc.h b/sys/cmd/cc/cc.h index 3228890..5488f3c 100644 --- a/sys/cmd/cc/cc.h +++ b/sys/cmd/cc/cc.h @@ -8,8 +8,11 @@ /* core types */ typedef struct Io Io; typedef struct Pos Pos; +typedef struct Range Range; typedef struct Token Token; +typedef struct Lexer Lexer; + typedef struct Sym Sym; typedef struct Type Type; @@ -22,33 +25,9 @@ typedef struct Expr Expr; typedef struct SymTab SymTab; typedef struct StrTab StrTab; -// ----------------------------------------------------------------------- -// lexing: byte stream -> tokens -// pre-processor built in - -struct Pos -{ - int col; - int line; - string path; -}; - -#define DIRECTIVES \ - DIRECTIVE(Dpragma,"pragma") \ - DIRECTIVE(Dinclude,"include") \ - DIRECTIVE(Dif,"if") \ - DIRECTIVE(Ddefine,"define") \ - DIRECTIVE(Difdef,"ifdef") \ - DIRECTIVE(Difndef,"ifndef") - -#define DIRECTIVE(a, b) a, -enum { DIRECTIVES }; -#undef DIRECTIVE - -#define DIRECTIVE(a, b) b, -static byte *directives[] = { DIRECTIVES }; -#undef DIRECTIVE +typedef struct Compiler Compiler; +/* keywords of language */ #define KEYWORDS \ KEYWORD(Kauto,"auto") \ KEYWORD(Kregister,"register") \ @@ -88,15 +67,57 @@ static byte *directives[] = { DIRECTIVES }; KEYWORD(Kalignof,"alignof") #define KEYWORD(a, b) a, -enum { KEYWORDS }; +enum { 
KEYWORDS NUM_KEYWORDS }; #undef KEYWORD -#define KEYWORD(a, b) b, -static byte *keywords[] = { KEYWORDS }; -#undef KEYWORD +extern byte *keywords[NUM_KEYWORDS]; + +// ----------------------------------------------------------------------- +// lexing: byte stream -> tokens +// pre-processor built in + +/* source position: error reporting */ +struct Pos +{ + int col; + int line; + string path; +}; + + +struct Range +{ + Pos beg; + Pos end; +}; + +void errorat(Pos x, byte *fmt, ...); + +/* pre-processor */ +#define DIRECTIVES \ + DIRECTIVE(Dpragma,"pragma", ppprag) \ + DIRECTIVE(Dinclude,"include", ppinc) \ + DIRECTIVE(Ddefine,"define", ppdef) \ + DIRECTIVE(Dundef,"undef", ppund) \ + DIRECTIVE(Dif,"if", ppif0) \ + DIRECTIVE(Delse, "else", ppif1) \ + DIRECTIVE(Difdef,"ifdef", ppif2) \ + DIRECTIVE(Difndef,"ifndef", ppif3) \ + DIRECTIVE(Dendif,"endif", ppend) + +#define DIRECTIVE(a, b, c) a, +enum { DIRECTIVES NUM_DIRECTIVES }; +#undef DIRECTIVE + +extern byte *directives[NUM_DIRECTIVES]; -#undef KEYWORDS +error domacro(Lexer*); +error dodefine(Lexer *lx, string s); +int expandmacro(Lexer *lx, Sym *s, byte *dst); +extern error (*macros[NUM_DIRECTIVES])(Lexer*); + +/* tokenization of byte stream */ #define TOKENS \ TOK(Anil,"nil") \ TOK(Aeof,"eof") \ @@ -140,8 +161,8 @@ static byte *keywords[] = { KEYWORDS }; TOK(Arparen,")") \ TOK(Albrace,"{") \ TOK(Arbrace,"}") \ - TOK(Albrkt,"[") \ - TOK(Arbrkt,"]") \ + TOK(Albrakt,"[") \ + TOK(Arbrakt,"]") \ TOK(Adot,".") \ TOK(Aarrow,"->") \ TOK(Aqmark,"?") \ @@ -154,47 +175,76 @@ static byte *keywords[] = { KEYWORDS }; enum { TOKENS + NUM_TOKENS, + + Vchar = iota(8), + Vint = iota(9), + Vlong = iota(10), + Vvlong = iota(11), + Vusgn = iota(12), + Vfloat = iota(13), + Vstr = iota(14), }; #undef TOK -#define TOK(a, b) b, -static byte *tokens[] = { TOKENS }; -#undef TOK -#undef TOKENS +extern byte *tokens[NUM_TOKENS]; /* TODO: store literals in a big val */ struct Token { uint32 kind; - struct Pos pos; + Range pos; union { - string 
str; + byte *s; double f; vlong i; - }; + uvlong ui; + byte c; + ubyte uc; + } val; }; enum { - Svar, - Sfunc, - Smacro, + Svar = 1 << 0, + Sfunc = 1 << 1, + Smacro = 1 << 2, }; struct Sym { uint32 kind; string name; + union { + string macro; + /*Func *func;*/ + }; }; +Sym *lookup(SymTab *tab, string ident); +Sym *define(SymTab *tab, string ident, int kind); + struct Lexer { - Token tok; + Pos pos; Io *io; SymTab *sym; - byte buf[1024]; + byte *b; + byte buf[2*1024]; }; +/* lex.c functions */ +Token lex(Lexer *); + +byte getbyte(Lexer *); +byte getnsbyte(Lexer *l); +rune getrune(Lexer *); +byte ungetbyte(Lexer *); +rune ungetrune(Lexer *, rune r); + +void pushio(Lexer *lx, Io *new); +void popio(Lexer *lx); + // ----------------------------------------------------------------------- // parsing & type resolution // tokens -> ast @@ -294,35 +344,66 @@ struct Decl // ----------------------------------------------------------------------- // compiler +enum +{ + IOnil = iota(0), + IOonce = iota(1), + IOmac = iota(2), +}; + struct Io { - io·Buffer b; + io·Buffer buf; string path; - uint32 flag; + uint32 kind; + union { + Stream *f; + byte *b; + }; + + Pos store; struct Io *link; }; +Io* openio(byte *path); +Io* makeio(); +void freeio(Io *io); + struct StrTab { - int32 n_buckets, size, n_occupied, upper_bound; + int32 n_buckets; + int32 size; + int32 n_occupied; + int32 upper_bound; int32 *flags; string *keys; int32 *vals; }; -static struct +int32 intern(byte **str); +string internview(byte* beg, byte *end); + +/* main data */ +struct Compiler { mem·Arena *heap; StrTab strs; - string *include; + struct { + int cap; + int len; + string *dir; + } inc; + Io *io; Io iostk[100]; -} C; -void init(); + string outfile; -int32 intern(byte **str); -string internview(byte* beg, byte *end); + Lexer lxr; +}; +extern Compiler C; + +void init(); #undef iota diff --git a/sys/cmd/cc/lex.c b/sys/cmd/cc/lex.c index af3bbf3..6b85d8c 100644 --- a/sys/cmd/cc/lex.c +++ b/sys/cmd/cc/lex.c @@ 
-1,38 +1,649 @@
 #include "cc.h"
-static
-void
-errorat(Pos x, byte *fmt, ...)
+#include
+
+// -----------------------------------------------------------------------
+// simple wrappers
+
+// getbyte returns the next byte of the lexer's current io buffer.
+byte
+getbyte(Lexer *l)
 {
-    va_list args;
-    va_start(args, fmt);
-    printf("error %d:", x.line);
-    vprintf(fmt, args);
-    va_end(args);
+    return bufio·getbyte(&l->io->buf);
+}
+
+// getnsbyte reads bytes until one for which isspace() is false and returns it.
+byte
+getnsbyte(Lexer *l)
+{
+    byte b;
+    while (b = bufio·getbyte(&l->io->buf), isspace(b));
+    return b;
+}
+
+// getrune returns the next rune of the lexer's current io buffer.
+rune
+getrune(Lexer *l)
+{
+    return bufio·getrune(&l->io->buf);
 }
-static
 byte
-getbyte(struct Lexer *lex)
+ungetbyte(Lexer *lx)
 {
-    return bufio·getbyte(&lex->buf);
+    byte b; // NOTE(review): never assigned — the call below receives an indeterminate value
+    return bufio·ungetbyte(&lx->io->buf, b); // TODO confirm whether bufio·ungetbyte uses its byte argument; if it does, callers push back garbage
 }
+// ungetrune hands rune r back to the lexer's current io buffer.
+rune
+ungetrune(Lexer *l, rune r)
+{
+    return bufio·ungetrune(&l->io->buf, r);
+}
+
+// -----------------------------------------------------------------------
+// main lexer
+
+#define TOK(a, b) b,
+byte *tokens[NUM_TOKENS] = { TOKENS };
+#undef TOK
+
+// Atoi maps an ASCII hex digit to its numeric value. Every other entry is
+// zero-initialized, so 0 means either the digit '0' or "not a digit".
+static uint8 Atoi[256] =
+{
+    ['0'] = 0, ['1'] = 1, ['2'] = 2, ['3'] = 3, ['4'] = 4, ['5'] = 5,
+    ['6'] = 6, ['7'] = 7, ['8'] = 8, ['9'] = 9, ['a'] = 10, ['A'] = 10,
+    ['b'] = 11, ['B'] = 11, ['c'] = 12, ['C'] = 12, ['d'] = 13, ['D'] = 13,
+    ['e'] = 14, ['E'] = 14, ['f'] = 15, ['F'] = 15,
+};
+
+// escape reads one (possibly backslash-escaped) character of a literal that
+// is terminated by x. It returns 1 on EOF, on a raw newline, or when the
+// unescaped terminator x is read; otherwise it stores the value in *val and
+// returns 0.
+// NOTE(review): *flag is only ever set to 1 (hex and octal escapes) and is
+// never cleared in this function, so callers must initialise it themselves.
 static
 error
-ungetbyte(struct Lexer *lex, byte b)
+escape(Lexer *lx, int x, int *flag, vlong *val)
 {
-    return bufio·ungetbyte(&lex->buf, b);
+    int i, u, c;
+    vlong l;
+
+    c = getrune(lx);
+
+    switch (c) {
+    case EOF:
+        errorat(lx->pos, "EOF in string");
+        return 1;
+    case '\n':
+        errorat(lx->pos, "newline in string");
+        return 1;
+    case '\\':
+        break;
+    default:
+        if (c == x)
+            return 1;
+        *val = c;
+        return 0;
+    }
+
+    u = 0; // u marks \u / \U escapes, which are range-checked as code points later
+    c = getrune(lx);
+
+    switch(c) {
+    case 'x':
+        i = 2; // i = maximum number of hex digits to read
+        *flag = 1;
+        goto hex;
+
+    case 'u':
+        i = 4;
+        u = 1;
+        goto hex;
+
+    case 'U':
+        i = 8;
+        u = 1;
+        goto hex;
+
+    case '0': case '1': case '2': case '3':
+    case '4': case '5': case '6': case '7':
+        *flag = 1;
+        goto oct;
+
+    case 'a': c = '\a'; break;
+    case
'b': c = '\b'; break; + case 'f': c = '\f'; break; + case 'n': c = '\n'; break; + case 'r': c = '\r'; break; + case 't': c = '\t'; break; + case 'v': c = '\v'; break; + case '\\': c = '\\'; break; + + default: + if(c != x) errorat(lx->pos, "unknown escape sequence: %c", c); + } + *val = c; + return 0; + +hex: + l = 0; + for(; i > 0; i--) { + c = getbyte(lx); + if (c >= '0' && c <= '9') { + l = l*16 + c-'0'; + continue; + } + if (c >= 'a' && c <= 'f') { + l = l*16 + c-'a' + 10; + continue; + } + if (c >= 'A' && c <= 'F') { + l = l*16 + c-'A' + 10; + continue; + } + errorat(lx->pos, "non-hex character in escape sequence: %c", c); + ungetbyte(lx); + break; + } + if (u && (l > RuneMax || (0xd800 <= l && l < 0xe000))) { + errorat(lx->pos, "invalid unicode code point in escape sequence: %#llx", l); + l = RuneErr; + } + *val = l; + return 0; + +oct: + l = c - '0'; + for (i = 2; i > 0; i--) { + c = getbyte(lx); + if (c >= '0' && c <= '7') { + l = l*8 + c-'0'; + continue; + } + errorat(lx->pos, "non-octal character in escape sequence: %c", c); + ungetbyte(lx); + } + if (l > 255) errorat(lx->pos, "octal escape value > 255: %d", l); + + *val = l; + return 0; } -void -lex(struct Lexer *lex) +#define CASE1(stmt1, kind1) \ + case stmt1: \ + tok.kind = kind1; \ + break; + +#define CASE2(stmt1, kind1, b1, kind2) \ + case stmt1: \ + tok.kind = kind1; \ + b = getbyte(lx); \ + if (b == b1) \ + tok.kind = kind2; \ + else \ + ungetbyte(lx); \ + break; + +#define CASE3(stmt1, kind1, b1, kind2, b2, kind3) \ + case stmt1: \ + tok.kind = kind1; \ + b = getbyte(lx); \ + if (b == b1) \ + tok.kind = kind2; \ + else if (b == b2) \ + tok.kind = kind3; \ + else \ + ungetbyte(lx); \ + break; + +#define CASE4(stmt1, kind1, b1, kind2, b2, kind3, b3, type4) \ + case stmt1: \ + tok.kind = kind1; \ + b = getbyte(lx); \ + if (b == b1) \ + tok.kind = kind2; \ + else if (b == b2) \ + tok.kind = kind3; \ + else if (b == b3) \ + tok.kind = type4; \ + else \ + ungetbyte(lx); \ + break; + + +Token +lex(Lexer 
*lx) { - int b; + int b, n, f; + vlong v; + uint u; + rune r; + string s; + double d; + byte *e; + Token tok; + Sym *sym; + Io *io; + +GetByte: + b = getbyte(lx); +Dispatch: + tok.pos.beg = lx->pos; + + if (b >= RuneSelf || isalpha(b)) + goto TAlpha; + if (isdigit(b)) + goto TNum; - b = getbyte(lex); -TOP: switch (b) { + case ' ': case '\n': case '\r': case '\t': case '\v': case '\f': + while (b = getbyte(lx), isspace(b)) + if (b == '\n') lx->pos.line++; + goto Dispatch; + + case '\'': + if (escape(lx, '\'', &f, &v)) { + errorat(lx->pos, "empty literal or escaped ' in char literal"); + v = '\''; + } + if (!escape(lx, '\'', &f, &v)) { + errorat(lx->pos, "missing '"); + ungetbyte(lx); + } + + if (v > 0xff) { + errorat(lx->pos, "overflowed character literal"); + v = 0; + } + tok.kind = Alit | Vchar; + tok.val.c = v; + break; + + case '"': + s = str·makecap("", 0, 8); + for (;;) { + if (escape(lx, '"', &f, &v)) + break; + + if (v < RuneSelf || f) + str·appendbyte(&s, v); + else { + r = v; + b = utf8·runelen(r); + utf8·runetochar(lx->buf, &r); + str·appendlen(&s, b, lx->buf); + } + } + tok.kind = Alit | Vstr; + tok.val.s = s; + intern(&tok.val.s); + + str·free(s); + break; + case '.': + tok.kind = Adot; + b = getbyte(lx); + + if (isdigit(b)) { + // *lx->b++ = b; + goto TFlt; + } else if (b == '.') { + b = getbyte(lx); + if (b != '.') { + errorat(lx->pos, "invalid token '..'"); + tok.kind = Aellip; + break; + } + } + ungetbyte(lx); + break; + + case '<': + tok.kind = Alt; + b = getbyte(lx); + + if (b == '<') { + tok.kind = Alsft; + b = getbyte(lx); + if (b == '=') + tok.kind = Alsftasn; + else + ungetbyte(lx); + } else if (b == '=') + tok.kind = Alteq; + else + ungetbyte(lx); + break; + + case '>': + tok.kind = Agt; + b = getbyte(lx); + + if (b == '>') { + tok.kind = Arsft; + b = getbyte(lx); + if (b == '=') + tok.kind = Arsftasn; + else + ungetbyte(lx); + } else if (b == '=') + tok.kind = Agteq; + else + ungetbyte(lx); + break; + + case '/': + tok.kind = Adiv; + b = 
getbyte(lx); + + if (b == '=') + tok.kind = Adivasn; + else if (b == '/') { + while (b != EOF && b != '\n') + b = getbyte(lx); + lx->pos.line++; + goto Dispatch; + } else if (b == '*') { + int level = 1; + b = getbyte(lx); + while (b != EOF && level > 0) { + if (b == '/') { + b = getbyte(lx); + if (b == '*') + level++; + } else if (b == '*') { + b = getbyte(lx); + if (b == '/') + level--; + } + if (b == '\n') lx->pos.line++; + b = getbyte(lx); + } + goto Dispatch; + } else + ungetbyte(lx); + break; + + case '#': + if (domacro(lx)) { + tok.kind = Anil; + errorat(lx->pos, "failed to perform preprocessor directive"); + return tok; + } + goto GetByte; + break; + + case EOF: + panicf("need to implement popio"); + + CASE1('(', Alparen) + CASE1(')', Arparen) + CASE1('{', Albrace) + CASE1('}', Arbrace) + CASE1('[', Albrakt) + CASE1(']', Arbrakt) + CASE1(',', Acomma) + CASE1('?', Aqmark) + CASE1(';', Asemi) + CASE1('~', Aneg) + CASE1(':', Acolon) + CASE2('^', Axor, '=', Axorasn) + CASE2('!', Anot, '=', Aneq) + CASE2('*', Astar,'=', Amulasn) + CASE2('=', Aasn, '=', Aeq) + CASE2('%', Amod, '=', Amodasn) + CASE3('+', Aadd, '=', Aaddasn, '+', Ainc) + CASE3('&', Aand, '=', Aandasn, '&', Aandand) + CASE3('|', Aor, '=', Aorasn, '|', Aoror) + CASE4('-', Asub, '=', Asubasn, '-', Adec, '>', Aarrow) + + default: + tok.kind = Anil; + errorat(lx->pos, "invalid token, crashing"); + abort(); } -} + + goto Return; + + TNum: + e = lx->buf + arrlen(lx->buf); + do { + if (lx->b >= e) { + errorat(lx->pos, "number overflows lexer buffer"); + goto Nospace; + } + *lx->b++ = b; + } while (b = getbyte(lx), isdigit(b) || b == '_'); + + if (b == '.' 
|| tolower(b) == 'e')
+        goto TFlt;
+  TInt:
+    // Convert the characters buffered in [lx->buf, lx->b) to an integer.
+    // r preserves the lookahead byte because b is reused as scratch below.
+    // NOTE(review): the TNum loop only buffers digits and '_', so the radix
+    // letter tested here is presumably never in the buffer — confirm how
+    // 0x/0b/0o prefixes are meant to be collected.
+    r = b;
+    n = 10;
+    s = lx->buf;
+    if (*s == '0') {
+        b = *++s;
+        switch (b) {
+        case 'x': n = 16; break;
+        case 'b': n = 2; break;
+        case 'o': n = 8; break;
+        default: --s;
+        }
+        if (s >= e) {
+            errorat(lx->pos, "number overflows lexer buffer");
+            goto Nospace;
+        }
+    }
+
+    v = 0;
+    for (; s != lx->b ; s++) {
+        b = *s;
+        if (b == '_') continue;
+
+        // Atoi yields 0 for both '0' and non-digits; tell them apart here
+        f = Atoi[b];
+        if (f == 0 && b != '0')
+            break;
+
+        if (f >= n) {
+            errorat(lx->pos, "digit '%c' out of range for base %d", b, n);
+            f = 0;
+        }
+
+        if (v > (UINT64_MAX - f) / n) {
+            errorat(lx->pos, "integer literal overflow");
+            v = 0;
+            break;
+        }
+
+        v = v * n + f;
+    }
+    b = r;
+    tok.kind = Alit | Vint;
+    tok.val.i = v;
+    /* TODO: Suffixes!
+    if (tolower(b) == 'u') {
+        tok.kind |= Vusgn;
+        b = getbyte(lx);
+    }
+    */
+    // the byte that ended the number belongs to the next token: push it back
+    ungetbyte(lx);
+    goto Return;
+
+  TFlt:
+    // Reached from TNum (digits already buffered, b is '.' or 'e') or from
+    // the '.' case (nothing buffered, b is the first fractional digit).
+    // Every write keeps one slot free for the terminating NUL.
+    e = lx->buf + arrlen(lx->buf);
+    if (lx->b == lx->buf)
+        *lx->b++ = '.';    // the '.' case consumed the dot without buffering it
+
+    if (b == '.') {
+        if (lx->b + 1 >= e)
+            goto Numspace;
+        *lx->b++ = b;
+        b = getbyte(lx);
+    }
+
+    while (isdigit(b)) {
+        if (lx->b + 1 >= e)
+            goto Numspace;
+        *lx->b++ = b;
+        b = getbyte(lx);    // advance, otherwise this loop never terminates
+    }
+
+    if (tolower(b) == 'e') {
+        // buffer the exponent marker and sign so strtod sees them
+        if (lx->b + 1 >= e)
+            goto Numspace;
+        *lx->b++ = 'e';
+        b = getbyte(lx);
+        if (b == '-' || b == '+') {
+            if (lx->b + 1 >= e)
+                goto Numspace;
+            *lx->b++ = b;
+            b = getbyte(lx);
+        }
+
+        if (!isdigit(b))
+            errorat(lx->pos, "expected number after exponent, found %c", b);
+
+        while (isdigit(b)) {
+            if (lx->b + 1 >= e)
+                goto Numspace;
+            *lx->b++ = b;
+            b = getbyte(lx);
+        }
+    }
+    *lx->b = '\0';
+    d = strtod(lx->buf, nil);
+
+    tok.kind = Alit | Vfloat;
+    tok.val.f = d;
+
+    // the byte that ended the literal belongs to the next token
+    ungetbyte(lx);
+    goto Return;
+
+  Numspace:
+    errorat(lx->pos, "number overflows lexer buffer");
+    goto Nospace;
+
+  TAlpha:
+    // Collect an identifier (ASCII alnum, '_', or UTF-8 letters/digits) into
+    // lx->buf, then intern it.
+    u = b;
+    s = lx->buf;
+    e = lx->buf + arrlen(lx->buf);
+    for (;;) {
+        if (s >= e) {
+            // s points one past the buffer here: print what was collected
+            errorat(lx->pos, "identifier too long for buffer: %.*s", (int)(s - lx->buf), lx->buf);
+            goto Nospace;
+        }
+        if (u >= RuneSelf) {
+            // multi-byte sequence: re-read it as a whole rune
+            ungetbyte(lx);
+            r = getrune(lx);
+            if (!utf8·isletter(r) && !utf8·isdigit(r) && r != 0xb7) {
+                errorat(lx->pos, "invalid identifier character %d", r);
+            }
+            s += utf8·runetochar(s, &r);
+        } else if (!isalnum(u) && u != '_')
+            break;
+        else
+            *s++ = u;
+        u = getbyte(lx);
+    }
+    // the byte that ended the identifier belongs to the next token
+    ungetbyte(lx);
+    *s = '\0';
+    tok.kind = Aident;
+    tok.val.s = lx->buf;
+
+    n = intern(&tok.val.s);
+    if (n < arrlen(keywords)) {
+
tok.kind = Akeywd; + } + + sym = lookup(lx->sym, tok.val.s); + if (sym) { + io = makeio(); + io->buf.end += expandmacro(lx, sym, io->b); + pushio(lx, io); + goto GetByte; + } + +Return: + lx->b = lx->buf; + tok.pos.end = lx->pos; + return tok; + +Nospace: + panicf("aborting compilation"); +} + +#undef CASE4 +#undef CASE3 +#undef CASE2 +#undef CASE1 + +// ----------------------------------------------------------------------- +// push/pop io objects + +void +pushio(Lexer *lx, Io *new) +{ + new->link = lx->io; + lx->io->store = lx->pos; + lx->io = new; + + lx->pos = (Pos){ + .line = 0, + .col = 0, + .path = new->path, + }; +} + +void +popio(Lexer *lx) +{ + Io *prev; + + prev = lx->io->link; + if (!prev) { + panicf("no buffer left"); + } + + lx->pos = prev->store; + lx->io = prev; +} + +// ----------------------------------------------------------------------- +// symbol tables + +#define PTR_HASH(p) (uintptr)(p) +#define PTR_EQUAL(p1, p2) ((uintptr)(p1) == (uintptr)(p2)) + +struct SymTab +{ + MAP_STRUCT_BODY(string, Sym*); +}; + +Sym* +lookup(SymTab *tab, string ident) +{ + int idx; + MAP_GET(idx, tab, ident, PTR_HASH, PTR_EQUAL); + + if (idx < tab->n_buckets) + return tab->vals[idx]; + + return nil; +} + +static +int +moresymtab(SymTab *tab, int n) +{ + MAP_GROW(tab, string, Sym*, n, PTR_HASH, mem·sys.alloc, mem·sys.free, nil); +} + +static +int +putsym(SymTab *tab, Sym *sym, error *err) +{ + MAP_PUT(tab, sym->name, sym, PTR_HASH, PTR_EQUAL, moresymtab, err); +} + +Sym* +define(SymTab *tab, string name, int kind) +{ + Sym *sym; + error err; + + sym = mem·arenaalloc(C.heap, 1, sizeof(*sym)); + sym->name = name; + sym->kind = kind; + + putsym(tab, sym, &err); + + return sym; +} + +// ----------------------------------------------------------------------- +// error reporting + +void +errorat(Pos x, byte *fmt, ...) 
+{ + va_list args; + va_start(args, fmt); + + printf("error %d: ", x.line); + + vprintf(fmt, args); + va_end(args); +} + diff --git a/sys/cmd/cc/rules.mk b/sys/cmd/cc/rules.mk index fe30305..b32d5b6 100644 --- a/sys/cmd/cc/rules.mk +++ b/sys/cmd/cc/rules.mk @@ -3,13 +3,13 @@ include share/push.mk # Local sources SRCS_$(d) := \ + $(d)/pp.c \ $(d)/lex.c \ - $(d)/sym.c \ $(d)/cc.c LIBS_$(d) := BINS_$(d) := $(d)/cc -TSTS_$(d) := +UNTS_$(d) := include share/paths.mk diff --git a/sys/cmd/cc/sym.c b/sys/cmd/cc/sym.c deleted file mode 100644 index ef40bce..0000000 --- a/sys/cmd/cc/sym.c +++ /dev/null @@ -1,32 +0,0 @@ -#include "cc.h" - -#include - -#define PTR_HASH(p) (uintptr)(p) -#define PTR_EQUAL(p1, p2) ((uintptr)(p1) == (uintptr)(p2)) - -#if 0 -struct SymTab -{ - MAP_STRUCT_BODY(string, Sym*); -}; - -Sym* -getsym(SymTab *tab, string key) -{ - MAP_GET(tab, key, PTR_HASH, PTR_EQUAL, nil); -} - -static -int -moresymtab(SymTab *tab, int n) -{ - MAP_GROW(tab, string, Sym*, n, PTR_HASH); -} - -int -putsym(SymTab *tab, Sym *sym, error *err) -{ - MAP_PUT(tab, sym->name, sym, PTR_HASH, PTR_EQUAL, moresymtab, err); -} -#endif diff --git a/sys/libbio/io/newick.c b/sys/libbio/io/newick.c index f9be2d2..da94ef2 100644 --- a/sys/libbio/io/newick.c +++ b/sys/libbio/io/newick.c @@ -236,7 +236,7 @@ parse(struct Parser *p) errorf("incorrect format: unmatched comment bracket '['"); goto ERROR; } - str·append(node->comment, tokstr(tok)); + str·append(&node->comment, tokstr(tok)); } break; diff --git a/sys/libn/memory.c b/sys/libn/memory.c index 4cf92b2..dce0c36 100644 --- a/sys/libn/memory.c +++ b/sys/libn/memory.c @@ -164,3 +164,8 @@ memset64(void *dst, uint64 val, uintptr size) ((byte*)dst)[i] = ((byte*)&val)[i&7]; } } + +// ------------------------------------------------------------------------- +// First argument + +char *argv0; diff --git a/sys/libn/string.c b/sys/libn/string.c index 694cdea..fb92a04 100644 --- a/sys/libn/string.c +++ b/sys/libn/string.c @@ -150,6 +150,34 @@ 
utf8·findrune(byte* s, long c) return nil; } +byte* +utf8·findrrune(byte* s, long c) +{ + long c1; + rune r; + byte *l; + + if (c < RuneSync) + return strrchr(s, c); + + l = nil; + for (;;) { + c1 = *(ubyte*)s; + if (c1 < RuneSelf) { + if (c1 == 0) return l; + if (c1 == c) l = s; + s++; + continue; + } + c1 = utf8·chartorune(&r, s); + if (r == c) + l = s; + s += c1; + } + + return nil; +} + #undef Bit #undef Tbyte #undef RuneX @@ -319,7 +347,7 @@ str·fit(string *s) // string to our buffer. The result is reallocated if not enough room is present // in the buffer. void -str·appendcount(string *s, vlong n, const byte* b) +str·appendlen(string *s, vlong n, const byte* b) { vlong bl = strlen(b); if (n > bl) panicf("attempted to make a substring longer than string"); @@ -339,7 +367,7 @@ str·appendcount(string *s, vlong n, const byte* b) void str·append(string *s, const byte* b) { - return str·appendcount(s, strlen(b), b); + return str·appendlen(s, strlen(b), b); } // AppendByte will append the given byte to our string. @@ -358,24 +386,11 @@ str·appendbyte(string *s, const byte b) *s[h->len] = '\0'; // NOTE: I don't think an explicit zero is required..? } -// Equals returns true if string s and t are equivalent. -bool -str·equals(const string s, const string t) -{ - vlong sL = str·len(s); - vlong tL = str·len(t); - if (sL != tL) return false; - - return memcmp(s, t, sL) == 0; -} - -//------------------------------------------------------------------------ -// Utility Methods - /* * Appendf will append the given formatted string to our buffer. * Returns the newly minted string */ + void str·appendf(string *s, const byte* fmt, ...) { @@ -399,6 +414,31 @@ str·appendf(string *s, const byte* fmt, ...) h->len += n; } +// Equals returns true if string s and t are equivalent. 
+bool +str·equals(const string s, const string t) +{ + vlong sL = str·len(s); + vlong tL = str·len(t); + if (sL != tL) return false; + + return memcmp(s, t, sL) == 0; +} + +//------------------------------------------------------------------------ +// Utility Methods + +int +str·read(string s, int size, int n, void *buf) +{ + int len; + + len = MIN(n * size, str·len(s)); + memcpy(buf, s, len); + + return len; +} + // Find will find the first occurence of // substr in the string Returns -1 if nothing was found. int @@ -502,7 +542,7 @@ str·join(vlong len, byte** fields, const byte* sep) for (j = 0; j < len; j++) { str·append(&s, fields[j]); if (j < len - 1) - str·appendcount(&s, 1, sep); + str·appendlen(&s, 1, sep); } return s; -- cgit v1.2.1