From ce05175372a9ddca1a225db0765ace1127a39293 Mon Sep 17 00:00:00 2001 From: Nicholas Date: Fri, 12 Nov 2021 09:22:01 -0800 Subject: chore: simplified organizational structure --- sys/cmd/cc/lex.c | 873 ------------------------------------------------------- 1 file changed, 873 deletions(-) delete mode 100644 sys/cmd/cc/lex.c (limited to 'sys/cmd/cc/lex.c') diff --git a/sys/cmd/cc/lex.c b/sys/cmd/cc/lex.c deleted file mode 100644 index 33fc5d0..0000000 --- a/sys/cmd/cc/lex.c +++ /dev/null @@ -1,873 +0,0 @@ -#include "cc.h" -#include - -// ----------------------------------------------------------------------- -// printing functions - -void -puttok(Token tok) -{ - if (tok.kind < Alit) - printf("%s", tokens[tok.kind]); - else if (tok.kind & Alit) { - if (tok.kind & Vchar) - if (tok.kind & Vint) - if (tok.kind & Vlong) - if (tok.kind & Vvlong) - printf("literal <%lld>", tok.val.i); - if (tok.kind & Vfloat) - printf("literal <%f>", tok.val.f); - printf("literal <%s>", tok.val.s); - } else - printf("ident <%s>", tok.val.s); -} - -// ----------------------------------------------------------------------- -// io buffer management - -#define asrdr(x) (io·Reader){(int (*)(void *, int, int, void *))x} - -// path should be absolute -Io* -openio(Lexer *lx, byte *path) -{ - string *it, *end; - - intern(&path); - - // See if we have already opened file; - // If so, and it hasn't been flagged return it - for (it = lx->omit.path, end = it + lx->omit.len; it < end; ++it) { - if ((uintptr)(*it) == (uintptr)(path)) - return nil; - } - - // TODO: See if we have already loaded the file - - if ((lx->new - lx->iostk) >= arrlen(lx->iostk)-1) - panicf("out of I/O space!"); - - lx->new->f = io·open(path, "r"); - if (!lx->new->f) - panicf("file %s not found", path); - - lx->new->kind = IOfile; - lx->new->path = path; - bufio·initreader(&lx->new->rdr, asrdr(io·read), lx->new->f); - - return lx->new++; -} - -static -Io* -makeio(Lexer *lx, byte *name) -{ - if ((lx->new - lx->iostk) >= arrlen(lx->iostk)-1) - panicf("out of I/O space!"); - - lx->new->path = name; - lx->new->rdr = (io·Buffer) { - .state = bufio·rdr | bufio·end, - .runesize = 0, - .h = nil, - .size = bufio·size, - .beg = lx->new->rdr.buf + bufio·ungets, - .pos = lx->new->rdr.buf + bufio·ungets, - .end = lx->new->rdr.buf + bufio·ungets, - }; - lx->new->b = lx->new->rdr.beg; - - return lx->new++; -} -#undef asrdr - -static -void -freeio(Lexer *lx, Io *io) -{ - if (io->kind & IOfile) { - io·close(io->f); - } - - io->rdr.state = 0; - io->kind = 0; - io->link = nil; - io->path = nil; - io->store = (Pos){ 0 }; - io->path = ""; -} - -void -pushio(Lexer *lx, Io *new) -{ - new->link = lx->io; - lx->io->store = lx->pos; - lx->io = new; - - lx->pos = (Pos){ - .line = 1, - .col = 1, - .path = new->path, - }; -} - -void -popio(Lexer *lx) -{ - Io *prev; - - assert(lx->io == lx->new-1); - --lx->new; - - prev = lx->io->link; - freeio(lx, lx->io); - - lx->io = prev; - if (!prev) { - return; - } - - lx->pos = prev->store; -} - -// ----------------------------------------------------------------------- -// simple wrappers - -int -getbyte(Lexer *lx) -{ - return bufio·getbyte(&lx->io->rdr); -} - -int -getnsbyte(Lexer *lx) -{ - int b; - b = getbyte(lx); - for (;;) { - if (b == EOF) { - if (lx->io->link) { - popio(lx); - assert(lx->io); - b = getbyte(lx); - continue; - } else - return b; - } - if (b >= RuneSelf || !isspace(b)) - return b; - if (b == '\n') - return b; - b = getbyte(lx); - } - return b; -} - -rune -getrune(Lexer *lx) -{ - return bufio·getrune(&lx->io->rdr); -} - -byte -ungetbyte(Lexer *lx) -{ - byte b; - return bufio·ungetbyte(&lx->io->rdr, b); -} - -rune -ungetrune(Lexer *l, rune r) -{ - return bufio·ungetrune(&l->io->rdr, r); -} - -// ----------------------------------------------------------------------- -// main lexer - -#define TOK(a, b) b, -byte *tokens[NUM_TOKENS] = { TOKENS }; -#undef TOK - -static uint8 Atoi[256] = -{ - ['0'] = 0, ['1'] = 1, ['2'] = 2, ['3'] = 3, ['4'] = 4, ['5'] = 5, - ['6'] = 6, ['7'] = 7, ['8'] = 8, ['9'] = 9, ['a'] = 10, ['A'] = 10, - ['b'] = 11, ['B'] = 11, ['c'] = 12, ['C'] = 12, ['d'] = 13, ['D'] = 13, - ['e'] = 14, ['E'] = 14, ['f'] = 15, ['F'] = 15, -}; - -static -error -escapechar(Lexer *lx, int x, int islong, int esc, vlong *val) -{ - int i, u, c; - vlong l; - - c = getrune(lx); - - switch (c) { - case '\\': - break; - case EOF: - errorat(lx->pos, "EOF in string"); - return 1; - case '\n': - errorat(lx->pos, "newline in string"); - return 1; - default: - if (c == x) - return 1; - *val = c; - return 0; - } - - u = 0; - c = getrune(lx); - - switch(c) { - case 'x': - i = islong ? 4 : 2; - goto hex; - - case 'u': - i = islong ? 8 : 4; - u = 1; - goto hex; - - case 'U': - i = 8; - u = 1; - goto hex; - - case '0': case '1': case '2': case '3': - case '4': case '5': case '6': case '7': - i = islong ? 4 : 2; - goto oct; - - case 'a': c = '\a'; break; - case 'b': c = '\b'; break; - case 'f': c = '\f'; break; - case 'n': c = '\n'; break; - case 'r': c = '\r'; break; - case 't': c = '\t'; break; - case 'v': c = '\v'; break; - case '\\':c = '\\'; break; - - default: - if(c != x) errorat(lx->pos, "unknown escape sequence: %c", c); - } - *val = c; - return 0; - -hex: - l = 0; - for(; i > 0; i--) { - c = getbyte(lx); - if (c >= '0' && c <= '9') { - l = l*16 + c-'0'; - continue; - } - if (c >= 'a' && c <= 'f') { - l = l*16 + c-'a' + 10; - continue; - } - if (c >= 'A' && c <= 'F') { - l = l*16 + c-'A' + 10; - continue; - } - ungetbyte(lx); - break; - } - if (u && (l > RuneMax || (0xd800 <= l && l < 0xe000))) { - errorat(lx->pos, "invalid unicode code point in escape sequence: %#llx", l); - l = RuneErr; - } - *val = l; - if (esc) - *val |= RuneMask + 1; - return 0; - -oct: - l = c - '0'; - for (; i > 0; i--) { - c = getbyte(lx); - if (c >= '0' && c <= '7') { - l = l*8 + c-'0'; - continue; - } - ungetbyte(lx); - break; - } - if (l > 255) errorat(lx->pos, "octal escape value > 255: %d", l); - - *val = l; - if (esc) - *val |= RuneMask + 1; - return 0; -} - -#define CASE1(stmt1, kind1) \ - case stmt1: \ - tok.kind = kind1; \ - goto Return - -#define CASE2(stmt1, kind1, b1, kind2) \ - case stmt1: \ - tok.kind = kind1; \ - b = getbyte(lx); \ - if (b == b1) \ - tok.kind = kind2; \ - else \ - ungetbyte(lx); \ - goto Return - -#define CASE3(stmt1, kind1, b1, kind2, b2, kind3) \ - case stmt1: \ - tok.kind = kind1; \ - b = getbyte(lx); \ - if (b == b1) \ - tok.kind = kind2; \ - else if (b == b2) \ - tok.kind = kind3; \ - else \ - ungetbyte(lx); \ - goto Return - -#define CASE4(stmt1, kind1, b1, kind2, b2, kind3, b3, type4) \ - case stmt1: \ - tok.kind = kind1; \ - b = getbyte(lx); \ - if (b == b1) \ - tok.kind = kind2; \ - else if (b == b2) \ - tok.kind = kind3; \ - else if (b == b3) \ - tok.kind = type4; \ - else \ - ungetbyte(lx); \ - goto Return - - -Token -lex(Lexer *lx) -{ - int b, n, f; - vlong v, _; - rune r; - string s; - double d; - byte *e; - Token tok; - Sym *sym; - Io *io; - -GetByte: - b = getbyte(lx); -Dispatch: - tok.pos = lx->pos; - - if ((b != EOF && b >= RuneSelf) || b == '_') - goto Talpha; - if (isalpha(b)) { - if (b != 'L') - goto Talpha; - - n = b; - b = getbyte(lx); - if (b == '\'') { - if (escapechar(lx, '\'', 1, 0, &v)) - b = '\''; - if (!escapechar(lx, '\'', 1, 0, &_)) { - errorat(lx->pos, "missing ' at end of character constant"); - } - tok.kind = Alit | Vrune; - tok.val.r = v; - goto Return; - } - if (b == '"') - goto TLstr; - ungetbyte(lx); - b = n; - - goto Talpha; - } - if (isdigit(b)) - goto Tnum; - - switch (b) { - case '\n': - lx->pos.line++; - case ' ': case '\r': case '\t': case '\v': case '\f': - while (b = getbyte(lx), isspace(b)) - if (b == '\n') - lx->pos.line++; - goto Dispatch; - - case '\\': - b = getbyte(lx); - if (b != '\n') - errorat(lx->pos, "'\\' without a trailing newline"); - goto GetByte; - - Tchar: - case '\'': - if (escapechar(lx, '\'', 0, 0, &v)) { - errorat(lx->pos, "empty literal or escaped ' in char literal"); - v = '\''; - } - if (!escapechar(lx, '\'', 0, 0, &_)) { - errorat(lx->pos, "missing '"); - ungetbyte(lx); - } - - if (v > 0xff) { - errorat(lx->pos, "overflowed character literal"); - v = 0; - } - tok.kind = Alit | Vchar; - tok.val.c = v; - goto Return; - - case '"': - s = str·makecap("", 0, 8); - for (;;) { - if (escapechar(lx, '"', 0, 1, &v)) - break; - - if (v & (RuneMask + 1)) - str·appendbyte(&s, v); - else { - r = v; - b = utf8·runelen(r); - utf8·runetobyte(lx->buf, &r); - str·appendlen(&s, b, lx->buf); - } - } - tok.kind = Alit | Vstr; - tok.val.s = s; - intern(&tok.val.s); - - str·free(s); - goto Return; - - TLstr: - s = str·makecap("", 0, 8); - // NOTE: this violates strict aliasing - for (;;) { - if (escapechar(lx, '"', 1, 0, &v)) - break; - str·appendlen(&s, sizeof(wchar_t), (byte*)&v); - } - tok.kind = Alit | Vwstr; - tok.val.s = s; - intern(&tok.val.s); - - str·free(s); - goto Return; - - case '.': - tok.kind = Adot; - b = getbyte(lx); - - if (isdigit(b)) { - // *lx->b++ = b; - goto Tflt; - } else if (b == '.') { - b = getbyte(lx); - if (b != '.') { - errorat(lx->pos, "invalid token '..'"); - tok.kind = Aellip; - break; - } - } - ungetbyte(lx); - goto Return; - - case '<': - tok.kind = Alt; - b = getbyte(lx); - - if (b == '<') { - tok.kind = Alsft; - b = getbyte(lx); - if (b == '=') - tok.kind = Alsftasn; - else - ungetbyte(lx); - } else if (b == '=') - tok.kind = Alteq; - else - ungetbyte(lx); - goto Return; - - case '>': - tok.kind = Agt; - b = getbyte(lx); - - if (b == '>') { - tok.kind = Arsft; - b = getbyte(lx); - if (b == '=') - tok.kind = Arsftasn; - else - ungetbyte(lx); - } else if (b == '=') - tok.kind = Agteq; - else - ungetbyte(lx); - goto Return; - - case '/': - tok.kind = Adiv; - b = getbyte(lx); - - if (b == '=') - tok.kind = Adivasn; - else if (b == '/') { - while (b != EOF && b != '\n') - b = getbyte(lx); - goto Dispatch; - } else if (b == '*') { - int level = 1; - b = getbyte(lx); - while (b != EOF && level > 0) { - if (b == '/') { - b = getbyte(lx); - if (b == '*') - level++; - } else if (b == '*') { - b = getbyte(lx); - if (b == '/') - level--; - } - if (b == '\n') - lx->pos.line++; - b = getbyte(lx); - } - goto Dispatch; - } else - ungetbyte(lx); - goto Return; - - case '#': - if (domacro(lx)) { - tok.kind = Anil; - errorat(lx->pos, "failed to perform preprocessor directive"); - return tok; - } - goto GetByte; - - case EOF: - popio(lx); - if (lx->io) - goto GetByte; - tok.kind = Aeof; - goto Return; - - CASE1('(', Alparen); - CASE1(')', Arparen); - CASE1('{', Albrace); - CASE1('}', Arbrace); - CASE1('[', Albrakt); - CASE1(']', Arbrakt); - CASE1(',', Acomma); - CASE1('?', Aqmark); - CASE1(';', Asemi); - CASE1('~', Aneg); - CASE1(':', Acolon); - CASE2('^', Axor, '=', Axorasn); - CASE2('!', Anot, '=', Aneq); - CASE2('*', Astar,'=', Amulasn); - CASE2('=', Aasn, '=', Aeq); - CASE2('%', Amod, '=', Amodasn); - CASE3('+', Aadd, '=', Aaddasn, '+', Ainc); - CASE3('&', Aand, '=', Aandasn, '&', Aandand); - CASE3('|', Aor, '=', Aorasn, '|', Aoror); - CASE4('-', Asub, '=', Asubasn, '-', Adec, '>', Aarrow); - - Tnum: - e = lx->buf + arrlen(lx->buf); - do { - if (lx->b >= e) { - errorat(lx->pos, "number overflows lexer buffer"); - goto Nospace; - } - *lx->b++ = b; - } while (b = getbyte(lx), isdigit(b) || b == '_'); - - if (b == '.' || tolower(b) == 'e') - goto Tflt; - Tint: - n = 10; - s = lx->buf; - if (*s == '0') { - switch (b) { - case 'x': n = 16; break; - case 'b': n = 2; break; - case 'o': n = 8; break; - default: goto Rint; - } - lx->b = s; - /* reparse number, now with base info */ - while (b = getbyte(lx), (isdigit(b) || - ('a' <= b && b <= 'f') || - ('A' <= b && b <= 'F') || - b == '_')) - *lx->b++ = b; - } - Rint: - v = 0; - r = b; - for (; s != lx->b ; s++) { - b = *s; - if (b == '_') continue; - - f = Atoi[b]; - if (f == 0 && b != '0') - break; - - if (f >= n) { - errorat(lx->pos, "digit '%c' out of range for base %d", b, n); - f = 0; - } - - if (v > (UINT64_MAX - f) / n) { - errorat(lx->pos, "integer literal overflow"); - v = 0; - break; - } - - v = v * n + f; - } - - b = r; - tok.kind = Alit; - tok.val.i = v; - - if (b == 'u' || b == 'U') { - tok.kind |= Vun; - b = getbyte(lx); - } - if (b == 'l' || b == 'L') { - r = getbyte(lx); - if (r == 'l' || r == 'L') { - if (r != b) - errorat(lx->pos, "mismatched case on long long integer suffix"); - tok.kind |= Vvlong; - r = getbyte(lx); - } else - tok.kind |= Vlong; - - if (r == 'u' || r == 'U') { - if (tok.kind & Vun) - errorat(lx->pos, "multiple unsigned designators on integer suffix"); - tok.kind |= Vun; - goto Return; - } - - ungetbyte(lx); - goto Return; - } - - tok.kind |= Vint; - ungetbyte(lx); - goto Return; - - Tflt: - if (b == '.') { - *lx->b++ = b; - b = getbyte(lx); - } - - while (isdigit(b)) { - *lx->b++ = b; - - if (lx->b >= e) { - errorat(lx->pos, "number overflows lexer buffer"); - goto Nospace; - } - } - - if (tolower(b) == 'e') { - b = getbyte(lx); - if (b == '-' || b == '+') - b = getbyte(lx); - - if (!isdigit(b)) - errorat(lx->pos, "expected number after exponent, found %c", b); - - do { - *lx->b++ = b; - } while (b = getbyte(lx), isdigit(b)); - } - *lx->b = '\0'; - d = strtod(lx->buf, nil); - ungetbyte(lx); - - tok.kind = Alit | Vfloat; - tok.val.f = d; - - goto Return; - - Talpha: - s = lx->buf; - e = lx->buf + arrlen(lx->buf); - for (;;) { - if (s >= e) { - errorat(lx->pos, "identifier too long for buffer: %s", s); - goto Nospace; - } - if (b != EOF && b >= RuneSelf) { - ungetbyte(lx); - r = getrune(lx); - if (!utf8·isletter(r) && !utf8·isdigit(r) && r != 0xb7) { - errorat(lx->pos, "invalid identifier character %d", r); - } - s += utf8·runetobyte(s, &r); - } else if (!isalnum(b) && b != '_') - break; - else - *s++ = b; - b = getbyte(lx); - } - *s = '\0'; - ungetbyte(lx); - - tok.kind = Aident; - tok.val.s = lx->buf; - - n = intern(&tok.val.s); - if (n < arrlen(keywords)) { - tok.kind = Akeywd; - tok.val.i = n; - goto Return; - } - - sym = lookup(&lx->sym, tok.val.s); - if (sym && ((uintptr)sym->name != (uintptr)lx->io->path)) { - if ((uintptr)sym == lx->macline) { - tok.kind = Alit | Vint; - tok.val.i = lx->pos.line; - goto Return; - } - if ((uintptr)sym == lx->macfile) { - tok.kind = Alit | Vstr; - tok.val.s = lx->pos.path; - goto Return; - } - io = makeio(lx, sym->name); - io->rdr.end += expandmacro(lx, sym, io->b); - printf("EXPANDED %s: %s\n", sym->name, io->rdr.beg); - *io->rdr.end++ = EOF; - pushio(lx, io); - goto GetByte; - } - goto Return; - - default: - tok.kind = Anil; - errorat(lx->pos, "invalid token, crashing"); - abort(); - } - -Return: - lx->b = lx->buf; - return tok; - -Nospace: - panicf("aborting compilation"); - exit(1); -} - -#undef CASE4 -#undef CASE3 -#undef CASE2 -#undef CASE1 - -// ----------------------------------------------------------------------- -// symbol tables - -#define PTR_HASH(p) (uintptr)(p) -#define PTR_EQUAL(p1, p2) ((uintptr)(p1) == (uintptr)(p2)) - -static -void -·free(void* _, void* ptr) { - return free(ptr); -} - -static -void * -·alloc(void* _, uint n, ulong size) { - return malloc(n*size); -} - -static -void * -·calloc(void* _, uint n, ulong size) { - return calloc(n, size); -} - -static -int -moresymtab(SymTab *tab, int n) -{ - MAP_GROW(tab, string, Sym*, n, PTR_HASH, sys·Memory, nil); -} - -static -int -putsym(SymTab *tab, Sym *sym, error *err) -{ - MAP_PUT(tab, sym->name, sym, PTR_HASH, PTR_EQUAL, moresymtab, err); -} - -Sym* -define(SymTab *tab, string name, uint32 kind) -{ - int i; - Sym *sym; - error err; - - sym = mem·arenaalloc(C.heap, 1, sizeof(*sym)); - sym->name = name; - sym->kind = kind; - - i = putsym(tab, sym, &err); - tab->vals[i] = sym; - - return sym; -} - -Sym* -lookup(SymTab *tab, string ident) -{ - int idx; - MAP_GET(idx, tab, ident, PTR_HASH, PTR_EQUAL); - - if (idx < tab->n_buckets) - return tab->vals[idx]; - - return nil; -} - - -error -forget(SymTab *tab, string ident) -{ - int idx; - MAP_GET(idx, tab, ident, PTR_HASH, PTR_EQUAL); - - if (idx < tab->n_buckets) { - MAP_DEL(tab, idx); - return 0; - } - return 1; -} - -void -forgetall(SymTab *tab) -{ - MAP_RESET(tab); -} -- cgit v1.2.1