aboutsummaryrefslogtreecommitdiff
path: root/sys/cmd/cc/lex.c
diff options
context:
space:
mode:
Diffstat (limited to 'sys/cmd/cc/lex.c')
-rw-r--r--sys/cmd/cc/lex.c873
1 files changed, 0 insertions, 873 deletions
diff --git a/sys/cmd/cc/lex.c b/sys/cmd/cc/lex.c
deleted file mode 100644
index 33fc5d0..0000000
--- a/sys/cmd/cc/lex.c
+++ /dev/null
@@ -1,873 +0,0 @@
-#include "cc.h"
-#include <libn/macro/map.h>
-
-// -----------------------------------------------------------------------
-// printing functions
-
-// Print a human-readable rendering of tok to standard output.
-//
-// Kinds below Alit index the tokens[] name table directly. Kinds that carry
-// the Alit bit are literals and print as "literal <...>". Every other kind is
-// printed as an identifier through tok.val.s.
-//
-// The three literal branches are mutually exclusive. Previously a numeric
-// literal was printed and then tok.val.s was printed again with %s.
-// NOTE(review): the integer test still requires Vchar, Vint, Vlong and Vvlong
-// to all be set, exactly as the original nesting did -- confirm against the
-// V* flag layout in cc.h whether "any of" was intended.
-void
-puttok(Token tok)
-{
-    if (tok.kind < Alit) {
-        printf("%s", tokens[tok.kind]);
-        return;
-    }
-    if (!(tok.kind & Alit)) {
-        printf("ident <%s>", tok.val.s);
-        return;
-    }
-
-    if ((tok.kind & Vchar) && (tok.kind & Vint)
-     && (tok.kind & Vlong) && (tok.kind & Vvlong))
-        printf("literal <%lld>", tok.val.i);
-    else if (tok.kind & Vfloat)
-        printf("literal <%f>", tok.val.f);
-    else
-        printf("literal <%s>", tok.val.s);
-}
-
-// -----------------------------------------------------------------------
-// io buffer management
-
-// Wrap a read function in an io·Reader compound literal, casting it to the
-// reader callback signature.
-#define asrdr(x) (io·Reader){(int (*)(void *, int, int, void *))x}
-
-// Open the file at `path` for reading and claim the next free slot of the
-// lexer's I/O stack for it. The caller is expected to pass an absolute path.
-//
-// The path is interned first, so it can be compared by pointer against the
-// entries of lx->omit. If it matches one of them, nil is returned and nothing
-// is opened. Otherwise the slot is set up as an IOfile with a buffered reader
-// and returned; lx->new is advanced past it.
-//
-// Panics (panicf) when the I/O stack is full or the file cannot be opened.
-Io*
-openio(Lexer *lx, byte *path)
-{
-    string *skip, *stop;
-    Io *slot;
-
-    intern(&path);
-
-    // refuse any path recorded in the omit list
-    skip = lx->omit.path;
-    stop = skip + lx->omit.len;
-    while (skip < stop) {
-        if ((uintptr)(*skip) == (uintptr)(path))
-            return nil;
-        ++skip;
-    }
-
-    // TODO: See if we have already loaded the file
-
-    if ((lx->new - lx->iostk) >= arrlen(lx->iostk)-1)
-        panicf("out of I/O space!");
-
-    slot = lx->new;
-    slot->f = io·open(path, "r");
-    if (!slot->f)
-        panicf("file %s not found", path);
-
-    slot->kind = IOfile;
-    slot->path = path;
-    bufio·initreader(&slot->rdr, asrdr(io·read), slot->f);
-
-    lx->new = slot + 1;
-    return slot;
-}
-
-// Claim the next free slot of the lexer's I/O stack for an in-memory input
-// named `name`.
-//
-// The slot's buffer is reset to an empty reader with no backing handle,
-// flagged bufio·rdr | bufio·end. beg, pos and end all point bufio·ungets
-// bytes into the buffer, and the slot's write cursor `b` starts at beg.
-// lx->new is advanced past the slot. Panics when the stack is full.
-static
-Io*
-makeio(Lexer *lx, byte *name)
-{
-    Io *slot;
-
-    if ((lx->new - lx->iostk) >= arrlen(lx->iostk)-1)
-        panicf("out of I/O space!");
-
-    slot = lx->new;
-    slot->path = name;
-    slot->rdr = (io·Buffer) {
-        .state = bufio·rdr | bufio·end,
-        .runesize = 0,
-        .h = nil,
-        .size = bufio·size,
-        .beg = slot->rdr.buf + bufio·ungets,
-        .pos = slot->rdr.buf + bufio·ungets,
-        .end = slot->rdr.buf + bufio·ungets,
-    };
-    slot->b = slot->rdr.beg;
-
-    lx->new = slot + 1;
-    return slot;
-}
-#undef asrdr
-
-// Release an I/O slot: close its file when the slot is an IOfile, then clear
-// the reader state, kind, link and stored position. The path is left as the
-// placeholder "<empty>". `lx` is not used.
-static
-void
-freeio(Lexer *lx, Io *io)
-{
-    if (io->kind & IOfile)
-        io·close(io->f);
-
-    io->kind = 0;
-    io->rdr.state = 0;
-    io->link = nil;
-    io->store = (Pos){ 0 };
-    io->path = "<empty>";
-}
-
-// Make `new` the current input of the lexer.
-//
-// `new` is linked on top of the input being suspended, whose position is
-// saved in its `store` field so that popio can restore it. lx->pos is then
-// reset to line 1, column 1 of new->path.
-//
-// lx->io may be nil (popio leaves it nil once the last input is popped). In
-// that case there is no position to save and new->link simply becomes nil.
-void
-pushio(Lexer *lx, Io *new)
-{
-    new->link = lx->io;
-    if (lx->io)
-        lx->io->store = lx->pos;
-    lx->io = new;
-
-    lx->pos = (Pos){
-        .line = 1,
-        .col = 1,
-        .path = new->path,
-    };
-}
-
-// Drop the current input and resume the one linked beneath it.
-//
-// The current input must be the most recently allocated slot (asserted).
-// The slot is handed back to the stack and released with freeio. If an input
-// was linked below, it becomes current and the position saved by pushio is
-// restored; otherwise lx->io becomes nil and lx->pos is left untouched.
-void
-popio(Lexer *lx)
-{
-    Io *top, *below;
-
-    top = lx->io;
-    assert(top == lx->new-1);
-    lx->new--;
-
-    below = top->link;
-    freeio(lx, top);
-
-    lx->io = below;
-    if (below)
-        lx->pos = below->store;
-}
-
-// -----------------------------------------------------------------------
-// simple wrappers
-
-// Read the next byte from the reader of the current input (lx->io).
-// The result of bufio·getbyte is returned unchanged; callers in this file
-// compare it against EOF.
-int
-getbyte(Lexer *lx)
-{
- return bufio·getbyte(&lx->io->rdr);
-}
-
-// Return the next byte that is not skippable whitespace.
-//
-// A newline is returned rather than skipped, and bytes >= RuneSelf are
-// returned without being classified by isspace. When the current input is
-// exhausted and another input is linked beneath it, the current one is
-// popped and reading continues there. EOF is returned only when the last
-// input runs out.
-int
-getnsbyte(Lexer *lx)
-{
-    int c;
-
-    for (c = getbyte(lx); ; c = getbyte(lx)) {
-        if (c == EOF) {
-            if (!lx->io->link)
-                return c;
-            popio(lx);
-            assert(lx->io);
-            continue;
-        }
-        if (c >= RuneSelf)
-            return c;
-        if (c == '\n' || !isspace(c))
-            return c;
-    }
-}
-
-// Read the next rune from the reader of the current input (lx->io).
-// The result of bufio·getrune is returned unchanged.
-rune
-getrune(Lexer *lx)
-{
- return bufio·getrune(&lx->io->rdr);
-}
-
-// Step the reader of the current input back by one byte.
-//
-// This wrapper has no byte of its own to hand to bufio·ungetbyte, so a fixed
-// 0 is passed. Previously an uninitialized local was read, which is undefined
-// behavior.
-// NOTE(review): this presumes bufio·ungetbyte only rewinds and does not store
-// its byte argument -- confirm against libn's bufio implementation.
-byte
-ungetbyte(Lexer *lx)
-{
-    byte b = 0;
-
-    return bufio·ungetbyte(&lx->io->rdr, b);
-}
-
-// Push rune r back onto the reader of the current input (l->io).
-// The result of bufio·ungetrune is returned unchanged.
-rune
-ungetrune(Lexer *l, rune r)
-{
- return bufio·ungetrune(&l->io->rdr, r);
-}
-
-// -----------------------------------------------------------------------
-// main lexer
-
-// Table of token names, built by expanding the TOKENS list with TOK
-// selecting the second field of each entry. puttok indexes it by token kind.
-#define TOK(a, b) b,
-byte *tokens[NUM_TOKENS] = { TOKENS };
-#undef TOK
-
-// Value of each ASCII digit character, covering '0'-'9' and both cases of
-// 'a'-'f'. Every entry not listed is zero, so lex treats a zero result for
-// any byte other than '0' as "not a digit".
-static uint8 Atoi[256] =
-{
- ['0'] = 0, ['1'] = 1, ['2'] = 2, ['3'] = 3, ['4'] = 4, ['5'] = 5,
- ['6'] = 6, ['7'] = 7, ['8'] = 8, ['9'] = 9, ['a'] = 10, ['A'] = 10,
- ['b'] = 11, ['B'] = 11, ['c'] = 12, ['C'] = 12, ['d'] = 13, ['D'] = 13,
- ['e'] = 14, ['E'] = 14, ['f'] = 15, ['F'] = 15,
-};
-
-// Read one character of a character or string literal, decoding a backslash
-// escape if one is present.
-//
-//   x       the closing delimiter of the literal (' or ")
-//   islong  nonzero for wide literals; widens the digit limits of \x, \u and
-//           octal escapes
-//   esc     nonzero to OR the bit (RuneMask + 1) into values produced by
-//           \x, \u, \U and octal escapes, so the caller can tell them apart
-//           from ordinary runes
-//   val     receives the decoded value
-//
-// Returns 0 with *val set when a character was read. Returns 1, leaving *val
-// untouched, when the delimiter x, EOF or a newline was read instead; the
-// latter two are also reported with errorat.
-static
-error
-escapechar(Lexer *lx, int x, int islong, int esc, vlong *val)
-{
-    int i, u, c;
-    vlong l;
-
-    c = getrune(lx);
-
-    switch (c) {
-    case '\\':
-        break;
-    case EOF:
-        errorat(lx->pos, "EOF in string");
-        return 1;
-    case '\n':
-        errorat(lx->pos, "newline in string");
-        return 1;
-    default:
-        if (c == x)
-            return 1;
-        *val = c;
-        return 0;
-    }
-
-    // a backslash was read: decode what follows it
-    u = 0;
-    c = getrune(lx);
-
-    switch(c) {
-    case 'x':
-        i = islong ? 4 : 2;
-        goto hex;
-
-    case 'u':
-        i = islong ? 8 : 4;
-        u = 1;
-        goto hex;
-
-    case 'U':
-        i = 8;
-        u = 1;
-        goto hex;
-
-    case '0': case '1': case '2': case '3':
-    case '4': case '5': case '6': case '7':
-        // i counts the digits that may follow the one already read
-        i = islong ? 4 : 2;
-        goto oct;
-
-    case 'a': c = '\a'; break;
-    case 'b': c = '\b'; break;
-    case 'f': c = '\f'; break;
-    case 'n': c = '\n'; break;
-    case 'r': c = '\r'; break;
-    case 't': c = '\t'; break;
-    case 'v': c = '\v'; break;
-    case '\\':c = '\\'; break;
-
-    default:
-        // an escaped delimiter stands for itself; anything else is reported
-        // and passed through unchanged
-        if(c != x) errorat(lx->pos, "unknown escape sequence: %c", c);
-    }
-    *val = c;
-    return 0;
-
-hex:
-    // read at most i hex digits; the first non-digit is pushed back
-    l = 0;
-    for(; i > 0; i--) {
-        c = getbyte(lx);
-        if (c >= '0' && c <= '9') {
-            l = l*16 + c-'0';
-            continue;
-        }
-        if (c >= 'a' && c <= 'f') {
-            l = l*16 + c-'a' + 10;
-            continue;
-        }
-        if (c >= 'A' && c <= 'F') {
-            l = l*16 + c-'A' + 10;
-            continue;
-        }
-        ungetbyte(lx);
-        break;
-    }
-    // \u and \U must name a code point: no surrogates, nothing past RuneMax
-    if (u && (l > RuneMax || (0xd800 <= l && l < 0xe000))) {
-        errorat(lx->pos, "invalid unicode code point in escape sequence: %#llx", l);
-        l = RuneErr;
-    }
-    *val = l;
-    if (esc)
-        *val |= RuneMask + 1;
-    return 0;
-
-oct:
-    // c holds the first octal digit; read at most i more
-    l = c - '0';
-    for (; i > 0; i--) {
-        c = getbyte(lx);
-        if (c >= '0' && c <= '7') {
-            l = l*8 + c-'0';
-            continue;
-        }
-        ungetbyte(lx);
-        break;
-    }
-    // l is a vlong: print it with %lld, as the hex path does with %#llx
-    if (l > 255) errorat(lx->pos, "octal escape value > 255: %lld", l);
-
-    *val = l;
-    if (esc)
-        *val |= RuneMask + 1;
-    return 0;
-}
-
-// Switch-case generators used inside lex(). Each expands to a `case` for the
-// byte stmt1 that sets tok.kind and jumps to the Return label; they rely on
-// the locals `tok`, `b` and `lx` of lex().
-//
-// CASE1: a single-byte token.
-#define CASE1(stmt1, kind1) \
- case stmt1: \
- tok.kind = kind1; \
- goto Return
-
-// CASE2: kind1, or kind2 when the next byte is b1. A byte that does not
-// match is pushed back.
-#define CASE2(stmt1, kind1, b1, kind2) \
- case stmt1: \
- tok.kind = kind1; \
- b = getbyte(lx); \
- if (b == b1) \
- tok.kind = kind2; \
- else \
- ungetbyte(lx); \
- goto Return
-
-// CASE3: like CASE2 with a second possible follow-up byte b2 giving kind3.
-#define CASE3(stmt1, kind1, b1, kind2, b2, kind3) \
- case stmt1: \
- tok.kind = kind1; \
- b = getbyte(lx); \
- if (b == b1) \
- tok.kind = kind2; \
- else if (b == b2) \
- tok.kind = kind3; \
- else \
- ungetbyte(lx); \
- goto Return
-
-// CASE4: like CASE3 with a third possible follow-up byte b3 giving type4.
-#define CASE4(stmt1, kind1, b1, kind2, b2, kind3, b3, type4) \
- case stmt1: \
- tok.kind = kind1; \
- b = getbyte(lx); \
- if (b == b1) \
- tok.kind = kind2; \
- else if (b == b2) \
- tok.kind = kind3; \
- else if (b == b3) \
- tok.kind = type4; \
- else \
- ungetbyte(lx); \
- goto Return
-
-
-// Scan and return the next token from the lexer's current input.
-//
-// Whitespace, line continuations, // comments and (nesting) /* */ comments
-// are skipped. '#' hands control to domacro(). When an input runs out it is
-// popped and scanning resumes in the one beneath it; Aeof is returned once
-// no input is left. Identifiers are interned: keywords come back as Akeywd
-// with the keyword index in val.i, names found in lx->sym are expanded as
-// macros by pushing a new in-memory input, and everything else is Aident.
-//
-// Numbers are collected in lx->buf through the cursor lx->b, which is reset
-// to the start of the buffer at the Return label. A number or identifier
-// that does not fit in lx->buf aborts compilation.
-//
-// NOTE(review): '_' separators are copied into lx->buf, so a float such as
-// 1_000.5 reaches strtod with the underscore in place -- confirm intent.
-// NOTE(review): a multi-byte rune written near the end of lx->buf in Talpha
-// can still run past the buffer; only whole-buffer overflow is checked.
-Token
-lex(Lexer *lx)
-{
-    int b, n, f;
-    vlong v, _;
-    rune r;
-    string s;
-    double d;
-    byte *e;
-    Token tok;
-    Sym *sym;
-    Io *io;
-
-GetByte:
-    b = getbyte(lx);
-Dispatch:
-    tok.pos = lx->pos;
-
-    if ((b != EOF && b >= RuneSelf) || b == '_')
-        goto Talpha;
-    if (isalpha(b)) {
-        if (b != 'L')
-            goto Talpha;
-
-        // 'L' may introduce a wide character or string literal
-        n = b;
-        b = getbyte(lx);
-        if (b == '\'') {
-            // an empty literal yields the quote itself, as in Tchar
-            if (escapechar(lx, '\'', 1, 0, &v))
-                v = '\'';
-            if (!escapechar(lx, '\'', 1, 0, &_)) {
-                errorat(lx->pos, "missing ' at end of character constant");
-            }
-            tok.kind = Alit | Vrune;
-            tok.val.r = v;
-            goto Return;
-        }
-        if (b == '"')
-            goto TLstr;
-        ungetbyte(lx);
-        b = n;
-
-        goto Talpha;
-    }
-    if (isdigit(b))
-        goto Tnum;
-
-    switch (b) {
-    case '\n':
-        lx->pos.line++;
-        /* fallthrough */
-    case ' ': case '\r': case '\t': case '\v': case '\f':
-        while (b = getbyte(lx), isspace(b))
-            if (b == '\n')
-                lx->pos.line++;
-        goto Dispatch;
-
-    case '\\':
-        b = getbyte(lx);
-        if (b != '\n')
-            errorat(lx->pos, "'\\' without a trailing newline");
-        goto GetByte;
-
-    Tchar:
-    case '\'':
-        if (escapechar(lx, '\'', 0, 0, &v)) {
-            errorat(lx->pos, "empty literal or escaped ' in char literal");
-            v = '\'';
-        }
-        if (!escapechar(lx, '\'', 0, 0, &_)) {
-            errorat(lx->pos, "missing '");
-            ungetbyte(lx);
-        }
-
-        if (v > 0xff) {
-            errorat(lx->pos, "overflowed character literal");
-            v = 0;
-        }
-        tok.kind = Alit | Vchar;
-        tok.val.c = v;
-        goto Return;
-
-    case '"':
-        s = str·makecap("", 0, 8);
-        for (;;) {
-            if (escapechar(lx, '"', 0, 1, &v))
-                break;
-
-            // numeric escapes are flagged by escapechar and stored as one
-            // raw byte; everything else is encoded as UTF-8
-            if (v & (RuneMask + 1))
-                str·appendbyte(&s, v);
-            else {
-                r = v;
-                b = utf8·runelen(r);
-                utf8·runetobyte(lx->buf, &r);
-                str·appendlen(&s, b, lx->buf);
-            }
-        }
-        tok.kind = Alit | Vstr;
-        tok.val.s = s;
-        intern(&tok.val.s);
-
-        str·free(s);
-        goto Return;
-
-    TLstr:
-        s = str·makecap("", 0, 8);
-        // NOTE: this violates strict aliasing
-        for (;;) {
-            if (escapechar(lx, '"', 1, 0, &v))
-                break;
-            str·appendlen(&s, sizeof(wchar_t), (byte*)&v);
-        }
-        tok.kind = Alit | Vwstr;
-        tok.val.s = s;
-        intern(&tok.val.s);
-
-        str·free(s);
-        goto Return;
-
-    case '.':
-        tok.kind = Adot;
-        b = getbyte(lx);
-
-        if (isdigit(b)) {
-            // keep the leading '.' so strtod sees a fraction (.5, not 5)
-            *lx->b++ = '.';
-            goto Tflt;
-        } else if (b == '.') {
-            b = getbyte(lx);
-            if (b == '.') {
-                tok.kind = Aellip;
-                goto Return;
-            }
-            // ".." is not a token: report it, recover as an ellipsis and
-            // leave the byte that followed for the next call
-            errorat(lx->pos, "invalid token '..'");
-            tok.kind = Aellip;
-        }
-        ungetbyte(lx);
-        goto Return;
-
-    case '<':
-        tok.kind = Alt;
-        b = getbyte(lx);
-
-        if (b == '<') {
-            tok.kind = Alsft;
-            b = getbyte(lx);
-            if (b == '=')
-                tok.kind = Alsftasn;
-            else
-                ungetbyte(lx);
-        } else if (b == '=')
-            tok.kind = Alteq;
-        else
-            ungetbyte(lx);
-        goto Return;
-
-    case '>':
-        tok.kind = Agt;
-        b = getbyte(lx);
-
-        if (b == '>') {
-            tok.kind = Arsft;
-            b = getbyte(lx);
-            if (b == '=')
-                tok.kind = Arsftasn;
-            else
-                ungetbyte(lx);
-        } else if (b == '=')
-            tok.kind = Agteq;
-        else
-            ungetbyte(lx);
-        goto Return;
-
-    case '/':
-        tok.kind = Adiv;
-        b = getbyte(lx);
-
-        if (b == '=')
-            tok.kind = Adivasn;
-        else if (b == '/') {
-            // line comment: resume at the newline (or EOF) that ends it
-            while (b != EOF && b != '\n')
-                b = getbyte(lx);
-            goto Dispatch;
-        } else if (b == '*') {
-            // block comment; nested openers are counted
-            int level = 1;
-            b = getbyte(lx);
-            while (b != EOF && level > 0) {
-                if (b == '/') {
-                    b = getbyte(lx);
-                    // not an opener: re-examine this byte, it may itself
-                    // start a delimiter or be a newline
-                    if (b != '*')
-                        continue;
-                    level++;
-                } else if (b == '*') {
-                    b = getbyte(lx);
-                    // not a closer: re-examine, so that "**/" terminates
-                    if (b != '/')
-                        continue;
-                    level--;
-                } else if (b == '\n')
-                    lx->pos.line++;
-                b = getbyte(lx);
-            }
-            goto Dispatch;
-        } else
-            ungetbyte(lx);
-        goto Return;
-
-    case '#':
-        if (domacro(lx)) {
-            tok.kind = Anil;
-            errorat(lx->pos, "failed to perform preprocessor directive");
-            return tok;
-        }
-        goto GetByte;
-
-    case EOF:
-        popio(lx);
-        if (lx->io)
-            goto GetByte;
-        tok.kind = Aeof;
-        goto Return;
-
-    CASE1('(', Alparen);
-    CASE1(')', Arparen);
-    CASE1('{', Albrace);
-    CASE1('}', Arbrace);
-    CASE1('[', Albrakt);
-    CASE1(']', Arbrakt);
-    CASE1(',', Acomma);
-    CASE1('?', Aqmark);
-    CASE1(';', Asemi);
-    CASE1('~', Aneg);
-    CASE1(':', Acolon);
-    CASE2('^', Axor, '=', Axorasn);
-    CASE2('!', Anot, '=', Aneq);
-    CASE2('*', Astar,'=', Amulasn);
-    CASE2('=', Aasn, '=', Aeq);
-    CASE2('%', Amod, '=', Amodasn);
-    CASE3('+', Aadd, '=', Aaddasn, '+', Ainc);
-    CASE3('&', Aand, '=', Aandasn, '&', Aandand);
-    CASE3('|', Aor, '=', Aorasn, '|', Aoror);
-    CASE4('-', Asub, '=', Asubasn, '-', Adec, '>', Aarrow);
-
-    Tnum:
-        e = lx->buf + arrlen(lx->buf);
-        do {
-            if (lx->b >= e)
-                goto Numfull;
-            *lx->b++ = b;
-        } while (b = getbyte(lx), isdigit(b) || b == '_');
-
-        if (b == '.' || tolower(b) == 'e')
-            goto Tflt;
-    Tint:
-        n = 10;
-        s = lx->buf;
-        if (*s == '0') {
-            switch (b) {
-            case 'x': n = 16; break;
-            case 'b': n = 2; break;
-            case 'o': n = 8; break;
-            default: goto Rint;
-            }
-            lx->b = s;
-            /* reparse number, now with base info */
-            while (b = getbyte(lx), (isdigit(b) ||
-                   ('a' <= b && b <= 'f') ||
-                   ('A' <= b && b <= 'F') ||
-                   b == '_')) {
-                if (lx->b >= e)
-                    goto Numfull;
-                *lx->b++ = b;
-            }
-        }
-    Rint:
-        v = 0;
-        r = b;  // remember the byte that ended the digits
-        for (; s != lx->b ; s++) {
-            b = *s;
-            if (b == '_') continue;
-
-            // Atoi yields 0 for every non-digit, so 0 is only valid for '0'
-            f = Atoi[b];
-            if (f == 0 && b != '0')
-                break;
-
-            if (f >= n) {
-                errorat(lx->pos, "digit '%c' out of range for base %d", b, n);
-                f = 0;
-            }
-
-            if (v > (UINT64_MAX - f) / n) {
-                errorat(lx->pos, "integer literal overflow");
-                v = 0;
-                break;
-            }
-
-            v = v * n + f;
-        }
-
-        b = r;
-        tok.kind = Alit;
-        tok.val.i = v;
-
-        // optional suffix: u, then l or ll, then u
-        if (b == 'u' || b == 'U') {
-            tok.kind |= Vun;
-            b = getbyte(lx);
-        }
-        if (b == 'l' || b == 'L') {
-            r = getbyte(lx);
-            if (r == 'l' || r == 'L') {
-                if (r != b)
-                    errorat(lx->pos, "mismatched case on long long integer suffix");
-                tok.kind |= Vvlong;
-                r = getbyte(lx);
-            } else
-                tok.kind |= Vlong;
-
-            if (r == 'u' || r == 'U') {
-                if (tok.kind & Vun)
-                    errorat(lx->pos, "multiple unsigned designators on integer suffix");
-                tok.kind |= Vun;
-                goto Return;
-            }
-
-            ungetbyte(lx);
-            goto Return;
-        }
-
-        tok.kind |= Vint;
-        ungetbyte(lx);
-        goto Return;
-
-    Tflt:
-        // Reached from Tnum (integer part already buffered, b is '.' or
-        // 'e') or from the '.' case ('.' buffered, b is the first fraction
-        // digit). One slot is kept free for the terminating NUL.
-        e = lx->buf + arrlen(lx->buf) - 1;
-
-        if (b == '.') {
-            if (lx->b >= e)
-                goto Numfull;
-            *lx->b++ = b;
-            b = getbyte(lx);
-        }
-
-        while (isdigit(b)) {
-            if (lx->b >= e)
-                goto Numfull;
-            *lx->b++ = b;
-            b = getbyte(lx);
-        }
-
-        if (tolower(b) == 'e') {
-            // the exponent marker and its sign must reach strtod too
-            if (lx->b >= e)
-                goto Numfull;
-            *lx->b++ = b;
-            b = getbyte(lx);
-            if (b == '-' || b == '+') {
-                if (lx->b >= e)
-                    goto Numfull;
-                *lx->b++ = b;
-                b = getbyte(lx);
-            }
-
-            if (!isdigit(b))
-                errorat(lx->pos, "expected number after exponent, found %c", b);
-
-            while (isdigit(b)) {
-                if (lx->b >= e)
-                    goto Numfull;
-                *lx->b++ = b;
-                b = getbyte(lx);
-            }
-        }
-        *lx->b = '\0';
-        d = strtod(lx->buf, nil);
-        ungetbyte(lx);
-
-        tok.kind = Alit | Vfloat;
-        tok.val.f = d;
-
-        goto Return;
-
-    Talpha:
-        s = lx->buf;
-        e = lx->buf + arrlen(lx->buf);
-        for (;;) {
-            if (s >= e) {
-                // terminate the buffer so the message prints what was read
-                // instead of reading past the end of lx->buf
-                lx->buf[arrlen(lx->buf)-1] = '\0';
-                errorat(lx->pos, "identifier too long for buffer: %s", lx->buf);
-                goto Nospace;
-            }
-            if (b != EOF && b >= RuneSelf) {
-                // re-read the lead byte as part of a full rune
-                ungetbyte(lx);
-                r = getrune(lx);
-                if (!utf8·isletter(r) && !utf8·isdigit(r) && r != 0xb7) {
-                    errorat(lx->pos, "invalid identifier character %d", r);
-                }
-                s += utf8·runetobyte(s, &r);
-            } else if (!isalnum(b) && b != '_')
-                break;
-            else
-                *s++ = b;
-            b = getbyte(lx);
-        }
-        *s = '\0';
-        ungetbyte(lx);
-
-        tok.kind = Aident;
-        tok.val.s = lx->buf;
-
-        n = intern(&tok.val.s);
-        if (n < arrlen(keywords)) {
-            tok.kind = Akeywd;
-            tok.val.i = n;
-            goto Return;
-        }
-
-        // a name found in the symbol table is expanded, unless it is the
-        // name of the input currently being read
-        sym = lookup(&lx->sym, tok.val.s);
-        if (sym && ((uintptr)sym->name != (uintptr)lx->io->path)) {
-            if ((uintptr)sym == lx->macline) {
-                tok.kind = Alit | Vint;
-                tok.val.i = lx->pos.line;
-                goto Return;
-            }
-            if ((uintptr)sym == lx->macfile) {
-                tok.kind = Alit | Vstr;
-                tok.val.s = lx->pos.path;
-                goto Return;
-            }
-            io = makeio(lx, sym->name);
-            io->rdr.end += expandmacro(lx, sym, io->b);
-            // NOTE(review): this looks like leftover debugging output
-            printf("EXPANDED %s: %s\n", sym->name, io->rdr.beg);
-            *io->rdr.end++ = EOF;
-            pushio(lx, io);
-            goto GetByte;
-        }
-        goto Return;
-
-    default:
-        tok.kind = Anil;
-        errorat(lx->pos, "invalid token, crashing");
-        abort();
-    }
-
-Return:
-    lx->b = lx->buf;
-    return tok;
-
-Numfull:
-    errorat(lx->pos, "number overflows lexer buffer");
-    /* fallthrough */
-Nospace:
-    panicf("aborting compilation");
-    exit(1);
-}
-
-#undef CASE4
-#undef CASE3
-#undef CASE2
-#undef CASE1
-
-// -----------------------------------------------------------------------
-// symbol tables
-
-// Hash and equality for the symbol table: keys are treated as pointer
-// values, so two names match only when they are the same pointer.
-#define PTR_HASH(p) (uintptr)(p)
-#define PTR_EQUAL(p1, p2) ((uintptr)(p1) == (uintptr)(p2))
-
-// Allocator callback: release ptr with free(). The first argument is unused.
-// A void function may not return an expression, so free() is called as a
-// plain statement.
-static
-void
-·free(void* _, void* ptr) {
-    free(ptr);
-}
-
-// Allocator callback: allocate room for n objects of `size` bytes with
-// malloc(). The first argument is unused. Returns nil when n*size would
-// overflow, instead of allocating a block that is too small.
-static
-void *
-·alloc(void* _, uint n, ulong size) {
-    if (size != 0 && n > (ulong)-1 / size)
-        return nil;
-    return malloc(n*size);
-}
-
-// Allocator callback: allocate n objects of `size` bytes with calloc().
-// The first argument is unused.
-static
-void *
-·calloc(void* _, uint n, ulong size) {
- return calloc(n, size);
-}
-
-// Resize the symbol table to n buckets. The whole body is the MAP_GROW macro
-// from libn's map.h, instantiated for string keys, Sym* values and pointer
-// hashing, with sys·Memory as the allocator.
-// NOTE(review): the int result presumably comes from a return inside the
-// macro expansion -- confirm against map.h.
-static
-int
-moresymtab(SymTab *tab, int n)
-{
- MAP_GROW(tab, string, Sym*, n, PTR_HASH, sys·Memory, nil);
-}
-
-// Insert sym into the table keyed by sym->name. The whole body is the
-// MAP_PUT macro, which grows the table through moresymtab and reports
-// through *err. define() uses the result as an index into tab->vals.
-// NOTE(review): the int result presumably comes from a return inside the
-// macro expansion -- confirm against map.h.
-static
-int
-putsym(SymTab *tab, Sym *sym, error *err)
-{
- MAP_PUT(tab, sym->name, sym, PTR_HASH, PTR_EQUAL, moresymtab, err);
-}
-
-// Create a symbol called `name` of the given kind and enter it in tab.
-//
-// The Sym is allocated from the C.heap arena, inserted with putsym and
-// stored in the value slot putsym reports. The new Sym is returned.
-// The status written by putsym is not inspected.
-Sym*
-define(SymTab *tab, string name, uint32 kind)
-{
-    Sym *entry;
-    error status;
-    int slot;
-
-    entry = mem·arenaalloc(C.heap, 1, sizeof(*entry));
-    entry->name = name;
-    entry->kind = kind;
-
-    slot = putsym(tab, entry, &status);
-    tab->vals[slot] = entry;
-
-    return entry;
-}
-
-// Find the symbol stored under `ident`. MAP_GET leaves the bucket index in
-// idx; an index of tab->n_buckets or more means the key was not found, and
-// nil is returned.
-Sym*
-lookup(SymTab *tab, string ident)
-{
-    int idx;
-    MAP_GET(idx, tab, ident, PTR_HASH, PTR_EQUAL);
-
-    return (idx < tab->n_buckets) ? tab->vals[idx] : nil;
-}
-
-
-// Remove the entry stored under `ident`. Returns 0 when an entry was found
-// and deleted, 1 when the table has no such key.
-error
-forget(SymTab *tab, string ident)
-{
-    int idx;
-    MAP_GET(idx, tab, ident, PTR_HASH, PTR_EQUAL);
-
-    if (idx >= tab->n_buckets)
-        return 1;
-
-    MAP_DEL(tab, idx);
-    return 0;
-}
-
-// Reset the whole symbol table through the MAP_RESET macro.
-void
-forgetall(SymTab *tab)
-{
- MAP_RESET(tab);
-}