From 9ec5bed6a7d715ffa69851569485a685dd69db2e Mon Sep 17 00:00:00 2001 From: Nicholas Noll Date: Sun, 17 May 2020 16:19:17 -0700 Subject: prototype of front end cli --- sys/cmd/cc/cc.h | 189 ++++++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 135 insertions(+), 54 deletions(-) (limited to 'sys/cmd/cc/cc.h') diff --git a/sys/cmd/cc/cc.h b/sys/cmd/cc/cc.h index 3228890..5488f3c 100644 --- a/sys/cmd/cc/cc.h +++ b/sys/cmd/cc/cc.h @@ -8,8 +8,11 @@ /* core types */ typedef struct Io Io; typedef struct Pos Pos; +typedef struct Range Range; typedef struct Token Token; +typedef struct Lexer Lexer; + typedef struct Sym Sym; typedef struct Type Type; @@ -22,33 +25,9 @@ typedef struct Expr Expr; typedef struct SymTab SymTab; typedef struct StrTab StrTab; -// ----------------------------------------------------------------------- -// lexing: byte stream -> tokens -// pre-processor built in - -struct Pos -{ - int col; - int line; - string path; -}; - -#define DIRECTIVES \ - DIRECTIVE(Dpragma,"pragma") \ - DIRECTIVE(Dinclude,"include") \ - DIRECTIVE(Dif,"if") \ - DIRECTIVE(Ddefine,"define") \ - DIRECTIVE(Difdef,"ifdef") \ - DIRECTIVE(Difndef,"ifndef") - -#define DIRECTIVE(a, b) a, -enum { DIRECTIVES }; -#undef DIRECTIVE - -#define DIRECTIVE(a, b) b, -static byte *directives[] = { DIRECTIVES }; -#undef DIRECTIVE +typedef struct Compiler Compiler; +/* keywords of language */ #define KEYWORDS \ KEYWORD(Kauto,"auto") \ KEYWORD(Kregister,"register") \ @@ -88,15 +67,57 @@ static byte *directives[] = { DIRECTIVES }; KEYWORD(Kalignof,"alignof") #define KEYWORD(a, b) a, -enum { KEYWORDS }; +enum { KEYWORDS NUM_KEYWORDS }; #undef KEYWORD -#define KEYWORD(a, b) b, -static byte *keywords[] = { KEYWORDS }; -#undef KEYWORD +extern byte *keywords[NUM_KEYWORDS]; + +// ----------------------------------------------------------------------- +// lexing: byte stream -> tokens +// pre-processor built in + +/* source position: error reporting */ +struct Pos +{ + int col; + int line; + string path; +}; + + +struct Range +{ + Pos beg; + Pos end; +}; + +void errorat(Pos x, byte *fmt, ...); + +/* pre-processor */ +#define DIRECTIVES \ + DIRECTIVE(Dpragma,"pragma", ppprag) \ + DIRECTIVE(Dinclude,"include", ppinc) \ + DIRECTIVE(Ddefine,"define", ppdef) \ + DIRECTIVE(Dundef,"undef", ppund) \ + DIRECTIVE(Dif,"if", ppif0) \ + DIRECTIVE(Delse, "else", ppif1) \ + DIRECTIVE(Difdef,"ifdef", ppif2) \ + DIRECTIVE(Difndef,"ifndef", ppif3) \ + DIRECTIVE(Dendif,"endif", ppend) + +#define DIRECTIVE(a, b, c) a, +enum { DIRECTIVES NUM_DIRECTIVES }; +#undef DIRECTIVE + +extern byte *directives[NUM_DIRECTIVES]; -#undef KEYWORDS +error domacro(Lexer*); +error dodefine(Lexer *lx, string s); +int expandmacro(Lexer *lx, Sym *s, byte *dst); +extern error (*macros[NUM_DIRECTIVES])(Lexer*); + +/* tokenization of byte stream */ #define TOKENS \ TOK(Anil,"nil") \ TOK(Aeof,"eof") \ @@ -140,8 +161,8 @@ static byte *keywords[] = { KEYWORDS }; TOK(Arparen,")") \ TOK(Albrace,"{") \ TOK(Arbrace,"}") \ - TOK(Albrkt,"[") \ - TOK(Arbrkt,"]") \ + TOK(Albrakt,"[") \ + TOK(Arbrakt,"]") \ TOK(Adot,".") \ TOK(Aarrow,"->") \ TOK(Aqmark,"?") \ @@ -154,47 +175,76 @@ static byte *keywords[] = { KEYWORDS }; enum { TOKENS + NUM_TOKENS, + + Vchar = iota(8), + Vint = iota(9), + Vlong = iota(10), + Vvlong = iota(11), + Vusgn = iota(12), + Vfloat = iota(13), + Vstr = iota(14), }; #undef TOK -#define TOK(a, b) b, -static byte *tokens[] = { TOKENS }; -#undef TOK -#undef TOKENS +extern byte *tokens[NUM_TOKENS]; /* TODO: store literals in a big val */ struct Token { uint32 kind; - struct Pos pos; + Range pos; union { - string str; + byte *s; double f; vlong i; - }; + uvlong ui; + byte c; + ubyte uc; + } val; }; enum { - Svar, - Sfunc, - Smacro, + Svar = 1 << 0, + Sfunc = 1 << 1, + Smacro = 1 << 2, }; struct Sym { uint32 kind; string name; + union { + string macro; + /*Func *func;*/ + }; }; +Sym *lookup(SymTab *tab, string ident); +Sym *define(SymTab *tab, string ident, int kind); + struct Lexer { - Token tok; + Pos pos; Io *io; SymTab *sym; - byte buf[1024]; + byte *b; + byte buf[2*1024]; }; +/* lex.c functions */ +Token lex(Lexer *); + +byte getbyte(Lexer *); +byte getnsbyte(Lexer *l); +rune getrune(Lexer *); +byte ungetbyte(Lexer *); +rune ungetrune(Lexer *, rune r); + +void pushio(Lexer *lx, Io *new); +void popio(Lexer *lx); + // ----------------------------------------------------------------------- // parsing & type resolution // tokens -> ast @@ -294,35 +344,66 @@ struct Decl // ----------------------------------------------------------------------- // compiler +enum +{ + IOnil = iota(0), + IOonce = iota(1), + IOmac = iota(2), +}; + struct Io { - io·Buffer b; + io·Buffer buf; string path; - uint32 flag; + uint32 kind; + union { + Stream *f; + byte *b; + }; + + Pos store; struct Io *link; }; +Io* openio(byte *path); +Io* makeio(); +void freeio(Io *io); + struct StrTab { - int32 n_buckets, size, n_occupied, upper_bound; + int32 n_buckets; + int32 size; + int32 n_occupied; + int32 upper_bound; int32 *flags; string *keys; int32 *vals; }; -static struct +int32 intern(byte **str); +string internview(byte* beg, byte *end); + +/* main data */ +struct Compiler { mem·Arena *heap; StrTab strs; - string *include; + struct { + int cap; + int len; + string *dir; + } inc; + Io *io; Io iostk[100]; -} C; -void init(); + string outfile; -int32 intern(byte **str); -string internview(byte* beg, byte *end); + Lexer lxr; +}; +extern Compiler C; + +void init(); #undef iota -- cgit v1.2.1