From ce05175372a9ddca1a225db0765ace1127a39293 Mon Sep 17 00:00:00 2001 From: Nicholas Date: Fri, 12 Nov 2021 09:22:01 -0800 Subject: chore: simplified organizational structure --- src/cmd/cc/cc.h | 806 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 806 insertions(+) create mode 100644 src/cmd/cc/cc.h (limited to 'src/cmd/cc/cc.h') diff --git a/src/cmd/cc/cc.h b/src/cmd/cc/cc.h new file mode 100644 index 0000000..8fc5f73 --- /dev/null +++ b/src/cmd/cc/cc.h @@ -0,0 +1,806 @@ +#pragma once + +#include +#include + +#define iota(x) 1 << (x) + +/* core types */ +typedef struct Io Io; +typedef struct Pos Pos; +typedef struct Range Range; +typedef struct Token Token; + +typedef struct Lexer Lexer; + +typedef struct Sym Sym; +typedef struct Type Type; +typedef struct Scope Scope; + +typedef struct Parser Parser; + +typedef struct Ptr Ptr; +typedef struct Name Name; +typedef struct Dtor Dtor; +typedef struct Field Field; + +typedef struct Node Node; +typedef struct Decl Decl; +typedef struct Stmt Stmt; +typedef struct Expr Expr; + +typedef struct SymTab SymTab; +typedef struct StrTab StrTab; + +typedef struct Compiler Compiler; + +/* keywords of language */ +#define KEYWORDS \ + KEYWORD(Kauto,"auto") \ + KEYWORD(Kregister,"register") \ + KEYWORD(Kstatic,"static") \ + KEYWORD(Kextern,"extern") \ + KEYWORD(Ktls,"thread_local") \ + KEYWORD(Ktypedef,"typedef") \ + KEYWORD(Kinline,"inline") \ + KEYWORD(Knoret,"_Noreturn") \ + KEYWORD(Kconst,"const") \ + KEYWORD(Kvolatile,"volatile") \ + KEYWORD(Krestrict,"restrict") \ + KEYWORD(Katomic,"_Atomic") \ + KEYWORD(Ksigned,"signed") \ + KEYWORD(Kunsigned,"unsigned") \ + KEYWORD(Kvoid,"void") \ + KEYWORD(Kbool,"_Bool") \ + KEYWORD(Kchar,"char") \ + KEYWORD(Kfloat,"float") \ + KEYWORD(Kdouble,"double") \ + KEYWORD(Kcomplex,"complex") \ + KEYWORD(Kimaginary,"imaginary") \ + KEYWORD(Kint,"int") \ + KEYWORD(Kshort,"short") \ + KEYWORD(Klong,"long") \ + KEYWORD(Kstruct,"struct") \ + KEYWORD(Kunion,"union") \ + KEYWORD(Kenum,"enum") \ + KEYWORD(Kfor,"for") \ + KEYWORD(Kdo,"do") \ + KEYWORD(Kwhile,"while") \ + KEYWORD(Kcontinue,"continue") \ + KEYWORD(Kif,"if") \ + KEYWORD(Kelse,"else") \ + KEYWORD(Kswitch,"switch") \ + KEYWORD(Kcase,"case") \ + KEYWORD(Kdefault,"default") \ + KEYWORD(Kbreak,"break") \ + KEYWORD(Kgoto,"goto") \ + KEYWORD(Kreturn,"return") \ + KEYWORD(Ksizeof,"sizeof") \ + KEYWORD(Kalignof,"alignof") \ + KEYWORD(Kalignas,"alignas") + +#define KEYWORD(a, b) a, +enum { KEYWORDS NUM_KEYWORDS }; +#undef KEYWORD + +extern byte *keywords[NUM_KEYWORDS]; + +// ----------------------------------------------------------------------- +// lexing: byte stream -> tokens +// pre-processor built in + +/* source position: error reporting */ +struct Pos +{ + int col; + int line; + string path; +}; + + +struct Range +{ + Pos beg; + Pos end; +}; + +void errorat(Pos x, byte *fmt, ...); +void warnat(Pos x, byte *fmt, ...); + +/* pre-processor */ +#define DIRECTIVES \ + DIRECTIVE(Dpragma,"pragma", ppprag) \ + DIRECTIVE(Dinclude,"include", ppinc) \ + DIRECTIVE(Ddefine,"define", ppdef) \ + DIRECTIVE(Dundef,"undef", ppund) \ + DIRECTIVE(Dif,"if", ppif0) \ + DIRECTIVE(Delif,"elif", ppif1) \ + DIRECTIVE(Delse, "else", ppif1) \ + DIRECTIVE(Difdef,"ifdef", ppif2) \ + DIRECTIVE(Difndef,"ifndef", ppif3) \ + DIRECTIVE(Dendif,"endif", ppend) + +#define DIRECTIVE(a, b, c) a, +enum { DIRECTIVES NUM_DIRECTIVES }; +#undef DIRECTIVE + +extern byte *directives[NUM_DIRECTIVES]; + +error domacro(Lexer*); +error dodefine(Lexer *lx, string s); +int expandmacro(Lexer *lx, Sym *s, byte *dst); + +extern error (*macros[NUM_DIRECTIVES])(Lexer*); + +/* tokenization of byte stream */ +#define TOKENS \ + TOK(Anil,"nil") \ + TOK(Aeof,"eof") \ + TOK(Aeq, "==") \ + TOK(Aneq, "!=") \ + TOK(Anot, "!") \ + TOK(Aneg, "~") \ + TOK(Axor, "^") \ + TOK(Aor, "|") \ + TOK(Aand, "&") \ + TOK(Aoror, "||") \ + TOK(Aandand, "&&") \ + TOK(Aadd,"+") \ + TOK(Asub,"-") \ + TOK(Astar,"*") \ + TOK(Adiv,"/") \ + TOK(Amod,"%") \ + TOK(Agt,">") \ + TOK(Alt,"<") \ + TOK(Agteq,">=") \ + TOK(Alteq,"<=") \ + TOK(Alsft,"<<") \ + TOK(Arsft,">>") \ + TOK(Ainc,"++") \ + TOK(Adec,"--") \ + TOK(Aasn,"=") \ + TOK(Aorasn,"|=") \ + TOK(Axorasn,"^=") \ + TOK(Aandasn,"&=") \ + TOK(Aaddasn,"+=") \ + TOK(Asubasn,"-=") \ + TOK(Amulasn,"*=") \ + TOK(Adivasn,"/=") \ + TOK(Amodasn,"%=") \ + TOK(Alsftasn,"<<=") \ + TOK(Arsftasn,">>=") \ + TOK(Acomma,",") \ + TOK(Acolon,":") \ + TOK(Asemi,";") \ + TOK(Alparen,"(") \ + TOK(Arparen,")") \ + TOK(Albrace,"{") \ + TOK(Arbrace,"}") \ + TOK(Albrakt,"[") \ + TOK(Arbrakt,"]") \ + TOK(Adot,".") \ + TOK(Aarrow,"->") \ + TOK(Aqmark,"?") \ + TOK(Aellip,"...") \ + TOK(Alit,"") \ + TOK(Aident,"") \ + TOK(Akeywd,"") \ + +#define TOK(a, b) a, +enum +{ + TOKENS + NUM_TOKENS, + + Vchar = iota(8), + Vrune = iota(9), + Vint = iota(10), + Vlong = iota(11), + Vvlong = iota(12), + Vun = iota(13), + Vfloat = iota(14), + Vstr = iota(15), + Vwstr = iota(16), + + Vmask = Vchar - 1, +}; +#undef TOK + +extern byte *tokens[NUM_TOKENS]; + +/* TODO: store literals in a big val */ +union Val +{ + byte *s; + double f; + vlong i; + uvlong ui; + int32 c; + uint32 uc; + rune r; +}; + +struct Token +{ + uint32 kind; + Pos pos; + union Val val; +}; + +enum +{ + Svar = iota(1), + Sfunc = iota(2), + Stype = iota(3), + Stag = iota(4), + Senum = iota(5), + Slabl = iota(6), + Smacro = iota(7), +}; + +struct Sym +{ + uint32 kind; + string name; + union { + string macro; + Decl *obj; + int32 type; + Stmt *blk; + Expr *val; + }; +}; + +struct SymTab +{ + int32 n_buckets; + int32 size; + int32 n_occupied; + int32 upper_bound; + int32 *flags; + string *keys; + Sym **vals; +}; + +Sym *define(SymTab *tab, string ident, uint32 kind); +Sym *lookup(SymTab *tab, string ident); +error forget(SymTab *tab, string ident); +void forgetall(SymTab *tab); + +enum +{ + IOnil = iota(0), + IOfile = iota(1), + IObuff = iota(2), +}; + +struct Io +{ + io·Buffer rdr; + string path; + uint32 kind; + union { + Stream *f; + byte *b; + }; + + Pos store; + struct Io *link; +}; + +struct Lexer +{ + Pos pos; + SymTab sym; + byte *b; + byte buf[2*1024]; + + /* predefined dynamic macros */ + uintptr macfile; + uintptr macline; + + /* i/o data */ + Io *io, *new; + Io iostk[100]; + struct { + int cap; + int len; + string *path; + } omit; +}; + +/* lex.c functions */ +Token lex(Lexer *); + +int getbyte(Lexer *); +int getnsbyte(Lexer *l); +rune getrune(Lexer *); +byte ungetbyte(Lexer *); +rune ungetrune(Lexer *, rune r); + +Io* openio(Lexer *lx, byte *path); +void pushio(Lexer *lx, Io *new); +void popio(Lexer *lx); + +void puttok(Token); + +// ----------------------------------------------------------------------- +// parsing & type resolution +// tokens -> ast + +/* parent data */ +struct Node +{ + Range pos; + uint32 kind; +}; + +/* ast types */ +enum +{ + Nbad, + /* labels */ + Sempty, Slabel, Scase, + Sblock, + Sexpr, Sdecl, + Sselect, + /* loops */ + Sfor, Swhile, Sdo, + /* jumps */ + Sgoto, Scontin, Sbreak, Sreturn, + /* forks */ + Sif, Sswitch, + + + /* assignments */ + Xasn, Xmulasn, Xdivasn, Xmodasn, Xsubasn, Xaddasn, + Xlsftasn, Xrsftasn, Xandasn, Xxorasn, Xorasn, + /* conditional */ + Xternary, + /* unary prefix ops */ + Xref, Xstar, Xplus, Xminus, Xneg, Xnot, Xsizeof, Xalignof, Xpreinc, Xpredec, + Xcast, + /* unary postfix ops */ + Xpostinc, Xpostdec, Xindex, Xcall, Xselp, Xself, Xinitlist, + /* binary ops */ + Xoror, Xandand, Xor, Xxor, Xand, Xneq, Xeql, Xgt, Xlt, Xgteq, Xlteq, Xlsft, Xrsft, + Xadd, Xsub, Xmul, Xdiv, Xmod, + /* primary */ + Xparen, Xident, Xlit, + /* lists */ + Xcomma, + + + Dvar, + Dfunc, + Dtype, + Dlist = iota(20), + Dvars = Dvar | Dlist, + Dtypes = Dtype | Dlist, + + /* names (don't interact w/ final AST) */ + Nnil = 0, + Nident, + Nparen, + Nindex, + Ncall, +}; + +/* expressions */ +enum +{ + Keynil, + Keyidx, + Keysel, +}; + +struct Key +{ + uint kind : 2; + union { + Expr *x; + string s; + }; +}; + +struct Expr +{ + struct Node; + uint32 qual; + uint32 type; + union { + string name; + struct { + uint64 kind; + union { + union Val; + union Val v; + }; + } val; + struct { + int n; + struct Key *k; + Expr *v; + } init; + Expr *x; + struct { + Expr *l; + Expr *r; + } asn; + struct { + Expr *c; + Expr *t; + Expr *e; + } cond; + struct { + Expr *x; + union { + Expr *i; + string f; + }; + } idx; + struct { + Expr *fn; + int n; + Expr **arg; + } call; + union { + Expr *pre; + Expr *post; + } unary; + struct { + int type : 1; + union { + struct { + uint32 qual; + uint32 type; + } of; + Expr *x; + }; + } info; + struct { + struct { + uint32 qual; + uint32 type; + } to; + Expr *x; + } cast; + struct { + Expr *l; + Expr *r; + } binary; + struct { + Expr *x[2]; + } comma; + }; +}; + + +/* statements */ +struct Stmt +{ + struct Node; + union { + struct { + union { + string ident; + Expr *x; + }; + Node *stmt; + } lbl; + struct { + long n; + struct Node **item; + } blk; + Expr *x; + struct { + Node *init; + Expr *cond; + Expr *step; + Node *body; + } loop; + union{ + string lbl; + Expr *x; + } jmp; + struct { + Expr *cond; + Node *body; + Node *orelse; + } br; + }; +}; + +/* declarations */ + +/* + * specifiers + * the design is the following: + * type info is held w/in a 64 bit integer. + * the bottom 32 bits are associated to specializations + * the top 32 bits index into a type-info array held by the compiler. + */ +enum +{ + /* memory */ + Mauto = iota(Kauto), + Mstatic = iota(Kstatic), + Mreg = iota(Kregister), + Mtls = iota(Ktls), + Mtype = iota(Ktypedef), + Mextern = iota(Kextern), + + MaskMem = Mauto | Mstatic | Mreg | Mtls | Mtype | Mextern, + + /* qualifiers */ + Qconst = iota(Kconst), + Qrestr = iota(Krestrict), + Qvoltl = iota(Kvolatile), + Qatom = iota(Katomic), + + MaskQul = Qconst | Qrestr | Qvoltl | Qatom, + + Finlne = iota(Kinline), + Fnoret = iota(Knoret), + + MaskFcn = Finlne | Fnoret, + + /* types */ + Tsign = iota(Ksigned), + Tunsign = iota(Kunsigned), + + MaskSgn = Tsign | Tunsign, + + Tvoid = iota(Kvoid), + Tfloat = iota(Kfloat), + Tdouble = iota(Kdouble), + Tcmplx = iota(Kcomplex), + Timag = iota(Kimaginary), + + MaskFlt = Tfloat | Tdouble | Tcmplx | Timag, + + Tchar = iota(Kchar), + Tbool = iota(Kbool), + + Tshort = iota(Kshort), + Tint = iota(Kint), + Tlong = iota(Klong), + Tvlong = iota(Klong+1), + + MaskInt = Tshort | Tint | Tlong | Tvlong, + MaskTyp = Tvoid | Tbool | Tchar | Tint | Tfloat | Timag | Tcmplx, + /* + * NOTE IMPORTANT: vlong takes over the struct bit place + * DON'T MOVE KEYWORDS WITHOUT REORGANIZING + */ + Tstruct = iota(Kstruct+1), + Tunion = iota(Kunion+1), + Tenum = iota(Kenum+1), + Tname = iota(Kenum+2), + + Sbad = -1, +}; + +/* intermediate nodes */ +struct Ptr +{ + uint64 kind; + Ptr *link; +}; + +struct Name +{ + uint32 kind; + union { + string ident; + struct Dtor *paren; + struct { + Name *name; + union { + struct { + uint32 q; + Expr *x; + } idx; + struct { + int n; + int dots : 1; + Field *arg; + } call; + }; + } sfx; + }; +}; + +struct Dtor +{ + Ptr ptr; + Name *name; +}; + +/* final ast node */ + +struct Field +{ + uint32 qual; + uint32 type; + string name; +}; + +struct Decls +{ + string name; + uint32 type; + Expr *init; + struct Decls *link; +}; + + +struct Decl +{ + struct Node; + uint32 spec; + union { + struct { + string name; + uint32 type; + union { + Stmt *body; + Expr *init; + }; + }; + struct Decls list; + }; +}; + +enum +{ + Tbad, + Tbase, + Tdef, + Tptr, + Tarray, + Tfunc, +}; + +/* types */ +struct Type +{ + uint32 kind; + Sym *sym; + uintptr size; + uintptr max; + uint16 align : 8; + uint8 sign : 2; + union { + struct { + uint32 qual; + uint32 base; + } ptr; + struct { + int len; + uint32 qual; + uint32 *elt; + } arr; + struct { + int len; + Field *f; + Expr *x; + } aggr; + struct { + int len; + string *elt; + Expr *val; + } enm; + struct { + uint32 ret; + int n; + int dots : 1; + Field *arg; + } func; + }; +}; + +/* platform specific */ +extern Type pointer; +extern Type basetypes[24]; +/* mandated by C standard */ +extern uint64 validtypespec[38]; +extern int indextypespec[38]; + +struct Scope +{ + SymTab tags; + SymTab objs; +}; + +struct Parser +{ + Token tok[2]; + struct { + int cap; + int len; + Decl **decls; + } ast; + + /* static buffers/stacks */ + Scope *sp; + Scope spstk[40]; + + Name *nm; + Name nmstk[40]; + + Ptr *pt; + Ptr ptstk[10]; + + Dtor *dt; + Dtor dtstk[40]; +}; + +/* ast.c functions */ +error parse(Parser *, Lexer *); + +// ----------------------------------------------------------------------- +// global compiler data + +struct StrTab +{ + int32 n_buckets; + int32 size; + int32 n_occupied; + int32 upper_bound; + int32 *flags; + string *keys; + int32 *vals; +}; + +#if 0 +struct TypeSet +{ + int32 n_buckets; + int32 size; + int32 n_occupied; + int32 upper_bound; + int32 *flags; + Type **keys; +}; +#endif + +/* main data */ +struct Compiler +{ + mem·Arena *heap; + StrTab strs; + string outfile; + + struct { + int cap; + int len; + string *val; + } def; + + struct { + int cap; + int len; + string *dir; + } inc; + + struct { + int cap; + int len; + Type *info; + } type; + + /* TODO: make array */ + struct { + Decl vargs; + } builtin; +}; + +extern Compiler C; + +/* cc.c functions */ +void init(); +int32 intern(byte **str); +int32 type(); + +#undef iota -- cgit v1.2.1