#pragma once #include #include #define iota(x) 1 << (x) /* core types */ typedef struct Io Io; typedef struct Pos Pos; typedef struct Range Range; typedef struct Token Token; typedef struct Lexer Lexer; typedef struct Sym Sym; typedef struct Type Type; typedef struct Scope Scope; typedef struct Parser Parser; typedef struct Ptr Ptr; typedef struct Name Name; typedef struct Dtor Dtor; typedef struct Field Field; typedef struct Node Node; typedef struct Decl Decl; typedef struct Stmt Stmt; typedef struct Expr Expr; typedef struct SymTab SymTab; typedef struct StrTab StrTab; typedef struct Compiler Compiler; /* keywords of language */ #define KEYWORDS \ KEYWORD(Kauto,"auto") \ KEYWORD(Kregister,"register") \ KEYWORD(Kstatic,"static") \ KEYWORD(Kextern,"extern") \ KEYWORD(Ktls,"thread_local") \ KEYWORD(Ktypedef,"typedef") \ KEYWORD(Kinline,"inline") \ KEYWORD(Knoret,"_Noreturn") \ KEYWORD(Kconst,"const") \ KEYWORD(Kvolatile,"volatile") \ KEYWORD(Krestrict,"restrict") \ KEYWORD(Katomic,"_Atomic") \ KEYWORD(Ksigned,"signed") \ KEYWORD(Kunsigned,"unsigned") \ KEYWORD(Kvoid,"void") \ KEYWORD(Kbool,"_Bool") \ KEYWORD(Kchar,"char") \ KEYWORD(Kfloat,"float") \ KEYWORD(Kdouble,"double") \ KEYWORD(Kcomplex,"complex") \ KEYWORD(Kimaginary,"imaginary") \ KEYWORD(Kint,"int") \ KEYWORD(Kshort,"short") \ KEYWORD(Klong,"long") \ KEYWORD(Kstruct,"struct") \ KEYWORD(Kunion,"union") \ KEYWORD(Kenum,"enum") \ KEYWORD(Kfor,"for") \ KEYWORD(Kdo,"do") \ KEYWORD(Kwhile,"while") \ KEYWORD(Kcontinue,"continue") \ KEYWORD(Kif,"if") \ KEYWORD(Kelse,"else") \ KEYWORD(Kswitch,"switch") \ KEYWORD(Kcase,"case") \ KEYWORD(Kdefault,"default") \ KEYWORD(Kbreak,"break") \ KEYWORD(Kgoto,"goto") \ KEYWORD(Kreturn,"return") \ KEYWORD(Ksizeof,"sizeof") \ KEYWORD(Kalignof,"alignof") \ KEYWORD(Kalignas,"alignas") #define KEYWORD(a, b) a, enum { KEYWORDS NUM_KEYWORDS }; #undef KEYWORD extern byte *keywords[NUM_KEYWORDS]; // ----------------------------------------------------------------------- // lexing: byte stream -> tokens // pre-processor built in /* source position: error reporting */ struct Pos { int col; int line; string path; }; struct Range { Pos beg; Pos end; }; void errorat(Pos x, byte *fmt, ...); void warnat(Pos x, byte *fmt, ...); /* pre-processor */ #define DIRECTIVES \ DIRECTIVE(Dpragma,"pragma", ppprag) \ DIRECTIVE(Dinclude,"include", ppinc) \ DIRECTIVE(Ddefine,"define", ppdef) \ DIRECTIVE(Dundef,"undef", ppund) \ DIRECTIVE(Dif,"if", ppif0) \ DIRECTIVE(Delif,"elif", ppif1) \ DIRECTIVE(Delse, "else", ppif1) \ DIRECTIVE(Difdef,"ifdef", ppif2) \ DIRECTIVE(Difndef,"ifndef", ppif3) \ DIRECTIVE(Dendif,"endif", ppend) #define DIRECTIVE(a, b, c) a, enum { DIRECTIVES NUM_DIRECTIVES }; #undef DIRECTIVE extern byte *directives[NUM_DIRECTIVES]; error domacro(Lexer*); error dodefine(Lexer *lx, string s); int expandmacro(Lexer *lx, Sym *s, byte *dst); extern error (*macros[NUM_DIRECTIVES])(Lexer*); /* tokenization of byte stream */ #define TOKENS \ TOK(Anil,"nil") \ TOK(Aeof,"eof") \ TOK(Aeq, "==") \ TOK(Aneq, "!=") \ TOK(Anot, "!") \ TOK(Aneg, "~") \ TOK(Axor, "^") \ TOK(Aor, "|") \ TOK(Aand, "&") \ TOK(Aoror, "||") \ TOK(Aandand, "&&") \ TOK(Aadd,"+") \ TOK(Asub,"-") \ TOK(Astar,"*") \ TOK(Adiv,"/") \ TOK(Amod,"%") \ TOK(Agt,">") \ TOK(Alt,"<") \ TOK(Agteq,">=") \ TOK(Alteq,"<=") \ TOK(Alsft,"<<") \ TOK(Arsft,">>") \ TOK(Ainc,"++") \ TOK(Adec,"--") \ TOK(Aasn,"=") \ TOK(Aorasn,"|=") \ TOK(Axorasn,"^=") \ TOK(Aandasn,"&=") \ TOK(Aaddasn,"+=") \ TOK(Asubasn,"-=") \ TOK(Amulasn,"*=") \ TOK(Adivasn,"/=") \ TOK(Amodasn,"%=") \ TOK(Alsftasn,"<<=") \ TOK(Arsftasn,">>=") \ TOK(Acomma,",") \ TOK(Acolon,":") \ TOK(Asemi,";") \ TOK(Alparen,"(") \ TOK(Arparen,")") \ TOK(Albrace,"{") \ TOK(Arbrace,"}") \ TOK(Albrakt,"[") \ TOK(Arbrakt,"]") \ TOK(Adot,".") \ TOK(Aarrow,"->") \ TOK(Aqmark,"?") \ TOK(Aellip,"...") \ TOK(Alit,"") \ TOK(Aident,"") \ TOK(Akeywd,"") \ #define TOK(a, b) a, enum { TOKENS NUM_TOKENS, Vchar = iota(8), Vrune = iota(9), Vint = iota(10), Vlong = iota(11), Vvlong = iota(12), Vun = iota(13), Vfloat = iota(14), Vstr = iota(15), Vwstr = iota(16), Vmask = Vchar - 1, }; #undef TOK extern byte *tokens[NUM_TOKENS]; /* TODO: store literals in a big val */ union Val { byte *s; double f; vlong i; uvlong ui; int32 c; uint32 uc; rune r; }; struct Token { uint32 kind; Pos pos; union Val val; }; enum { Svar = iota(1), Sfunc = iota(2), Stype = iota(3), Stag = iota(4), Senum = iota(5), Slabl = iota(6), Smacro = iota(7), }; struct Sym { uint32 kind; string name; union { string macro; Decl *obj; int32 type; Stmt *blk; Expr *val; }; }; struct SymTab { int32 n_buckets; int32 size; int32 n_occupied; int32 upper_bound; int32 *flags; string *keys; Sym **vals; }; Sym *define(SymTab *tab, string ident, uint32 kind); Sym *lookup(SymTab *tab, string ident); error forget(SymTab *tab, string ident); void forgetall(SymTab *tab); enum { IOnil = iota(0), IOfile = iota(1), IObuff = iota(2), }; struct Io { io·Buffer rdr; string path; uint32 kind; union { Stream *f; byte *b; }; Pos store; struct Io *link; }; struct Lexer { Pos pos; SymTab sym; byte *b; byte buf[2*1024]; /* predefined dynamic macros */ uintptr macfile; uintptr macline; /* i/o data */ Io *io, *new; Io iostk[100]; struct { int cap; int len; string *path; } omit; }; /* lex.c functions */ Token lex(Lexer *); int getbyte(Lexer *); int getnsbyte(Lexer *l); rune getrune(Lexer *); byte ungetbyte(Lexer *); rune ungetrune(Lexer *, rune r); Io* openio(Lexer *lx, byte *path); void pushio(Lexer *lx, Io *new); void popio(Lexer *lx); void puttok(Token); // ----------------------------------------------------------------------- // parsing & type resolution // tokens -> ast /* parent data */ struct Node { Range pos; uint32 kind; }; /* ast types */ enum { Nbad, /* labels */ Sempty, Slabel, Scase, Sblock, Sexpr, Sdecl, Sselect, /* loops */ Sfor, Swhile, Sdo, /* jumps */ Sgoto, Scontin, Sbreak, Sreturn, /* forks */ Sif, Sswitch, /* assignments */ Xasn, Xmulasn, Xdivasn, Xmodasn, Xsubasn, Xaddasn, Xlsftasn, Xrsftasn, Xandasn, Xxorasn, Xorasn, /* conditional */ Xternary, /* unary prefix ops */ Xref, Xstar, Xplus, Xminus, Xneg, Xnot, Xsizeof, Xalignof, Xpreinc, Xpredec, Xcast, /* unary postfix ops */ Xpostinc, Xpostdec, Xindex, Xcall, Xselp, Xself, Xinitlist, /* binary ops */ Xoror, Xandand, Xor, Xxor, Xand, Xneq, Xeql, Xgt, Xlt, Xgteq, Xlteq, Xlsft, Xrsft, Xadd, Xsub, Xmul, Xdiv, Xmod, /* primary */ Xparen, Xident, Xlit, /* lists */ Xcomma, Dvar, Dfunc, Dtype, Dlist = iota(20), Dvars = Dvar | Dlist, Dtypes = Dtype | Dlist, /* names (don't interact w/ final AST) */ Nnil = 0, Nident, Nparen, Nindex, Ncall, }; /* expressions */ enum { Keynil, Keyidx, Keysel, }; struct Key { uint kind : 2; union { Expr *x; string s; }; }; struct Expr { struct Node; uint32 qual; uint32 type; union { string name; struct { uint64 kind; union { union Val; union Val v; }; } val; struct { int n; struct Key *k; Expr *v; } init; Expr *x; struct { Expr *l; Expr *r; } asn; struct { Expr *c; Expr *t; Expr *e; } cond; struct { Expr *x; union { Expr *i; string f; }; } idx; struct { Expr *fn; int n; Expr **arg; } call; union { Expr *pre; Expr *post; } unary; struct { int type : 1; union { struct { uint32 qual; uint32 type; } of; Expr *x; }; } info; struct { struct { uint32 qual; uint32 type; } to; Expr *x; } cast; struct { Expr *l; Expr *r; } binary; struct { Expr *x[2]; } comma; }; }; /* statements */ struct Stmt { struct Node; union { struct { union { string ident; Expr *x; }; Node *stmt; } lbl; struct { long n; struct Node **item; } blk; Expr *x; struct { Node *init; Expr *cond; Expr *step; Node *body; } loop; union{ string lbl; Expr *x; } jmp; struct { Expr *cond; Node *body; Node *orelse; } br; }; }; /* declarations */ /* * specifiers * the design is the following: * type info is held w/in a 64 bit integer. * the bottom 32 bits are associated to specializations * the top 32 bits index into a type-info array held by the compiler. */ enum { /* memory */ Mauto = iota(Kauto), Mstatic = iota(Kstatic), Mreg = iota(Kregister), Mtls = iota(Ktls), Mtype = iota(Ktypedef), Mextern = iota(Kextern), MaskMem = Mauto | Mstatic | Mreg | Mtls | Mtype | Mextern, /* qualifiers */ Qconst = iota(Kconst), Qrestr = iota(Krestrict), Qvoltl = iota(Kvolatile), Qatom = iota(Katomic), MaskQul = Qconst | Qrestr | Qvoltl | Qatom, Finlne = iota(Kinline), Fnoret = iota(Knoret), MaskFcn = Finlne | Fnoret, /* types */ Tsign = iota(Ksigned), Tunsign = iota(Kunsigned), MaskSgn = Tsign | Tunsign, Tvoid = iota(Kvoid), Tfloat = iota(Kfloat), Tdouble = iota(Kdouble), Tcmplx = iota(Kcomplex), Timag = iota(Kimaginary), MaskFlt = Tfloat | Tdouble | Tcmplx | Timag, Tchar = iota(Kchar), Tbool = iota(Kbool), Tshort = iota(Kshort), Tint = iota(Kint), Tlong = iota(Klong), Tvlong = iota(Klong+1), MaskInt = Tshort | Tint | Tlong | Tvlong, MaskTyp = Tvoid | Tbool | Tchar | Tint | Tfloat | Timag | Tcmplx, /* * NOTE IMPORTANT: vlong takes over the struct bit place * DON'T MOVE KEYWORDS WITHOUT REORGANIZING */ Tstruct = iota(Kstruct+1), Tunion = iota(Kunion+1), Tenum = iota(Kenum+1), Tname = iota(Kenum+2), Sbad = -1, }; /* intermediate nodes */ struct Ptr { uint64 kind; Ptr *link; }; struct Name { uint32 kind; union { string ident; struct Dtor *paren; struct { Name *name; union { struct { uint32 q; Expr *x; } idx; struct { int n; int dots : 1; Field *arg; } call; }; } sfx; }; }; struct Dtor { Ptr ptr; Name *name; }; /* final ast node */ struct Field { uint32 qual; uint32 type; string name; }; struct Decls { string name; uint32 type; Expr *init; struct Decls *link; }; struct Decl { struct Node; uint32 spec; union { struct { string name; uint32 type; union { Stmt *body; Expr *init; }; }; struct Decls list; }; }; enum { Tbad, Tbase, Tdef, Tptr, Tarray, Tfunc, }; /* types */ struct Type { uint32 kind; Sym *sym; uintptr size; uintptr max; uint16 align : 8; uint8 sign : 2; union { struct { uint32 qual; uint32 base; } ptr; struct { int len; uint32 qual; uint32 *elt; } arr; struct { int len; Field *f; Expr *x; } aggr; struct { int len; string *elt; Expr *val; } enm; struct { uint32 ret; int n; int dots : 1; Field *arg; } func; }; }; /* platform specific */ extern Type pointer; extern Type basetypes[24]; /* mandated by C standard */ extern uint64 validtypespec[38]; extern int indextypespec[38]; struct Scope { SymTab tags; SymTab objs; }; struct Parser { Token tok[2]; struct { int cap; int len; Decl **decls; } ast; /* static buffers/stacks */ Scope *sp; Scope spstk[40]; Name *nm; Name nmstk[40]; Ptr *pt; Ptr ptstk[10]; Dtor *dt; Dtor dtstk[40]; }; /* ast.c functions */ error parse(Parser *, Lexer *); // ----------------------------------------------------------------------- // global compiler data struct StrTab { int32 n_buckets; int32 size; int32 n_occupied; int32 upper_bound; int32 *flags; string *keys; int32 *vals; }; #if 0 struct TypeSet { int32 n_buckets; int32 size; int32 n_occupied; int32 upper_bound; int32 *flags; Type **keys; }; #endif /* main data */ struct Compiler { mem·Arena *heap; StrTab strs; string outfile; struct { int cap; int len; string *val; } def; struct { int cap; int len; string *dir; } inc; struct { int cap; int len; Type *info; } type; /* TODO: make array */ struct { Decl vargs; } builtin; }; extern Compiler C; /* cc.c functions */ void init(); int32 intern(byte **str); int32 type(); #undef iota