aboutsummaryrefslogtreecommitdiff
path: root/src/cmd/cc/cc.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/cmd/cc/cc.h')
-rw-r--r--src/cmd/cc/cc.h806
1 files changed, 806 insertions, 0 deletions
diff --git a/src/cmd/cc/cc.h b/src/cmd/cc/cc.h
new file mode 100644
index 0000000..8fc5f73
--- /dev/null
+++ b/src/cmd/cc/cc.h
@@ -0,0 +1,806 @@
+#pragma once
+
+#include <u.h>
+#include <libn.h>
+
+/*
+ * iota(x): single-bit flag with bit x set.
+ * The expansion is fully parenthesized so that it composes safely inside a
+ * larger expression: iota(3) - 1 is 7, not 1 << (3 - 1).
+ * The macro is private to this header; it is #undef'd on the last line.
+ */
+#define iota(x) (1 << (x))
+
+/* core types */
+/*
+ * Forward typedefs: every struct in this header can be referred to by its
+ * bare name (including through pointers) before its full definition.
+ */
+
+/* lexer input, source positions and tokens */
+typedef struct Io Io;
+typedef struct Pos Pos;
+typedef struct Range Range;
+typedef struct Token Token;
+
+typedef struct Lexer Lexer;
+
+/* symbols, types and scopes */
+typedef struct Sym Sym;
+typedef struct Type Type;
+typedef struct Scope Scope;
+
+typedef struct Parser Parser;
+
+/* intermediate declarator pieces and fields */
+typedef struct Ptr Ptr;
+typedef struct Name Name;
+typedef struct Dtor Dtor;
+typedef struct Field Field;
+
+/* ast nodes */
+typedef struct Node Node;
+typedef struct Decl Decl;
+typedef struct Stmt Stmt;
+typedef struct Expr Expr;
+
+/* hash tables */
+typedef struct SymTab SymTab;
+typedef struct StrTab StrTab;
+
+/* global compiler state */
+typedef struct Compiler Compiler;
+
+/*
+ * keywords of language
+ *
+ * X-macro list: each KEYWORD(enumerator, spelling) entry is expanded below
+ * into one K* enumerator, so an entry's position in the list is its value.
+ * The specifier flags later in this header are built as iota(K...), so the
+ * order of the entries is load-bearing: do not insert, remove or reorder
+ * entries without revisiting those flags.
+ */
+#define KEYWORDS \
+ KEYWORD(Kauto,"auto") \
+ KEYWORD(Kregister,"register") \
+ KEYWORD(Kstatic,"static") \
+ KEYWORD(Kextern,"extern") \
+ KEYWORD(Ktls,"thread_local") \
+ KEYWORD(Ktypedef,"typedef") \
+ KEYWORD(Kinline,"inline") \
+ KEYWORD(Knoret,"_Noreturn") \
+ KEYWORD(Kconst,"const") \
+ KEYWORD(Kvolatile,"volatile") \
+ KEYWORD(Krestrict,"restrict") \
+ KEYWORD(Katomic,"_Atomic") \
+ KEYWORD(Ksigned,"signed") \
+ KEYWORD(Kunsigned,"unsigned") \
+ KEYWORD(Kvoid,"void") \
+ KEYWORD(Kbool,"_Bool") \
+ KEYWORD(Kchar,"char") \
+ KEYWORD(Kfloat,"float") \
+ KEYWORD(Kdouble,"double") \
+ KEYWORD(Kcomplex,"complex") \
+ KEYWORD(Kimaginary,"imaginary") \
+ KEYWORD(Kint,"int") \
+ KEYWORD(Kshort,"short") \
+ KEYWORD(Klong,"long") \
+ KEYWORD(Kstruct,"struct") \
+ KEYWORD(Kunion,"union") \
+ KEYWORD(Kenum,"enum") \
+ KEYWORD(Kfor,"for") \
+ KEYWORD(Kdo,"do") \
+ KEYWORD(Kwhile,"while") \
+ KEYWORD(Kcontinue,"continue") \
+ KEYWORD(Kif,"if") \
+ KEYWORD(Kelse,"else") \
+ KEYWORD(Kswitch,"switch") \
+ KEYWORD(Kcase,"case") \
+ KEYWORD(Kdefault,"default") \
+ KEYWORD(Kbreak,"break") \
+ KEYWORD(Kgoto,"goto") \
+ KEYWORD(Kreturn,"return") \
+ KEYWORD(Ksizeof,"sizeof") \
+ KEYWORD(Kalignof,"alignof") \
+ KEYWORD(Kalignas,"alignas")
+
+/* expand the list to its enumerators only; NUM_KEYWORDS is the entry count */
+#define KEYWORD(a, b) a,
+enum { KEYWORDS NUM_KEYWORDS };
+#undef KEYWORD
+
+/* one entry per K* value, defined elsewhere; presumably the spellings from the list above - confirm */
+extern byte *keywords[NUM_KEYWORDS];
+
+// -----------------------------------------------------------------------
+// lexing: byte stream -> tokens
+// pre-processor built in
+
+/* source position: error reporting */
+struct Pos
+{
+ int col; /* column within the line */
+ int line; /* line within the file */
+ string path; /* file the position refers to */
+};
+
+
+/* span of source text delimited by two positions */
+struct Range
+{
+ Pos beg;
+ Pos end;
+};
+
+/*
+ * diagnostics attached to a source position.
+ * both are variadic; fmt is presumably a printf-style format - confirm
+ * against the definitions.
+ */
+void errorat(Pos x, byte *fmt, ...);
+void warnat(Pos x, byte *fmt, ...);
+
+/*
+ * pre-processor
+ *
+ * X-macro list of directives: DIRECTIVE(enumerator, spelling, handler).
+ * Only the first column is consumed in this header (to build the D* enum);
+ * the other two columns are presumably used where the directives[] and
+ * macros[] tables are defined - confirm.
+ * NOTE(review): "elif" and "else" both name ppif1 as their third column.
+ */
+#define DIRECTIVES \
+ DIRECTIVE(Dpragma,"pragma", ppprag) \
+ DIRECTIVE(Dinclude,"include", ppinc) \
+ DIRECTIVE(Ddefine,"define", ppdef) \
+ DIRECTIVE(Dundef,"undef", ppund) \
+ DIRECTIVE(Dif,"if", ppif0) \
+ DIRECTIVE(Delif,"elif", ppif1) \
+ DIRECTIVE(Delse, "else", ppif1) \
+ DIRECTIVE(Difdef,"ifdef", ppif2) \
+ DIRECTIVE(Difndef,"ifndef", ppif3) \
+ DIRECTIVE(Dendif,"endif", ppend)
+
+/* expand the list to its enumerators only; NUM_DIRECTIVES is the entry count */
+#define DIRECTIVE(a, b, c) a,
+enum { DIRECTIVES NUM_DIRECTIVES };
+#undef DIRECTIVE
+
+/* one entry per D* value, defined elsewhere */
+extern byte *directives[NUM_DIRECTIVES];
+
+/* macro handling entry points; all operate on the lexer state */
+error domacro(Lexer*);
+error dodefine(Lexer *lx, string s);
+int expandmacro(Lexer *lx, Sym *s, byte *dst);
+
+/* table of directive handlers, one function pointer per D* value */
+extern error (*macros[NUM_DIRECTIVES])(Lexer*);
+
+/*
+ * tokenization of byte stream
+ *
+ * X-macro list of token kinds: TOK(enumerator, display string).
+ * The final entry deliberately has no trailing line continuation, so the
+ * macro ends with the list and does not depend on a blank line following it.
+ */
+#define TOKENS \
+ TOK(Anil,"nil") \
+ TOK(Aeof,"eof") \
+ TOK(Aeq, "==") \
+ TOK(Aneq, "!=") \
+ TOK(Anot, "!") \
+ TOK(Aneg, "~") \
+ TOK(Axor, "^") \
+ TOK(Aor, "|") \
+ TOK(Aand, "&") \
+ TOK(Aoror, "||") \
+ TOK(Aandand, "&&") \
+ TOK(Aadd,"+") \
+ TOK(Asub,"-") \
+ TOK(Astar,"*") \
+ TOK(Adiv,"/") \
+ TOK(Amod,"%") \
+ TOK(Agt,">") \
+ TOK(Alt,"<") \
+ TOK(Agteq,">=") \
+ TOK(Alteq,"<=") \
+ TOK(Alsft,"<<") \
+ TOK(Arsft,">>") \
+ TOK(Ainc,"++") \
+ TOK(Adec,"--") \
+ TOK(Aasn,"=") \
+ TOK(Aorasn,"|=") \
+ TOK(Axorasn,"^=") \
+ TOK(Aandasn,"&=") \
+ TOK(Aaddasn,"+=") \
+ TOK(Asubasn,"-=") \
+ TOK(Amulasn,"*=") \
+ TOK(Adivasn,"/=") \
+ TOK(Amodasn,"%=") \
+ TOK(Alsftasn,"<<=") \
+ TOK(Arsftasn,">>=") \
+ TOK(Acomma,",") \
+ TOK(Acolon,":") \
+ TOK(Asemi,";") \
+ TOK(Alparen,"(") \
+ TOK(Arparen,")") \
+ TOK(Albrace,"{") \
+ TOK(Arbrace,"}") \
+ TOK(Albrakt,"[") \
+ TOK(Arbrakt,"]") \
+ TOK(Adot,".") \
+ TOK(Aarrow,"->") \
+ TOK(Aqmark,"?") \
+ TOK(Aellip,"...") \
+ TOK(Alit,"<literal>") \
+ TOK(Aident,"<identifier>") \
+ TOK(Akeywd,"<keyword>")
+
+/*
+ * Token kinds plus value-type flags.
+ * The A* enumerators count up from 0 and NUM_TOKENS is their count; all of
+ * them are smaller than Vchar. The V* enumerators are single-bit flags on
+ * bits 8..16, and Vmask (Vchar - 1) covers the bits below Vchar.
+ * Presumably a V* flag is or'ed into Token.kind to tag the literal's type
+ * and Vmask recovers the A* part - confirm against the lexer.
+ */
+#define TOK(a, b) a,
+enum
+{
+ TOKENS
+ NUM_TOKENS,
+
+ Vchar = iota(8),
+ Vrune = iota(9),
+ Vint = iota(10),
+ Vlong = iota(11),
+ Vvlong = iota(12),
+ Vun = iota(13),
+ Vfloat = iota(14),
+ Vstr = iota(15),
+ Vwstr = iota(16),
+
+ Vmask = Vchar - 1,
+};
+#undef TOK
+
+/* one entry per A* value, defined elsewhere */
+extern byte *tokens[NUM_TOKENS];
+
+/* TODO: store literals in a big val */
+/* payload of a token or literal; all members share the same storage */
+union Val
+{
+ byte *s; /* string data */
+ double f; /* floating point */
+ vlong i; /* signed integer */
+ uvlong ui; /* unsigned integer */
+ int32 c; /* signed 32-bit value */
+ uint32 uc; /* unsigned 32-bit value */
+ rune r; /* rune value */
+};
+
+/* a lexed token: its kind, where it was found, and its payload */
+struct Token
+{
+ uint32 kind;
+ Pos pos;
+ union Val val;
+};
+
+/* symbol kinds: single-bit flags on bits 1..7 (bit 0 is not used here) */
+enum
+{
+ Svar = iota(1),
+ Sfunc = iota(2),
+ Stype = iota(3),
+ Stag = iota(4),
+ Senum = iota(5),
+ Slabl = iota(6),
+ Smacro = iota(7),
+};
+
+/*
+ * a named entity.
+ * the anonymous union holds the payload; which member is live is
+ * presumably selected by kind - confirm against the users of Sym.
+ */
+struct Sym
+{
+ uint32 kind;
+ string name;
+ union {
+ string macro;
+ Decl *obj;
+ int32 type;
+ Stmt *blk;
+ Expr *val;
+ };
+};
+
+/*
+ * table from string keys to Sym pointers.
+ * NOTE(review): the field set (bucket count, size, occupied count, upper
+ * bound, flags/keys/vals arrays) looks like an open-addressing hash map
+ * layout - confirm against the implementation.
+ */
+struct SymTab
+{
+ int32 n_buckets;
+ int32 size;
+ int32 n_occupied;
+ int32 upper_bound;
+ int32 *flags;
+ string *keys;
+ Sym **vals;
+};
+
+/* symbol table operations, keyed by identifier string */
+Sym *define(SymTab *tab, string ident, uint32 kind);
+Sym *lookup(SymTab *tab, string ident);
+error forget(SymTab *tab, string ident);
+void forgetall(SymTab *tab);
+
+/*
+ * i/o source kinds: single-bit flags.
+ * note that IOnil is iota(0), i.e. 1, not 0.
+ */
+enum
+{
+ IOnil = iota(0),
+ IOfile = iota(1),
+ IObuff = iota(2),
+};
+
+/* one input source; entries chain to each other through link */
+struct Io
+{
+ io·Buffer rdr;
+ string path;
+ uint32 kind;
+ /* backing source: a stream or a byte buffer (presumably chosen by kind - confirm) */
+ union {
+ Stream *f;
+ byte *b;
+ };
+
+ Pos store;
+ struct Io *link;
+};
+
+/* lexer state: current position, macro symbols, scratch buffer and input stack */
+struct Lexer
+{
+ Pos pos;
+ SymTab sym;
+ byte *b;
+ byte buf[2*1024]; /* fixed 2 KiB buffer */
+
+ /* predefined dynamic macros */
+ uintptr macfile;
+ uintptr macline;
+
+ /* i/o data */
+ Io *io, *new;
+ Io iostk[100]; /* fixed storage for at most 100 Io entries */
+ /* growable list of paths (cap/len presumably capacity and count - confirm) */
+ struct {
+ int cap;
+ int len;
+ string *path;
+ } omit;
+};
+
+/* lex.c functions */
+Token lex(Lexer *);
+
+/* byte and rune level access to the input */
+int getbyte(Lexer *);
+int getnsbyte(Lexer *l);
+rune getrune(Lexer *);
+byte ungetbyte(Lexer *);
+rune ungetrune(Lexer *, rune r);
+
+/* input source management */
+Io* openio(Lexer *lx, byte *path);
+void pushio(Lexer *lx, Io *new);
+void popio(Lexer *lx);
+
+void puttok(Token);
+
+// -----------------------------------------------------------------------
+// parsing & type resolution
+// tokens -> ast
+
+/*
+ * parent data
+ * common header of the ast structs: Expr, Stmt and Decl each embed it
+ * anonymously as their first member.
+ */
+struct Node
+{
+ Range pos; /* source span */
+ uint32 kind; /* one of the ast kind enumerators */
+};
+
+/*
+ * ast types
+ *
+ * Nbad, the S* statement kinds, the X* expression kinds and Dvar/Dfunc/Dtype
+ * share one increasing sequence that starts at 0.
+ * Dlist is a separate flag (bit 20) that is or'ed onto Dvar/Dtype to form
+ * Dvars/Dtypes.
+ * The trailing N* name kinds restart at 0 (Nnil = 0), so their values
+ * overlap with the start of the sequence.
+ */
+enum
+{
+ Nbad,
+ /* labels */
+ Sempty, Slabel, Scase,
+ Sblock,
+ Sexpr, Sdecl,
+ Sselect,
+ /* loops */
+ Sfor, Swhile, Sdo,
+ /* jumps */
+ Sgoto, Scontin, Sbreak, Sreturn,
+ /* forks */
+ Sif, Sswitch,
+
+
+ /* assignments */
+ Xasn, Xmulasn, Xdivasn, Xmodasn, Xsubasn, Xaddasn,
+ Xlsftasn, Xrsftasn, Xandasn, Xxorasn, Xorasn,
+ /* conditional */
+ Xternary,
+ /* unary prefix ops */
+ Xref, Xstar, Xplus, Xminus, Xneg, Xnot, Xsizeof, Xalignof, Xpreinc, Xpredec,
+ Xcast,
+ /* unary postfix ops */
+ Xpostinc, Xpostdec, Xindex, Xcall, Xselp, Xself, Xinitlist,
+ /* binary ops */
+ Xoror, Xandand, Xor, Xxor, Xand, Xneq, Xeql, Xgt, Xlt, Xgteq, Xlteq, Xlsft, Xrsft,
+ Xadd, Xsub, Xmul, Xdiv, Xmod,
+ /* primary */
+ Xparen, Xident, Xlit,
+ /* lists */
+ Xcomma,
+
+
+ /* declarations */
+ Dvar,
+ Dfunc,
+ Dtype,
+ Dlist = iota(20),
+ Dvars = Dvar | Dlist,
+ Dtypes = Dtype | Dlist,
+
+ /* names (don't interact w/ final AST) */
+ Nnil = 0,
+ Nident,
+ Nparen,
+ Nindex,
+ Ncall,
+};
+
+/* expressions */
+/* kinds of initializer key; the values fit the 2-bit Key.kind field */
+enum
+{
+ Keynil,
+ Keyidx,
+ Keysel,
+};
+
+/*
+ * key of one initializer entry.
+ * the payload is either an expression or a string; presumably x goes with
+ * Keyidx and s with Keysel - confirm against the parser.
+ */
+struct Key
+{
+ uint kind : 2;
+ union {
+ Expr *x;
+ string s;
+ };
+};
+
+/*
+ * expression node.
+ * embeds struct Node anonymously (pos and kind are direct members), carries
+ * a qualifier/type pair, and then one payload selected from the anonymous
+ * union below. which payload is live is presumably determined by kind -
+ * confirm against the parser.
+ * the tagged anonymous members (struct Node; union Val;) rely on the
+ * compiler's plan9/ms struct extension.
+ */
+struct Expr
+{
+ struct Node;
+ uint32 qual;
+ uint32 type;
+ union {
+ /* identifier */
+ string name;
+ /* literal: value reachable both directly and through v */
+ struct {
+ uint64 kind;
+ union {
+ union Val;
+ union Val v;
+ };
+ } val;
+ /* initializer list: n entries, keys in k, values in v */
+ struct {
+ int n;
+ struct Key *k;
+ Expr *v;
+ } init;
+ /* single operand */
+ Expr *x;
+ /* assignment: left and right operands */
+ struct {
+ Expr *l;
+ Expr *r;
+ } asn;
+ /* conditional: condition, then-value, else-value */
+ struct {
+ Expr *c;
+ Expr *t;
+ Expr *e;
+ } cond;
+ /* index or member access on x: index expression i or field name f */
+ struct {
+ Expr *x;
+ union {
+ Expr *i;
+ string f;
+ };
+ } idx;
+ /* call: callee and n arguments */
+ struct {
+ Expr *fn;
+ int n;
+ Expr **arg;
+ } call;
+ /* unary operator: the same operand under two names */
+ union {
+ Expr *pre;
+ Expr *post;
+ } unary;
+ /*
+ * type-or-expression operand: either a qual/type pair (of) or an
+ * expression (x). presumably the type flag selects of - confirm.
+ * the flag is unsigned so the 1-bit field holds 0 or 1; a plain int
+ * 1-bit field may be signed and then only represents 0 and -1.
+ */
+ struct {
+ uint type : 1;
+ union {
+ struct {
+ uint32 qual;
+ uint32 type;
+ } of;
+ Expr *x;
+ };
+ } info;
+ /* cast: target qual/type and operand */
+ struct {
+ struct {
+ uint32 qual;
+ uint32 type;
+ } to;
+ Expr *x;
+ } cast;
+ /* binary operator: left and right operands */
+ struct {
+ Expr *l;
+ Expr *r;
+ } binary;
+ /* comma: two operands */
+ struct {
+ Expr *x[2];
+ } comma;
+ };
+};
+
+
+/* statements */
+/*
+ * statement node.
+ * embeds struct Node anonymously (pos and kind are direct members) followed
+ * by one payload from the anonymous union. which payload is live is
+ * presumably determined by kind - confirm against the parser.
+ */
+struct Stmt
+{
+ struct Node;
+ union {
+ /* labelled statement: identifier or expression, plus the statement it labels */
+ struct {
+ union {
+ string ident;
+ Expr *x;
+ };
+ Node *stmt;
+ } lbl;
+ /* block: n items */
+ struct {
+ long n;
+ struct Node **item;
+ } blk;
+ /* single expression */
+ Expr *x;
+ /* loop: init, condition, step and body */
+ struct {
+ Node *init;
+ Expr *cond;
+ Expr *step;
+ Node *body;
+ } loop;
+ /* jump: a label name or an expression */
+ union{
+ string lbl;
+ Expr *x;
+ } jmp;
+ /* branch: condition, body and alternative */
+ struct {
+ Expr *cond;
+ Node *body;
+ Node *orelse;
+ } br;
+ };
+};
+
+/* declarations */
+
+/*
+ * specifiers
+ * the design is the following:
+ * type info is held w/in a 64 bit integer.
+ * the bottom 32 bits are associated to specializations
+ * the top 32 bits index into a type-info array held by the compiler.
+ *
+ * NOTE(review): the structs in this header store these as separate uint32
+ * fields (qual/spec and type) rather than one 64 bit integer - confirm the
+ * description above is still current.
+ *
+ * every flag below is iota(K...): its bit number is the position of the
+ * corresponding keyword in the KEYWORDS list, so the flags depend on the
+ * order of that list.
+ */
+enum
+{
+ /* memory */
+ Mauto = iota(Kauto),
+ Mstatic = iota(Kstatic),
+ Mreg = iota(Kregister),
+ Mtls = iota(Ktls),
+ Mtype = iota(Ktypedef),
+ Mextern = iota(Kextern),
+
+ MaskMem = Mauto | Mstatic | Mreg | Mtls | Mtype | Mextern,
+
+ /* qualifiers */
+ Qconst = iota(Kconst),
+ Qrestr = iota(Krestrict),
+ Qvoltl = iota(Kvolatile),
+ Qatom = iota(Katomic),
+
+ MaskQul = Qconst | Qrestr | Qvoltl | Qatom,
+
+ /* function specifiers */
+ Finlne = iota(Kinline),
+ Fnoret = iota(Knoret),
+
+ MaskFcn = Finlne | Fnoret,
+
+ /* types */
+ Tsign = iota(Ksigned),
+ Tunsign = iota(Kunsigned),
+
+ MaskSgn = Tsign | Tunsign,
+
+ Tvoid = iota(Kvoid),
+ Tfloat = iota(Kfloat),
+ Tdouble = iota(Kdouble),
+ Tcmplx = iota(Kcomplex),
+ Timag = iota(Kimaginary),
+
+ MaskFlt = Tfloat | Tdouble | Tcmplx | Timag,
+
+ Tchar = iota(Kchar),
+ Tbool = iota(Kbool),
+
+ Tshort = iota(Kshort),
+ Tint = iota(Kint),
+ Tlong = iota(Klong),
+ /* no keyword of its own: uses the bit right after Klong */
+ Tvlong = iota(Klong+1),
+
+ MaskInt = Tshort | Tint | Tlong | Tvlong,
+ /* NOTE(review): Tfloat is included here but Tdouble is not - confirm intentional */
+ MaskTyp = Tvoid | Tbool | Tchar | Tint | Tfloat | Timag | Tcmplx,
+ /*
+ * NOTE IMPORTANT: vlong takes over the struct bit place
+ * DON'T MOVE KEYWORDS WITHOUT REORGANIZING
+ *
+ * Kstruct directly follows Klong in KEYWORDS, so Klong+1 is the bit
+ * Kstruct would have had; struct/union/enum are therefore shifted up
+ * by one bit, and Tname takes the bit after Tenum.
+ */
+ Tstruct = iota(Kstruct+1),
+ Tunion = iota(Kunion+1),
+ Tenum = iota(Kenum+1),
+ Tname = iota(Kenum+2),
+
+ /* sentinel: all bits set */
+ Sbad = -1,
+};
+
+/* intermediate nodes */
+/* one pointer level of a declarator; levels chain through link */
+struct Ptr
+{
+ uint64 kind;
+ Ptr *link;
+};
+
+/*
+ * name part of a declarator.
+ * the payload is an identifier, a parenthesized declarator, or an inner
+ * name with an index or call suffix; which one is live is presumably
+ * selected by kind (the N* enumerators) - confirm against the parser.
+ */
+struct Name
+{
+ uint32 kind;
+ union {
+ string ident;
+ struct Dtor *paren;
+ struct {
+ Name *name;
+ union {
+ /* index suffix: qualifier bits and index expression */
+ struct {
+ uint32 q;
+ Expr *x;
+ } idx;
+ /*
+ * call suffix: n arguments in arg.
+ * dots is unsigned so the 1-bit flag holds 0 or 1; a plain int
+ * 1-bit field may be signed and then only represents 0 and -1.
+ */
+ struct {
+ int n;
+ uint dots : 1;
+ Field *arg;
+ } call;
+ };
+ } sfx;
+ };
+};
+
+/* declarator: a pointer part held by value and a pointer to the name part */
+struct Dtor
+{
+ Ptr ptr;
+ Name *name;
+};
+
+/* final ast node */
+
+/* a qualified, typed, named slot; used for function arguments and aggregate members */
+struct Field
+{
+ uint32 qual;
+ uint32 type;
+ string name;
+};
+
+/* one entry of a linked list of declared names, each with a type and an initializer */
+struct Decls
+{
+ string name;
+ uint32 type;
+ Expr *init;
+ struct Decls *link;
+};
+
+
+/*
+ * declaration node.
+ * embeds struct Node anonymously (pos and kind are direct members), then
+ * the specifier bits, then either a single declaration or a list head.
+ * the anonymous struct and struct Decls both begin with name and type,
+ * followed by a pointer (body/init vs. init).
+ * presumably the list form is used when kind carries Dlist - confirm.
+ */
+struct Decl
+{
+ struct Node;
+ uint32 spec;
+ union {
+ struct {
+ string name;
+ uint32 type;
+ union {
+ Stmt *body;
+ Expr *init;
+ };
+ };
+ struct Decls list;
+ };
+};
+
+/* type kinds, counting up from 0; presumably the values of Type.kind - confirm */
+enum
+{
+ Tbad,
+ Tbase,
+ Tdef,
+ Tptr,
+ Tarray,
+ Tfunc,
+};
+
+/* types */
+/*
+ * one entry of type information.
+ * the anonymous union holds the kind-specific payload; which member is live
+ * is presumably selected by kind - confirm against the type checker.
+ */
+struct Type
+{
+ uint32 kind;
+ Sym *sym;
+ uintptr size;
+ uintptr max;
+ uint16 align : 8;
+ uint8 sign : 2;
+ union {
+ /* pointer: qualifier bits and base type */
+ struct {
+ uint32 qual;
+ uint32 base;
+ } ptr;
+ /* array: length, qualifier bits and element type */
+ struct {
+ int len;
+ uint32 qual;
+ uint32 *elt;
+ } arr;
+ /* aggregate: len fields in f, with expressions in x */
+ struct {
+ int len;
+ Field *f;
+ Expr *x;
+ } aggr;
+ /* enumeration: len names in elt, with expressions in val */
+ struct {
+ int len;
+ string *elt;
+ Expr *val;
+ } enm;
+ /*
+ * function: return type and n arguments in arg.
+ * dots is unsigned so the 1-bit flag holds 0 or 1; a plain int
+ * 1-bit field may be signed and then only represents 0 and -1.
+ */
+ struct {
+ uint32 ret;
+ int n;
+ uint dots : 1;
+ Field *arg;
+ } func;
+ };
+};
+
+/* platform specific */
+/* type descriptors defined elsewhere: one for pointers and a table of 24 base types */
+extern Type pointer;
+extern Type basetypes[24];
+/* mandated by C standard */
+/*
+ * two parallel 38-entry tables, defined elsewhere.
+ * presumably each valid specifier combination in validtypespec maps to the
+ * index stored at the same position in indextypespec - confirm.
+ */
+extern uint64 validtypespec[38];
+extern int indextypespec[38];
+
+/* one scope: separate symbol tables for tags and for objects */
+struct Scope
+{
+ SymTab tags;
+ SymTab objs;
+};
+
+/* parser state: a two-token window, the resulting declarations and fixed-size work stacks */
+struct Parser
+{
+ Token tok[2];
+ /* growable array of top-level declarations (cap/len presumably capacity and count - confirm) */
+ struct {
+ int cap;
+ int len;
+ Decl **decls;
+ } ast;
+
+ /* static buffers/stacks */
+ /* each pair is a cursor plus its fixed backing array; the array sizes bound the nesting depth */
+ Scope *sp;
+ Scope spstk[40];
+
+ Name *nm;
+ Name nmstk[40];
+
+ Ptr *pt;
+ Ptr ptstk[10];
+
+ Dtor *dt;
+ Dtor dtstk[40];
+};
+
+/* ast.c functions */
+error parse(Parser *, Lexer *);
+
+// -----------------------------------------------------------------------
+// global compiler data
+
+/*
+ * table from string keys to int32 values.
+ * same field layout as SymTab apart from the value type.
+ */
+struct StrTab
+{
+ int32 n_buckets;
+ int32 size;
+ int32 n_occupied;
+ int32 upper_bound;
+ int32 *flags;
+ string *keys;
+ int32 *vals;
+};
+
+/* disabled: a set of Type pointers with the same table layout and no values array */
+#if 0
+struct TypeSet
+{
+ int32 n_buckets;
+ int32 size;
+ int32 n_occupied;
+ int32 upper_bound;
+ int32 *flags;
+ Type **keys;
+};
+#endif
+
+/* main data */
+/* global compiler state; the single instance is the extern C declared below */
+struct Compiler
+{
+ mem·Arena *heap;
+ StrTab strs;
+ string outfile;
+
+ /* growable list of strings (presumably command-line macro definitions - confirm) */
+ struct {
+ int cap;
+ int len;
+ string *val;
+ } def;
+
+ /* growable list of directories (presumably the include search path - confirm) */
+ struct {
+ int cap;
+ int len;
+ string *dir;
+ } inc;
+
+ /* growable array of Type entries */
+ struct {
+ int cap;
+ int len;
+ Type *info;
+ } type;
+
+ /* TODO: make array */
+ struct {
+ Decl vargs;
+ } builtin;
+};
+
+extern Compiler C;
+
+/* cc.c functions */
+/*
+ * NOTE(review): init() and type() are declared with empty parentheses,
+ * which in C leaves their parameters unspecified rather than declaring
+ * "no parameters" - check the definitions and spell out the parameter
+ * lists (or void) if possible.
+ */
+void init();
+int32 intern(byte **str);
+int32 type();
+
+/* iota is private to this header */
+#undef iota