#include "rc.h"
#include "parse.h"

static int advance(void);

// -----------------------------------------------------------------------
// lexer

/*
 * Lexer state.
 *   c[0]      last character consumed by advance()
 *   c[1]      one character of lookahead, EOF when empty
 *   doprompt  passed to prompt(); set after a newline or EOF is read
 *   hadword   previous token was a word (drives '^' / Tindex injection)
 *   haddollar previous token was '$' (next word uses isidentchar())
 *   inquote   set while a quoted word is being returned
 *   buf       text of the current token
 */
struct Lexer
{
    int c[2];
    ushort doprompt;
    ushort hadword;
    ushort haddollar;
    ushort inquote;
    char buf[BUFSIZ];
};

/* One pending here document: its tag word, temp file name, next entry. */
struct Here
{
    Tree *tag;
    char *name;
    struct Here *link;
};

// -----------------------------------------------------------------------
// globals

static struct Lexer lexer = { .c={0, EOF}, .doprompt=1 };

/* store a short literal token text into lexer.buf (expression form, no trailing ';') */
#define put1(b)        (lexer.buf[0] = (b), lexer.buf[1] = 0)
#define put2(b0,b1)    (lexer.buf[0] = (b0), lexer.buf[1] = (b1), lexer.buf[2] = 0)
#define put3(b0,b1,b2) (lexer.buf[0] = (b0), lexer.buf[1] = (b1), lexer.buf[2] = (b2), lexer.buf[3] = 0)

/* heres is the list head; endhere points at the link field of its last entry */
static struct Here *heres, **endhere;
static const char hex[]="0123456789abcdef";
/* the two 0000 fields are overwritten by heredoc() with pid and counter */
static char tmp[]="/tmp/here0000.0000";
static int numhere;

// -----------------------------------------------------------------------
// internal functions

/*
 * Report a syntax error on shell.err, prefixed with the current line
 * and (when non-empty) the text of the offending token, then discard
 * input up to the end of the line so parsing can resume.
 */
void
yyerror(const char *msg)
{
    print(shell.err, "rc:%d: ", runner->line);

    if(lexer.buf[0] && lexer.buf[0]!='\n')
        print(shell.err, "%q: ", lexer.buf);

    print(shell.err, "%s\n", msg);
    flush(shell.err);

    lexer.hadword = 0;
    lexer.haddollar = 0;

    /* consume remaining tokens */
    while(lexer.c[0] !='\n' && lexer.c[0] != EOF)
        advance();
}

/* Write the low 16 bits of n as four lowercase hex digits at p (no NUL). */
static
void
hexnum(char *p, int n)
{
    *p++ = hex[(n>>12)&0xF];
    *p++ = hex[(n>>8)&0xF];
    *p++ = hex[(n>>4)&0xF];
    *p   = hex[n&0xF];
}

/*
 * Print a word list separated by single spaces. The walk stops at the
 * last element whose successor is missing or has a nil string.
 */
static
void
pword(Io *io, Word *arg)
{
    if(arg){
        while(arg->link && arg->link->str){
            printstr(io, arg->str);
            printchar(io, ' ');
            arg = arg->link;
        }
        printstr(io, arg->str);
    }
}

/*
 * Copy s to io, expanding variable references:
 *   $$     -> a literal '$'
 *   $N     -> the N-th element of the variable "*" (nothing if out of range)
 *   $name  -> the value of name, words joined by spaces
 * A '^' immediately following a name is swallowed.
 *
 * s is modified temporarily (a NUL is planted after each name and then
 * restored), so it must be writable.
 */
static
void
psubst(Io *io, char *s)
{
    char *t, *u;
    int savec, n;
    Word *star;

    while(*s){
        if(*s!='$'){
            /*
             * NOTE(review): bytes 0xa0-0xf5 are copied together with one
             * following byte and 0xf6-0xf7 with two, so a '$' inside them
             * is not expanded. This looks like lead-byte handling for a
             * multi-byte encoding other than UTF-8 -- confirm.
             */
            if(0xa0<=(*s&0xff) && (*s&0xff)<=0xf5){
                printchar(io, *s++);
                if(*s=='\0')
                    break;
            }else if(0xf6<=(*s&0xff) && (*s&0xff)<=0xf7){
                printchar(io, *s++);
                if(*s=='\0')
                    break;
                printchar(io, *s++);
                if(*s=='\0')
                    break;
            }
            printchar(io, *s++);
        }else{
            t = ++s;
            if(*t=='$')
                printchar(io, *t++);
            else{
                /* isolate the identifier by planting a NUL after it */
                while(*t && isidentchar(*t))
                    t++;
                savec = *t;
                *t = '\0';

                /* all-digit, non-zero names index into $* */
                n = 0;
                for(u = s; *u && '0'<=*u && *u<='9'; u++)
                    n = n*10+*u-'0';

                if(n && *u=='\0'){
                    star = var("*")->val;
                    if(star && 1<=n && n<=count(star)){
                        while(--n)
                            star = star->link;
                        printstr(io, star->str);
                    }
                }else
                    pword(io, var(s)->val);

                *t = savec;
                if(savec=='^')
                    t++;
            }
            s = t;
        }
    }
}

/*
 * Read one character from the running command's input, prompting first
 * when required. Once EOF has been seen it is latched in runner->flag.eof
 * and returned on every later call.
 */
int
readc(void)
{
    int c;

    if(runner->flag.eof)
        return EOF;

    if(!prompt(&lexer.doprompt))
        exit(1); // XXX: hack for signal handling right now...

    c = get(runner->cmd.io);
    lexer.doprompt = lexer.doprompt || c=='\n' || c==EOF;

    if(c==EOF)
        runner->flag.eof = 1;

    return c;
}

/* Return the next character without consuming it. */
static
int
peekc(void)
{
    if(lexer.c[1] == EOF)
        lexer.c[1] = readc();

    return lexer.c[1];
}

/* Consume and return the next character; it becomes lexer.c[0]. */
static
int
advance(void)
{
    int c = peekc();

    lexer.c[0] = lexer.c[1], lexer.c[1] = EOF;

    return c;
}

/* Skip spaces and tabs. */
static
void
skipws(void)
{
    int c;

    for(;;){
        c = peekc();
        if(c==' ' || c=='\t')
            advance();
        else
            return;
    }
}

/* Skip spaces, tabs and newlines. */
static
void
skipnl(void)
{
    int c;

    for(;;){
        c = peekc();
        if(c==' ' || c=='\t' || c=='\n')
            advance();
        else
            return;
    }
}

/* If the next character is c, consume it and return 1; otherwise return 0. */
static
int
nextis(int c)
{
    if(peekc()==c){
        advance();
        return 1;
    }
    return 0;
}

/*
 * Append one byte to the token buffer and return the new write position.
 * A nil buf is passed through untouched. The final byte of lexer.buf is
 * kept free so callers can always store the terminating NUL at the
 * returned position.
 */
static
char *
putbyte(char *buf, int c)
{
    if(!buf)
        return buf;

    if(buf >= arrend(lexer.buf)-1){
        fatal("lexer: out of buffer space");
        return nil;
    }
    *buf++ = c;
    return buf;
}

/*
 * Append the UTF-8 sequence whose first byte is c; continuation bytes
 * are pulled from the input with advance().
 */
static
char *
putrune(char *buf, int c)
{
    buf = putbyte(buf, c);
    if(utf8·onebyte(c))
        return buf;
    if(utf8·twobyte(c))
        return putbyte(buf,advance());
    if(utf8·threebyte(c)){
        buf = putbyte(buf,advance());
        return putbyte(buf,advance());
    }
    if(utf8·fourbyte(c)){
        buf = putbyte(buf,advance());
        buf = putbyte(buf,advance());
        return putbyte(buf,advance());
    }

    fatal("malformed utf8 stream");
    return nil;
}

// -----------------------------------------------------------------------
// here doc exports

/*
 * Register a here document whose terminator is tag and return a word
 * token naming the temporary file that readhere() will fill in.
 * The name is built from the process id and a running counter.
 */
Tree *
heredoc(Tree *tag)
{
    struct Here *h;

    if(tag->type != Tword)
        yyerror("bad here tag");

    h = emalloc(sizeof(*h));
    h->link = nil;

    /* append to the pending list and remember the new tail */
    if(heres)
        *endhere = h;
    else
        heres = h;
    endhere = &h->link;

    h->tag = tag;
    hexnum(&tmp[9], getpid());
    hexnum(&tmp[14], numhere++);
    h->name = strdup(tmp);

    return token(Tword, tmp);
}

/*
 * Read the bodies of all pending here documents from the command input
 * into their temporary files. Lines are copied until one equals the tag
 * exactly; unless the tag was quoted, each line goes through psubst().
 * The pending list is emptied afterwards.
 */
void
readhere(void)
{
    Io *io;
    int c, fd, sub;
    long len;
    struct Here *h, *nh;
    char *s, *beg, *end, *tag, line[PAGESIZE+1];

    for(h=heres; h; h = nh){
        sub = !h->tag->quoted;
        tag = h->tag->str;

        /* NOTE(review): on failure we still go on with a bad descriptor */
        if((fd=open(h->name,O_WRONLY|O_CREAT,S_IRUSR|S_IWUSR))<0)
            yyerror("failed to make heredoc");

        io = openfd(fd);
        prompt(&lexer.doprompt);

        /* start on the stack buffer; move to the heap only if a line outgrows it */
        beg=line, s=line, end=arrend(line)-1, len=PAGESIZE;
        while((c=get(runner->cmd.io))!=EOF){
            /* out of space: get bigger buffer */
            if(s == end){
                if(beg == line){
                    beg = emalloc(2*len+1);
                    memcpy(beg, line, len);
                }else
                    beg = erealloc(beg, 2*len+1); /* keeps the old contents */
                s    = beg + len;
                len *= 2;
                end  = beg + len;
            }

            if(c!='\n'){
                *s++ = c;
                continue;
            }
            /* c == '\n' */
            *s = 0;
            if(tag && strcmp(beg, tag) == 0)
                break;

            if(sub)
                psubst(io, beg);
            else
                printstr(io, beg);

            /* reset */
            s = beg;
            prompt(&lexer.doprompt);
            printchar(io, c);
        }

        flush(io);
        terminate(io);
        emitdelhere(h->name);

        nh = h->link;
        if(beg != line)
            efree(beg);
        efree(h);
    }

    heres = nil;
    lexer.doprompt = 1;
}

// -----------------------------------------------------------------------
// lexer exported functions

// TODO: turn into static tables

/* True if c may appear in an unquoted word (NUL and EOF are excluded). */
int
iswordchar(int c)
{
    return !strchr("\n \t#;&|^$=`'{}()<>", c) && c!=EOF;
}

/* True if c may appear in a variable name following '$'. */
int
isidentchar(int c)
{
    return c>' ' && !strchr("!\"#$%&'()+,-./:;<=>?@[\\]^`{|}~", c);
}

/*
 * Return the next token. The token text is left in lexer.buf; tokens
 * that carry a tree (words, pipes, redirections) also set yylval.tree.
 *
 * After a word, an adjacent '(' yields Tindex, and an adjacent word,
 * quote, backquote, '$' or '"' yields an implicit '^' without consuming
 * input.
 */
int
yylex(void)
{
    Tree *node;
    int c, d = peekc();
    char *w = lexer.buf;

    yylval.tree = nil;

    /* inject tokens */
    if(lexer.hadword){
        lexer.hadword = 0;
        if(d=='('){
            advance();
            strcpy(lexer.buf, "(Tindex)");
            return Tindex;
        }
        if(iswordchar(d) || d=='\'' || d=='`' || d=='$' || d=='"'){
            strcpy(lexer.buf, "^");
            return '^';
        }
    }

    lexer.inquote = 0;

    skipws();
    switch(c=advance()){
    case EOF:
        lexer.haddollar = 0;
        put3('E','O','F');
        return EOF;

    case '$':
        lexer.haddollar = 1;
        if(nextis('#')){
            put2('$','#');
            return Tcount;
        }
        if(nextis('^')){
            put2('$','^');
            return Tjoin;
        }
        put1('$');
        return '$';

#if 0
    case '@':
        lexer.haddollar = 0;
        put1('@');
        return Tsubshell;

    case '!':
        lexer.haddollar = 0;
        put1('!');
        return Tbang;
#endif

    case '&':
        lexer.haddollar = 0;
        if(nextis('&')){
            put2('&','&');
            return Tandand;
        }
        put1('&');
        return '&';

    case '|':
        lexer.haddollar = 0;
        if(nextis('|')){
            put2('|','|');
            return Toror;
        }
        node = maketree();
        *w++ = '|';
        node->type = Tpipe;
        node->redir.fd[0] = 1;
        node->redir.fd[1] = 0;
        goto redir;

    case '>':
        lexer.haddollar = 0;
        node = maketree();
        *w++ = '>';
        node->type = Tredir;
        if(nextis('>')){
            node->redir.type = Rappend;
            *w++ = '>';
        }else
            node->redir.type = Rwrite;
        node->redir.fd[0] = 1;
        goto redir;

    case '<':
        lexer.haddollar = 0;
        node = maketree();
        *w++ = '<';
        node->type = Tredir;
        if(nextis('<')){
            node->redir.type = Rhere;
            *w++ = '<';
        }else if(nextis('>')){
            node->redir.type = Rrdwr;
            *w++ = '>';
        }else{
            node->redir.type = Rread;
        }
        node->redir.fd[0] = 0;
        /* fallthrough */
    redir:
        /*
         * Optional descriptor suffix: [n], [n=m] or [n=].
         * NOTE(review): digits are stored into lexer.buf and accumulated
         * into an int without a length/overflow bound.
         */
        if(nextis('[')){
            *w++ = '[';
            c = advance();
            *w++ = c;
            if(c<'0' || '9'<c){
            badredir:
                *w = 0; /* terminate what we have so yyerror prints it */
                yyerror(node->type == Tpipe ? "pipe syntax" : "redirection syntax");
                return EOF;
            }
            node->redir.fd[0] = 0;
            do{
                node->redir.fd[0] = 10*node->redir.fd[0]+(c-'0');
                *w++ = c;
                c = advance();
            }while('0'<=c && c<='9');

            if(c == '='){
                *w++ = '=';
                if(node->type == Tredir){
                    /* a dup makes no sense on a here document or an append */
                    if(node->redir.type == Rhere || node->redir.type == Rappend)
                        goto badredir;
                    node->type = Tdup;
                }
                c = advance();

                if(c < '0' || '9' < c){
                    /* [n=] closes n; meaningless for a pipe */
                    if(node->type == Tpipe)
                        goto badredir;
                    node->redir.type = Rclose;
                }else{
                    /* [n=m]: n moves to fd[1], m is parsed into fd[0] */
                    node->redir.type  = Rdupfd;
                    node->redir.fd[1] = node->redir.fd[0];
                    node->redir.fd[0] = 0;
                    do{
                        node->redir.fd[0] = 10*node->redir.fd[0]+(c-'0');
                        *w++ = c;
                        c = advance();
                    }while('0'<=c && c<='9');
                }
            }

            if(c != ']')
                goto badredir;
            *w++ = ']';
        }
        *w++ = 0;
        yylval.tree = node;

        return node->type;

    case '\'':
        lexer.hadword = 1;
        lexer.inquote = 1;
        lexer.haddollar = 0;
        for(;;){
            c = advance();
            if(c==EOF)
                break;
            if(c=='\''){
                /* a doubled quote stands for one literal quote */
                if(peekc()!='\'')
                    break;
                advance();
            }
            w = putrune(w, c);
        }
        if(w)
            *w = 0;
        node = token(Tword, lexer.buf);
        node->quoted = 1;
        yylval.tree = node;
        return node->type;

    default:
        ;
    }

    if(!iswordchar(c)){
        put1(c);
        lexer.haddollar = 0;
        return c;
    }

    /* bare word: after '$' only identifier characters are accepted */
    for(;;){
        switch(c){
        /* inject a magic glob character into our stream */
        case '*': case '[': case '?': case (int)GLOB:
            w = putbyte(w, GLOB);
            /* fallthrough */
        default:
            w = putrune(w, c);
            c = peekc();
        }
        if(lexer.haddollar ? !isidentchar(c) : !iswordchar(c))
            break;
        advance();
    }

    lexer.hadword = 1;
    lexer.haddollar = 0;
    if(w)
        *w = 0;

    node = token(Tword, lexer.buf);
    if((c=iskeyword(lexer.buf))){
        node->type = c;
        lexer.hadword = 0;
    }

    node->quoted = 0;

    yylval.tree = node;
    return node->type;
}