#include "rc.h"
#include "parse.h"

static int advance(void);

// -----------------------------------------------------------------------
// lexer

/*
 * Lexer state shared by every routine in this file.
 *
 *  c[0]       most recently consumed character
 *  c[1]       one character of lookahead; EOF means "not yet read"
 *  doprompt   handed to prompt() before each read; set again after a
 *             newline or EOF has been read
 *  hadword    previous token was a word (drives '^' / Tindex injection)
 *  haddollar  previous token was '$' (restricts the next word to
 *             identifier characters)
 *  inquote    set while a single-quoted word is being scanned
 *  buf        text of the current token, NUL terminated
 */
struct Lexer
{
	int    c[2];
	ushort doprompt;
	ushort hadword;
	ushort haddollar;
	ushort inquote;
	char   buf[BUFSIZ];
};

static struct Lexer lexer = { .c={0, EOF}, .doprompt=1 };

/*
 * Store a 1, 2 or 3 byte token text in lexer.buf.
 * Written as parenthesised comma expressions with no trailing semicolon
 * so they behave like ordinary expressions at the call site.
 */
#define put1(b)        (lexer.buf[0] = (b), lexer.buf[1] = 0)
#define put2(b0,b1)    (lexer.buf[0] = (b0), lexer.buf[1] = (b1), lexer.buf[2] = 0)
#define put3(b0,b1,b2) (lexer.buf[0] = (b0), lexer.buf[1] = (b1), lexer.buf[2] = (b2), lexer.buf[3] = 0)

/*
 * Report a syntax error on shell.err, prefixed with the current line
 * number and (when non-empty and not a newline) the offending token
 * text, then discard input up to the end of the current line.
 */
void
yyerror(const char *msg)
{
	print(shell.err, "rc:%d: ", runner->line);

	if(lexer.buf[0] && lexer.buf[0]!='\n')
		print(shell.err, "%q: ", lexer.buf);

	print(shell.err, "%s\n", msg);
	flush(shell.err);

	lexer.hadword   = 0;
	lexer.haddollar = 0;

	/* consume remaining tokens */
	while(lexer.c[0] !='\n' && lexer.c[0] != EOF)
		advance();
}

/*
 * Read one character from the running command's input.
 * Returns EOF once end of input has been seen (recorded in
 * runner->flag.eof so later calls do not read again).
 */
int
readc(void)
{
	int c;

	if(runner->flag.eof)
		return EOF;

	if(!prompt(&lexer.doprompt))
		exit(1); // XXX: hack for signal handling right now...

	c = get(runner->cmd.io);
	lexer.doprompt = lexer.doprompt || c=='\n' || c==EOF;

	if(c==EOF)
		runner->flag.eof = 1;

	return c;
}

/* Return the next character without consuming it. */
static int
peekc(void)
{
	if(lexer.c[1] == EOF)
		lexer.c[1] = readc();

	return lexer.c[1];
}

/* Consume and return the next character; it becomes lexer.c[0]. */
static int
advance(void)
{
	int c = peekc();
	lexer.c[0] = lexer.c[1], lexer.c[1] = EOF;

	return c;
}

/* Skip spaces and tabs. */
static void
skipws(void)
{
	int c;
	for(;;){
		c = peekc();
		if(c== ' ' || c == '\t')
			advance();
		else
			return;
	}
}

/* Skip spaces, tabs and newlines. */
static void
skipnl(void)
{
	int c;
	for(;;){
		c = peekc();
		if(c== ' ' || c == '\t' || c == '\n')
			advance();
		else
			return;
	}
}

/* If the next character is c, consume it and return 1; otherwise 0. */
static int
nextis(int c)
{
	if(peekc()==c){
		advance();
		return 1;
	}
	return 0;
}

/*
 * Append one byte to the token buffer and return the new write position.
 * A nil buf is passed through untouched so that callers can chain calls
 * after a failure.
 *
 * One byte is always kept free at the end of lexer.buf: callers finish a
 * token with `*w = 0`, which would otherwise land past the array once the
 * buffer had been filled completely.
 * NOTE(review): assumes arrend(a) yields the one-past-the-end pointer.
 */
static char *
putbyte(char *buf, int c)
{
	if(!buf)
		return buf;

	if(buf >= arrend(lexer.buf)-1){
		fatal("lexer: out of buffer space");
		return nil;
	}
	*buf++ = c;
	return buf;
}

/*
 * Append the UTF-8 sequence whose lead byte is c, pulling the
 * continuation bytes straight from the input with advance().
 * Calls fatal() when c is not a valid lead byte.
 */
static char *
putrune(char *buf, int c)
{
	buf = putbyte(buf, c);

	if(utf8·onebyte(c))
		return buf;

	if(utf8·twobyte(c))
		return putbyte(buf,advance());

	if(utf8·threebyte(c)){
		buf = putbyte(buf,advance());
		return putbyte(buf,advance());
	}

	if(utf8·fourbyte(c)){
		buf = putbyte(buf,advance());
		buf = putbyte(buf,advance());
		return putbyte(buf,advance());
	}

	fatal("malformed utf8 stream");
	return nil;
}

// -----------------------------------------------------------------------
// exported functions

// TODO: turn into static tables

/* True when c may appear inside an unquoted word. */
int
iswordchar(int c)
{
	return !strchr("\n \t#;&|^$=`'{}()<>", c) && c!=EOF;
}

/* True when c may appear inside a variable name following '$'. */
int
isidentchar(int c)
{
	return c>' ' && !strchr("!\"#$%&'()+,-./:;<=>?@[\\]^`{|}~", c);
}

/*
 * Return the next token.
 *
 * The token text is left in lexer.buf. For words, pipes and redirections
 * the corresponding tree node is stored in yylval.tree; otherwise
 * yylval.tree is nil.
 *
 * When the previous token was a word, an immediately following '(' is
 * returned as Tindex, and an immediately following word-like character
 * produces an implicit '^' token without consuming any input.
 */
int
yylex(void)
{
	int c, d = peekc();
	Tree *node;
	char *w = lexer.buf;

	yylval.tree = nil;

	/* inject tokens */
	if(lexer.hadword){
		lexer.hadword = 0;
		if(d=='('){
			advance();
			strcpy(lexer.buf, "( [Tindex]");
			return Tindex;
		}
		if(iswordchar(d) || d=='\'' || d=='`' || d=='$' || d=='"'){
			strcpy(lexer.buf, "^");
			return '^';
		}
	}

	lexer.inquote = 0;

	skipws();
	switch(c=advance()){
	case EOF:
		lexer.haddollar = 0;
		put3('E','O','F');
		return EOF;

	case '$':
		lexer.haddollar = 1;
		if(nextis('#')){
			put2('$','#');
			return Tcount;
		}
		if(nextis('^')){
			put2('$','^');
			return Tjoin;
		}
		put1('$');
		return '$';

	case '@':
		lexer.haddollar = 0;
		put1('@');
		return Tsubshell;

	case '!':
		lexer.haddollar = 0;
		put1('!');
		return Tbang;

	case '&':
		lexer.haddollar = 0;
		if(nextis('&')){
			put2('&','&');
			return Tandand;
		}
		put1('&');
		return '&';

	case '|':
		lexer.haddollar = 0;
		if(nextis('|')){
			put2('|','|');
			return Toror;
		}
		node = maketree();
		*w++ = '|';
		node->type = Tpipe;
		node->redir.fd[0] = 1;
		node->redir.fd[1] = 0;
		goto redir;

	case '>':
		lexer.haddollar = 0;
		node = maketree();
		*w++ = '>';
		node->type = Tredir;
		if(nextis('>')){
			node->redir.type = Rappend;
			*w++ = '>';
		}else
			node->redir.type = Rwrite;
		node->redir.fd[0] = 1;
		goto redir;

	case '<':
		lexer.haddollar = 0;
		node = maketree();
		*w++ = '<';
		node->type = Tredir;
		if(nextis('<')){
			node->redir.type = Rhere;
			*w++ = '<';
		}else if(nextis('>')){
			node->redir.type = Rrdwr;
			*w++ = '>';
		}else{
			node->redir.type = Rread;
		}
		node->redir.fd[0] = 0;
		/* fallthrough */
	redir:
		/* optional descriptor suffix: [fd], [fd=] or [fd0=fd1] */
		if(nextis('[')){
			*w++='[';
			c = advance();
			*w++ = c;
			if(c < '0' || '9' < c){
			badredir:
				*w = 0;
				yyerror(node->type == Tpipe ? "pipe syntax" : "redirection syntax");
				return EOF;
			}

			node->redir.fd[0] = 0;
			do{
				node->redir.fd[0] = 10*node->redir.fd[0]+(c-'0');
				*w++ = c;
				c = advance();
			}while('0'<=c && c<='9');

			/*
			 * Only an explicit '=' turns the node into a dup/close.
			 * A plain [fd] must leave the redirection type chosen above
			 * untouched and must remain legal for pipes.
			 */
			if(c == '='){
				*w++ = '=';
				if(node->type==Tredir)
					node->type = Tdup;
				c = advance();

				if(c < '0' || '9' < c){
					/* [fd=] closes fd; meaningless for a pipe */
					if(node->type == Tpipe)
						goto badredir;
					node->redir.type = Rclose;
				}else{
					node->redir.type  = Rdupfd;
					node->redir.fd[1] = node->redir.fd[0];
					node->redir.fd[0] = 0;
					do{
						node->redir.fd[0] = 10*node->redir.fd[0]+(c-'0');
						*w++ = c;
						c = advance();
					}while('0'<=c && c<='9');
				}
			}

			/*
			 * NOTE(review): a Tdup node always carries Rdupfd or Rclose by
			 * this point, so the Rhere/Rappend test cannot currently fire;
			 * it is kept as a comparison (it used to be an assignment that
			 * clobbered redir.type).
			 */
			if(c != ']'
			|| (node->type == Tdup && (node->redir.type == Rhere || node->redir.type == Rappend)))
				goto badredir;
			*w++ = ']';
		}
		*w++ = 0;
		yylval.tree = node;

		return node->type;

	case '\'':
		lexer.hadword   = 1;
		lexer.inquote   = 1;
		lexer.haddollar = 0;
		for(;;){
			c = advance();
			if(c==EOF)
				break;
			if(c=='\''){
				/* a doubled quote stands for one literal quote */
				if(peekc()!='\'')
					break;
				advance();
			}
			w = putrune(w, c);
		}
		if(w)
			*w = 0;
		node = token(Tword, lexer.buf);
		node->quoted = 1;
		/* hand the node to the parser, as the unquoted-word path does */
		yylval.tree = node;

		return node->type;

	default:
		;
	}

	/* any other non-word character is its own token */
	if(!iswordchar(c)){
		put1(c);
		lexer.haddollar = 0;
		return c;
	}

	/* unquoted word; after '$' only identifier characters are taken */
	for(;;){
		w = putrune(w, c);
		c = peekc();
		if(lexer.haddollar ? !isidentchar(c) : !iswordchar(c))
			break;
		advance();
	}

	lexer.hadword   = 1;
	lexer.haddollar = 0;
	if(w)
		*w = 0;

	node = token(Tword, lexer.buf);
	if((c=iskeyword(lexer.buf))){
		/* keywords do not trigger '^' / Tindex injection */
		node->type    = c;
		lexer.hadword = 0;
	}

	node->quoted = 0;

	yylval.tree = node;
	return node->type;
}