From ce05175372a9ddca1a225db0765ace1127a39293 Mon Sep 17 00:00:00 2001 From: Nicholas Date: Fri, 12 Nov 2021 09:22:01 -0800 Subject: chore: simplified organizational structure --- src/cmd/rc/lex.c | 394 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 394 insertions(+) create mode 100644 src/cmd/rc/lex.c (limited to 'src/cmd/rc/lex.c') diff --git a/src/cmd/rc/lex.c b/src/cmd/rc/lex.c new file mode 100644 index 0000000..9ca2453 --- /dev/null +++ b/src/cmd/rc/lex.c @@ -0,0 +1,394 @@ +#include "rc.h" +#include "parse.h" + +static int advance(void); + +// ----------------------------------------------------------------------- +// lexer + +struct Lexer +{ + int c[2]; + ushort doprompt; + ushort hadword; + ushort haddollar; + ushort inquote; + char buf[BUFSIZ]; +}; + +static struct Lexer lexer = { .c={0, EOF}, .doprompt=1 }; + +#define put1(b) lexer.buf[0] = (b), lexer.buf[1] = 0; +#define put2(b0,b1) lexer.buf[0] = (b0), lexer.buf[1] = (b1), lexer.buf[2] = 0; +#define put3(b0,b1,b2) lexer.buf[0] = (b0), lexer.buf[1] = (b1), lexer.buf[2] = b2, lexer.buf[3] = 0; + +void +yyerror(const char *msg) +{ + print(shell.err, "rc:%d: ", runner->line); + + if(lexer.buf[0] && lexer.buf[0]!='\n') + print(shell.err, "%q: ", lexer.buf); + + print(shell.err, "%s\n", msg); + flush(shell.err); + + lexer.hadword = 0; + lexer.haddollar = 0; + + /* consume remaining tokens */ + while(lexer.c[0] !='\n' && lexer.c[0] != EOF) + advance(); +} + +int +readc(void) +{ + int c; + static int peek = EOF; + + if(peek!=EOF){ + c = peek; + peek = EOF; + return c; + } + + if(runner->flag.eof) + return EOF; + + if(!prompt(&lexer.doprompt)) + exit(1); // XXX: hack for signal handling right now... + + c = get(runner->cmd.io); + lexer.doprompt = lexer.doprompt || c=='\n' || c==EOF; + + if(c==EOF) + runner->flag.eof = 1; + + return c; +} + +static +int +peekc(void) +{ + if(lexer.c[1] == EOF) + lexer.c[1] = readc(); + + return lexer.c[1]; +} + +static +int +advance(void) +{ + int c = peekc(); + lexer.c[0] = lexer.c[1], lexer.c[1] = EOF; + + return c; +} + +static +void +skipws(void) +{ + int c; + for(;;){ + c = peekc(); + if(c== ' ' || c == '\t') + advance(); + else + return; + } +} + +static +void +skipnl(void) +{ + int c; + for(;;){ + c = peekc(); + if(c== ' ' || c == '\t' || c == '\n') + advance(); + else + return; + } +} + +static +int +nextis(int c) +{ + if(peekc()==c){ + advance(); + return 1; + } + return 0; +} + +static +char * +putbyte(char *buf, int c) +{ + if(!buf) + return buf; + + if(buf == arrend(lexer.buf)){ + fatal("lexer: out of buffer space"); + return nil; + } + *buf++ = c; + return buf; +} + +static +char * +putrune(char *buf, int c) +{ + buf = putbyte(buf, c); + if(utf8·onebyte(c)) + return buf; + if(utf8·twobyte(c)) + return putbyte(buf,advance()); + if(utf8·threebyte(c)){ + buf = putbyte(buf,advance()); + return putbyte(buf,advance()); + } + if(utf8·fourbyte(c)){ + buf = putbyte(buf,advance()); + buf = putbyte(buf,advance()); + return putbyte(buf,advance()); + } + fatal("malformed utf8 stream"); + + return nil; +} + +// ----------------------------------------------------------------------- +// exported functions + +// TODO: turn into static tables +int +iswordchar(int c) +{ + return !strchr("\n \t#;&|^$=`'{}()<>", c) && c!=EOF; +} + +int +isidentchar(int c) +{ + return c>' ' && !strchr("!\"#$%&'()+,-./:;<=>?@[\\]^`{|}~", c); +} + +int +yylex(void) +{ + int c, d = peekc(); + Tree *node; + char *w = lexer.buf; + + yylval.tree = nil; + + /* inject tokens */ + if(lexer.hadword){ + lexer.hadword = 0; + if(d=='('){ + advance(); + strcpy(lexer.buf, "( [Tindex]"); + return Tindex; + } + if(iswordchar(d) || d=='\'' || d=='`' || d=='$' || d=='"'){ + strcpy(lexer.buf, "^"); + return '^'; + } + } + + lexer.inquote = 0; + + skipws(); + switch(c=advance()){ + case EOF: + lexer.haddollar = 0; + put3('E','O','F'); + return EOF; + + case '$': + lexer.haddollar = 1; + if(nextis('#')){ + put2('$','#'); + return Tcount; + } + if(nextis('^')){ + put2('$','^'); + return Tjoin; + } + put1('$'); + return '$'; + + case '@': + lexer.haddollar = 0; + put1('@'); + return Tsubshell; + + case '!': + lexer.haddollar = 0; + put1('!'); + return Tbang; + + case '&': + lexer.haddollar = 0; + if(nextis('&')){ + put2('&','&'); + return Tandand; + } + put1('&'); + return '&'; + + case '|': + lexer.haddollar = 0; + if(nextis('|')){ + put2('|','|'); + return Toror; + } + node = maketree(); + *w++ = '|'; + + node->type = Tpipe; + node->redir.fd[0] = 1; + node->redir.fd[1] = 0; + goto redir; + + case '>': + lexer.haddollar = 0; + node = maketree(); + *w++ = '>'; + node->type = Tredir; + + if(nextis('>')){ + node->redir.type = Rappend; + *w++ = '>'; + }else + node->redir.type = Rwrite; + node->redir.fd[0] = 1; + goto redir; + + case '<': + lexer.haddollar = 0; + node = maketree(); + *w++ = '<'; + node->type = Tredir; + + if(nextis('<')){ + node->redir.type = Rhere; + *w++ = '<'; + }else if(nextis('>')){ + node->redir.type = Rrdwr; + *w++ = '>'; + }else{ + node->redir.type = Rread; + } + node->redir.fd[0] = 0; + /* fallthrough */ + redir: + if(nextis('[')){ + *w++='['; + c = advance(); + *w++ = c; + if(c < '0' || '9' < c){ + badredir: + *w = 0; + yyerror(node->type == Tpipe ? "pipe syntax" : "redirection syntax"); + return EOF; + } + node->redir.fd[0] = 0; + do{ + node->redir.fd[0] = 10*node->redir.fd[0]+(c-'0'); + *w++ = c; + c = advance(); + }while('0'<=c && c<='9'); + + if(c == '='){ + *w++ = '='; + if(node->type==Tredir) + node->type = Tdup; + c = advance(); + } + if(c < '0' || '9' < c){ + if(node->type == Tpipe) + goto badredir; + node->redir.type = Rclose; + }else{ + node->redir.type = Rdupfd; + node->redir.fd[1] = node->redir.fd[0]; + node->redir.fd[0] = 0; + do{ + node->redir.fd[0] = 10*node->redir.fd[0]+(c-'0'); + *w++ = c; + c = advance(); + }while('0'<=c && c<='9'); + } + if(c != ']' || (node->type == Tdup && (node->redir.type = Rhere || node->redir.type == Rappend))) + goto badredir; + *w++ = ']'; + } + *w++ = 0; + yylval.tree = node; + + return node->type; + + case '\'': + lexer.hadword = 1; + lexer.inquote = 1; + lexer.haddollar = 0; + for(;;){ + c = advance(); + if(c==EOF) + break; + + if(c=='\''){ + if(peekc()!='\'') + break; + advance(); + } + w = putrune(w, c); + } + if(w) + *w = 0; + node = token(Tword, lexer.buf); + node->quoted = 1; + return node->type; + + default: + ; + } + if(!iswordchar(c)){ + put1(c); + lexer.haddollar = 0; + return c; + } + + for(;;){ + w = putrune(w, c); + c = peekc(); + if(lexer.haddollar ? !isidentchar(c) : !iswordchar(c)) + break; + advance(); + } + + lexer.hadword = 1; + lexer.haddollar = 0; + if(w) + *w = 0; + + node = token(Tword, lexer.buf); + if((c=iskeyword(lexer.buf))){ + node->type = c; + lexer.hadword = 0; + } + + node->quoted = 0; + + yylval.tree = node; + return node->type; +} -- cgit v1.2.1