From d1a19f0d477a6249d8af9322317b8434b86260ea Mon Sep 17 00:00:00 2001 From: Nicholas Noll Date: Wed, 13 Oct 2021 08:27:37 -0700 Subject: fix(email): bytes error. updated vendoring --- sys/cmd/rc/lex.c | 688 ++++++++++++++++++++++++++----------------------------- 1 file changed, 321 insertions(+), 367 deletions(-) (limited to 'sys/cmd/rc/lex.c') diff --git a/sys/cmd/rc/lex.c b/sys/cmd/rc/lex.c index f6e2b4e..253d05a 100644 --- a/sys/cmd/rc/lex.c +++ b/sys/cmd/rc/lex.c @@ -1,417 +1,371 @@ #include "rc.h" +#include "exec.h" +#include "io.h" +#include "getflags.h" +#include "fns.h" -#define onebyte(c) ((c&0x80)==0x00) -#define twobyte(c) ((c&0xe0)==0xc0) -#define threebyte(c) ((c&0xf0)==0xe0) -#define fourbyte(c) ((c&0xf8)==0xf0) +char *promptstr; +int doprompt; +char tok[NTOK]; -// ----------------------------------------------------------------------- -// globals - -static int lastc, nextc=EOF, lastdol, lastword, doprompt = 1; -static char buf[8*1024]; - -// ----------------------------------------------------------------------- -// utilities - -static uchar nwordc[256] = -{ - 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, - 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 -}; +int getnext(void); int wordchr(int c) { - return !nwordc[c] && c!=EOF; + return !strchr("\n \t#;&|^$=`'{}()<>", c) && c!=EOF; } - -static uchar nquotec[256] = -{ - 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 -}; - int -quotechr(char c) +idchr(int c) { - return !nquotec[c] && c!=EOF; + /* + * Formerly: + * return 'a'<=c && c<='z' || 'A'<=c && c<='Z' || '0'<=c && c<='9' + * || c=='_' || c=='*'; + */ + return c>' ' && !strchr("!\"#$%&'()+,-./:;<=>?@[\\]^`{|}~", c); } - -static uchar nvarc[256] = -{ - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, - 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, - 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 -}; - +int future = EOF; +int doprompt = 1; +int inquote; +int incomm; +/* + * Look ahead in the input stream + */ int -varchr(char c) +nextc(void) { - return !nvarc[c] && c!=EOF; + if(future==EOF) + future = getnext(); + return future; } +/* + * Consume the lookahead character. + */ -static -void -prompt(void) -{ - shell->cmd.line++; - doprompt = 0; -} - -/* lookahead one byte */ -static int -lookahead(void) +advance(void) { - int c; - - if(nextc != EOF) - return nextc; - if(shell->cmd.eof) - return EOF; - - if(doprompt) - prompt(); - - c = rchr(shell->cmd.io); - doprompt = c == '\n' || c == EOF; - - if(c == EOF) - shell->cmd.eof++; - - return nextc = c; + int c = nextc(); + lastc = future; + future = EOF; + return c; } +/* + * read a character from the input stream + */ -/* consumes the lookahead */ -static int -advance(void) +getnext(void) { - int c = lookahead(); - lastc = nextc, nextc = EOF; - - return c; + int c; + static int peekc = EOF; + if(peekc!=EOF){ + c = peekc; + peekc = EOF; + return c; + } + if(runq->eof) + return EOF; + if(doprompt) + pprompt(); + c = rchr(runq->cmdfd); + if(!inquote && c=='\\'){ + c = rchr(runq->cmdfd); + if(c=='\n' && !incomm){ /* don't continue a comment */ + doprompt = 1; + c=' '; + } + else{ + peekc = c; + c='\\'; + } + } + doprompt = doprompt || c=='\n' || c==EOF; + if(c==EOF) + runq->eof++; + else if(flag['V'] || ndot>=2 && flag['v']) pchr(err, c); + return c; } -/* - * advance until we no longer hit horizontal space - * consumes all comments - */ -static void -skipws(void) +skipwhite(void) { - int c; - for(;;) { - c = lookahead(); - if(c=='#'){ - for(;;){ - c = lookahead(); - if(c=='\n' || c==EOF) - break; - advance(); - } - } - if(c==' ' || c=='\t') - advance(); - else - return; - } + int c; + for(;;){ + c = nextc(); + /* Why did this used to be if(!inquote && c=='#') ?? */ + if(c=='#'){ + incomm = 1; + for(;;){ + c = nextc(); + if(c=='\n' || c==EOF) { + incomm = 0; + break; + } + advance(); + } + } + if(c==' ' || c=='\t') + advance(); + else return; + } } -/* advance until we no longer hit any space */ void skipnl(void) { - int c; - for(;;) { - skipws(); - if ((c = lookahead()) != '\n') - return; - advance(); - } + int c; + for(;;){ + skipwhite(); + c = nextc(); + if(c!='\n') + return; + advance(); + } } -/* advance if next char is equal to c */ -static int nextis(int c) { - if(lookahead()==c) { - advance(); - return 1; - } - return 0; + if(nextc()==c){ + advance(); + return 1; + } + return 0; } -/* functions to append to our write buffer */ -static char* -putbyte(char *s, int c) +addtok(char *p, int val) { - if(!s) - return s; - if(s == arrend(buf)){ - *s = 0; - rcerror("out of buffer space"); - return nil; - } - *s++ = c; - return s; + if(p==0) + return 0; + if(p==&tok[NTOK-1]){ + *p = 0; + yyerror("token buffer too short"); + return 0; + } + *p++=val; + return p; } -static char* -putrune(char *s, int c) +addutf(char *p, int c) { - s = putbyte(s, c); - if (onebyte(c)) - return s; - if (twobyte(c)) - return putbyte(s, advance()); - if (threebyte(c)) { - putbyte(s, advance()); - return putbyte(s, advance()); - } - if (fourbyte(c)) { - putbyte(s, advance()); - putbyte(s, advance()); - return putbyte(s, advance()); - } - rcerror("malformed utf8 stream"); - return nil; + p = addtok(p, c); + if(twobyte(c)) /* 2-byte escape */ + return addtok(p, advance()); + if(threebyte(c)){ /* 3-byte escape */ + p = addtok(p, advance()); + return addtok(p, advance()); + } + if(fourbyte(c)){ /* 4-byte escape */ + p = addtok(p, advance()); + p = addtok(p, advance()); + return addtok(p, advance()); + } + return p; } +int lastdol; /* was the last token read '$' or '$#' or '"'? */ +int lastword; /* was the last token read a word or compound word terminator? */ -// ----------------------------------------------------------------------- -// main exports - -void -rcerror(char *fmt, ...) -{ - va_list args; - - pfmt(errio, "rc:"); - if (shell->cmd.io) - pfmt(errio, "%s:%d ", shell->cmd.name, shell->cmd.line); - - va_start(args, fmt); - vpfmt(errio, fmt, args); - va_end(args); - - pfmt(errio, "\n"); - - flush(&errio); - lastword = lastdol = 0; - while (lastc != '\n' && lastc != EOF) - advance(); - /* for debugging only */ - abort(); -} - -/* word is only modified in the event of a lexed word */ int -lex(Tree **node) +yylex(void) { - int c; - char *w = buf; - /* - * NOTE: - * we inject tokens into the lexer based on context if last token = word: - * if we see a (, then we interpret that as a subscript - * otherwise, if the next character is the first char of a word, we return a ^ operator. - */ - if(lastword){ - lastword=0; - c = lookahead(); - if(c=='('){ - advance(); - return Tlparen; - } - if(quotechr(c)) - return Tcarot; - } - - skipws(); - switch(c=advance()) { - case EOF: - lastdol = 0; - return EOF; - case '$': - lastdol = 1; - if(nextis('#')) - return Tcount; - if (nextis('"')) - return Tquote; - return Tdol; - case '&': - lastdol = 0; - if(nextis('&')) - return Tandand; - return Tand; - - case '!': - return Tbang; - case '@': - return Tsubshell; - case '~': - return Ttwiddle; - - case '|': - lastdol = 0; - if(nextis('|')){ - skipnl(); - return Toror; - } - (*node) = newtree(); - (*node)->type = Tpipe; - (*node)->redir.fd[0] = 0; - (*node)->redir.fd[1] = 1; - goto redir; - case '>': - (*node) = newtree(); - (*node)->type = Tredir; - if (nextis(c)) - (*node)->redir.type = Rappend; - else - (*node)->redir.type = Rwrite; - (*node)->redir.fd[0] = 1; - goto redir; - case '<': - (*node) = newtree(); - (*node)->type = Tredir; - if(nextis(c)) - (*node)->redir.type = Rhere; - else if(nextis('>')) - (*node)->redir.type = Rrdwr; - else - (*node)->redir.type = Rread; - (*node)->redir.fd[0] = 0; - /* fallthrough */ - redir: - if(nextis('[')) { - c = advance(); - if(c < '0' || '9' < c) { - redirerr: - rcerror("incorrect redirection syntax"); - return EOF; - } - (*node)->redir.fd[0] = 0; - do { - (*node)->redir.fd[0] = 10*(*node)->redir.fd[0]+(c-'0'); - c = advance(); - } while('0'<=c && c<='9'); - - if(c == '=') { - if((*node)->type == Tredir) - (*node)->type = Tdup; - c = advance(); - if('0'<=c && c<='9') { - (*node)->redir.type = Rdupfd; - (*node)->redir.fd[1] = (*node)->redir.fd[0]; - (*node)->redir.fd[0] = 0; - do { - (*node)->redir.fd[0] = 10*(*node)->redir.fd[0]+(c-'0'); - c = advance(); - } while('0'<=c && c<='9'); - } else { - if((*node)->type == Tpipe) - goto redirerr; - (*node)->redir.type = Rclose; - } - } - if (c != ']' - ||(*node)->type==Tdup && ((*node)->redir.type==Rhere || (*node)->redir.type==Rappend)) - goto redirerr; - } - if ((c = ((*node)->type)) == Tpipe) - skipnl(); - return c; - - case '\'': - lastdol = 0; - lastword = 1; - for(;;){ - c = advance(); - if(c==EOF) - break; - if(c=='\''){ - if(lookahead()!='\'') - break; - advance(); - } - w = putrune(w, c); - } - *w = 0; - *node = wordnode(buf); - (*node)->quoted = 1; - return Tword; - } - if (!wordchr(c)) { - lastdol = 0; - return c; - } - for(;;){ - if(c=='*'||c=='['||c=='?'||c==GLOB) - w = putbyte(w, GLOB); - w = putrune(w, c); - c = lookahead(); - if(lastdol?!varchr(c):!wordchr(c)) - break; - advance(); - } - *w = 0; - - if ((c = kwlookup(buf)) == -1) { - (*node) = wordnode(buf); - (*node)->type = c = Tword; - (*node)->quoted = 0; - lastword = 1; - } - - lastdol = 0; - return c; + int c, d = nextc(); + char *w = tok; + struct tree *t; + yylval.tree = 0; + /* + * Embarassing sneakiness: if the last token read was a quoted or unquoted + * WORD then we alter the meaning of what follows. If the next character + * is `(', we return SUB (a subscript paren) and consume the `('. Otherwise, + * if the next character is the first character of a simple or compound word, + * we insert a `^' before it. + */ + if(lastword){ + lastword = 0; + if(d=='('){ + advance(); + strcpy(tok, "( [SUB]"); + return SUB; + } + if(wordchr(d) || d=='\'' || d=='`' || d=='$' || d=='"'){ + strcpy(tok, "^"); + return '^'; + } + } + inquote = 0; + skipwhite(); + switch(c = advance()){ + case EOF: + lastdol = 0; + strcpy(tok, "EOF"); + return EOF; + case '$': + lastdol = 1; + if(nextis('#')){ + strcpy(tok, "$#"); + return COUNT; + } + if(nextis('"')){ + strcpy(tok, "$\""); + return '"'; + } + strcpy(tok, "$"); + return '$'; + case '&': + lastdol = 0; + if(nextis('&')){ + skipnl(); + strcpy(tok, "&&"); + return ANDAND; + } + strcpy(tok, "&"); + return '&'; + case '|': + lastdol = 0; + if(nextis(c)){ + skipnl(); + strcpy(tok, "||"); + return OROR; + } + case '<': + case '>': + lastdol = 0; + /* + * funny redirection tokens: + * redir: arrow | arrow '[' fd ']' + * arrow: '<' | '<<' | '>' | '>>' | '|' + * fd: digit | digit '=' | digit '=' digit + * digit: '0'|'1'|'2'|'3'|'4'|'5'|'6'|'7'|'8'|'9' + * some possibilities are nonsensical and get a message. + */ + *w++=c; + t = newtree(); + switch(c){ + case '|': + t->type = PIPE; + t->fd0 = 1; + t->fd1 = 0; + break; + case '>': + t->type = REDIR; + if(nextis(c)){ + t->rtype = APPEND; + *w++=c; + } + else t->rtype = WRITE; + t->fd0 = 1; + break; + case '<': + t->type = REDIR; + if(nextis(c)){ + t->rtype = HERE; + *w++=c; + } else if (nextis('>')){ + t->rtype = RDWR; + *w++=c; + } else t->rtype = READ; + t->fd0 = 0; + break; + } + if(nextis('[')){ + *w++='['; + c = advance(); + *w++=c; + if(c<'0' || '9'type==PIPE?"pipe syntax" + :"redirection syntax"); + return EOF; + } + t->fd0 = 0; + do{ + t->fd0 = t->fd0*10+c-'0'; + *w++=c; + c = advance(); + }while('0'<=c && c<='9'); + if(c=='='){ + *w++='='; + if(t->type==REDIR) + t->type = DUP; + c = advance(); + if('0'<=c && c<='9'){ + t->rtype = DUPFD; + t->fd1 = t->fd0; + t->fd0 = 0; + do{ + t->fd0 = t->fd0*10+c-'0'; + *w++=c; + c = advance(); + }while('0'<=c && c<='9'); + } + else{ + if(t->type==PIPE) + goto RedirErr; + t->rtype = CLOSE; + } + } + if(c!=']' + || t->type==DUP && (t->rtype==HERE || t->rtype==APPEND)) + goto RedirErr; + *w++=']'; + } + *w='\0'; + yylval.tree = t; + if(t->type==PIPE) + skipnl(); + return t->type; + case '\'': + lastdol = 0; + lastword = 1; + inquote = 1; + for(;;){ + c = advance(); + if(c==EOF) + break; + if(c=='\''){ + if(nextc()!='\'') + break; + advance(); + } + w = addutf(w, c); + } + if(w!=0) + *w='\0'; + t = token(tok, WORD); + t->quoted = 1; + yylval.tree = t; + return t->type; + } + if(!wordchr(c)){ + lastdol = 0; + tok[0] = c; + tok[1]='\0'; + return c; + } + for(;;){ + /* next line should have (char)c==GLOB, but ken's compiler is broken */ + if(c=='*' || c=='[' || c=='?' || c==(unsigned char)GLOB) + w = addtok(w, GLOB); + w = addutf(w, c); + c = nextc(); + if(lastdol?!idchr(c):!wordchr(c)) break; + advance(); + } + + lastword = 1; + lastdol = 0; + if(w!=0) + *w='\0'; + t = klook(tok); + if(t->type!=WORD) + lastword = 0; + t->quoted = 0; + yylval.tree = t; + return t->type; } -- cgit v1.2.1