From d1a19f0d477a6249d8af9322317b8434b86260ea Mon Sep 17 00:00:00 2001
From: Nicholas Noll <nbnoll@eml.cc>
Date: Wed, 13 Oct 2021 08:27:37 -0700
Subject: fix(email): bytes error. updated vendoring

---
 sys/cmd/rc/lex.c | 688 ++++++++++++++++++++++++++-----------------------------
 1 file changed, 321 insertions(+), 367 deletions(-)

(limited to 'sys/cmd/rc/lex.c')

diff --git a/sys/cmd/rc/lex.c b/sys/cmd/rc/lex.c
index f6e2b4e..253d05a 100644
--- a/sys/cmd/rc/lex.c
+++ b/sys/cmd/rc/lex.c
@@ -1,417 +1,371 @@
 #include "rc.h"
+#include "exec.h"
+#include "io.h"
+#include "getflags.h"
+#include "fns.h"
 
-#define	onebyte(c)	     ((c&0x80)==0x00)
-#define	twobyte(c)	     ((c&0xe0)==0xc0)
-#define	threebyte(c)	 ((c&0xf0)==0xe0)
-#define	fourbyte(c)	     ((c&0xf8)==0xf0)
+char *promptstr;	/* NOTE(review): not referenced in the visible part of this file; presumably the prompt text -- confirm */
+int doprompt;	/* tentative definition; the initialised definition (= 1) appears further down in this file */
+char tok[NTOK];	/* text of the token being scanned; written by yylex, addtok and addutf */
 
-// -----------------------------------------------------------------------
-// globals
-
-static int lastc, nextc=EOF, lastdol, lastword, doprompt = 1;
-static char buf[8*1024];
-
-// -----------------------------------------------------------------------
-// utilities
-
-static uchar nwordc[256] =
-{
-	1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-	1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
-	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,
-	1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0,
-	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
-};
+int getnext(void);	/* forward declaration: nextc() calls getnext() ahead of its definition */
 
 int
 wordchr(int c)
 {
-	return !nwordc[c] && c!=EOF;
+	return !strchr("\n \t#;&|^$=`'{}()<>", c) && c!=EOF;	/* nonzero unless c is EOF, NUL (strchr matches the terminator) or one of the listed delimiters */
 }
 
-
-static uchar nquotec[256] =
-{
-	1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-	1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
-	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,
-	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0,
-	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
-};
-
 int
-quotechr(char c)
+idchr(int c)
 {
-	return !nquotec[c] && c!=EOF;
+	/*
+	 * Nonzero if c is above ' ' and is not one of the punctuation characters
+	 * listed below; yylex uses this to end the word that follows a '$'.
+	 * Formerly: 'a'<=c && c<='z' || 'A'<=c && c<='Z' || '0'<=c && c<='9' || c=='_' || c=='*'
+	 */
+	return c>' ' && !strchr("!\"#$%&'()+,-./:;<=>?@[\\]^`{|}~", c);
 }
-
-static uchar nvarc[256] =
-{
-	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,
-	1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
-	1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
-	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
-};
-
+int future = EOF;	/* one-character lookahead filled by nextc(); EOF means the slot is empty */
+int doprompt = 1;	/* nonzero: getnext() calls pprompt() before its next read */
+int inquote;	/* set by yylex while scanning a '...' word; getnext() then leaves backslashes alone */
+int incomm;	/* set by skipwhite while skipping a # comment; getnext() then does not join backslash-newline */
+/*
+ * Look ahead in the input stream
+ */
 
 int
-varchr(char c)
+nextc(void)
 {
-    return !nvarc[c] && c!=EOF;
+	if(future==EOF)	/* lookahead slot empty: read one character into it */
+		future = getnext();
+	return future;	/* not consumed: repeated calls return the same character until advance() */
 }
+/*
+ * Consume the lookahead character.
+ */
 
-static
-void
-prompt(void)
-{
-    shell->cmd.line++;
-    doprompt = 0;
-}
-
-/* lookahead one byte */
-static
 int
-lookahead(void)
+advance(void)
 {
-    int c;
-
-    if(nextc != EOF)
-        return nextc;
-    if(shell->cmd.eof)
-        return EOF;
-
-    if(doprompt)
-        prompt();
-
-    c = rchr(shell->cmd.io);
-    doprompt = c == '\n' || c == EOF;
-
-    if(c == EOF)
-        shell->cmd.eof++;
-
-    return nextc = c;
+	int c = nextc();	/* make sure the lookahead slot has been filled */
+	lastc = future;	/* NOTE(review): this patch drops the file-local lastc; presumably declared in a header now -- confirm */
+	future = EOF;	/* empty the slot so the next nextc() reads a fresh character */
+	return c;	/* the character that was consumed */
 }
+/*
+ * read a character from the input stream
+ */	
 
-/* consumes the lookahead */
-static
 int
-advance(void)
+getnext(void)
 {
-    int c = lookahead();
-    lastc = nextc, nextc = EOF;
-
-    return c;
+	int c;
+	static int peekc = EOF;	/* character read after a backslash that did not start a line continuation */
+	if(peekc!=EOF){
+		c = peekc;
+		peekc = EOF;
+		return c;	/* returned as-is: no prompt, echo or eof bookkeeping on this path */
+	}
+	if(runq->eof)
+		return EOF;	/* end of input was already recorded */
+	if(doprompt)
+		pprompt();	/* NOTE(review): presumably clears doprompt -- confirm in pprompt */
+	c = rchr(runq->cmdfd);
+	if(!inquote && c=='\\'){	/* backslashes are left untouched inside '...' */
+		c = rchr(runq->cmdfd);
+		if(c=='\n' && !incomm){		/* don't continue a comment */
+			doprompt = 1;
+			c=' ';	/* backslash-newline is delivered as a single space */
+		}
+		else{
+			peekc = c;	/* hand this character out on the next call */
+			c='\\';
+		}
+	}
+	doprompt = doprompt || c=='\n' || c==EOF;
+	if(c==EOF)
+		runq->eof++;
+	else if(flag['V'] || ndot>=2 && flag['v']) pchr(err, c);	/* echo the character to err when these flags are set */
+	return c;
 }
 
-/* 
- * advance until we no longer hit horizontal space 
- * consumes all comments
- */ 
-static
 void
-skipws(void)
+skipwhite(void)
 {
-    int c;
-    for(;;) {
-        c = lookahead();
-        if(c=='#'){
-            for(;;){
-                c = lookahead();
-                if(c=='\n' || c==EOF)
-                    break;
-                advance();
-            }
-        }
-        if(c==' ' || c=='\t')
-            advance();
-        else 
-            return;
-    }
+	int c;
+	for(;;){	/* consume blanks, tabs and # comments; stops in front of anything else */
+		c = nextc();
+		/* Why did this used to be  if(!inquote && c=='#') ?? */
+		if(c=='#'){
+			incomm = 1;	/* tells getnext() not to join backslash-newline inside the comment */
+			for(;;){
+				c = nextc();
+				if(c=='\n' || c==EOF) {
+					incomm = 0;
+					break;	/* the newline or EOF itself is left unread */
+				}
+				advance();
+			}
+		}
+		if(c==' ' || c=='\t')
+			advance();
+		else return;
+	}
 }
 
-/* advance until we no longer hit any space */
 void
 skipnl(void)
 {
-    int c;
-    for(;;) {
-        skipws();
-        if ((c = lookahead()) != '\n')
-            return;
-        advance();
-    }
+	int c;
+	for(;;){	/* like skipwhite(), but newlines are consumed as well */
+		skipwhite();
+		c = nextc();
+		if(c!='\n')
+			return;	/* first character that is neither white space, comment nor newline stays unread */
+		advance();
+	}
 }
 
-/* advance if next char is equal to c */
-static
 int
 nextis(int c)
 {
-    if(lookahead()==c) {
-        advance();
-        return 1;
-    }
-    return 0;
+	if(nextc()==c){	/* lookahead matches: consume it */
+		advance();
+		return 1;
+	}
+	return 0;	/* no match: the lookahead is left in place */
 }
 
-/* functions to append to our write buffer */
-static
 char*
-putbyte(char *s, int c)
+addtok(char *p, int val)
 {
-    if(!s)
-        return s;
-    if(s == arrend(buf)){
-        *s = 0;
-        rcerror("out of buffer space");
-        return nil;
-    }
-    *s++ = c;
-    return s;
+	if(p==0)
+		return 0;	/* an earlier overflow was already reported: stay in the failed state */
+	if(p==&tok[NTOK-1]){	/* only the slot reserved for the terminating NUL is left */
+		*p = 0;
+		yyerror("token buffer too short");
+		return 0;
+	}
+	*p++=val;	/* store one byte of the token */
+	return p;	/* next free position in tok */
 }
 
-static
 char*
-putrune(char *s, int c)
+addutf(char *p, int c)
 {
-    s = putbyte(s, c);
-    if (onebyte(c))
-        return s;
-    if (twobyte(c))
-        return putbyte(s, advance());
-    if (threebyte(c)) {
-        putbyte(s, advance());
-        return putbyte(s, advance());
-    }
-    if (fourbyte(c)) {
-        putbyte(s, advance());
-        putbyte(s, advance());
-        return putbyte(s, advance());
-    }
-    rcerror("malformed utf8 stream");
-    return nil;
+	p = addtok(p, c);	/* lead byte; the remaining bytes of the sequence are read with advance() */
+	if(twobyte(c))	/* NOTE(review): this patch deletes the twobyte/threebyte/fourbyte macros from this file; presumably a header supplies them -- confirm */
+		return addtok(p, advance());
+	if(threebyte(c)){	/* 3-byte sequence: two more bytes follow */
+		p = addtok(p, advance());
+		return addtok(p, advance());
+	}
+	if(fourbyte(c)){	/* 4-byte sequence: three more bytes follow */
+		p = addtok(p, advance());
+		p = addtok(p, advance());
+		return addtok(p, advance());
+	}
+	return p;	/* any other lead byte is stored on its own */
 }
+int lastdol;	/* was the last token read '$', '$#' or '$"'?  yylex then ends the next word at the first non-idchr */
+int lastword;	/* was the last token read a word or compound word terminator?  yylex then may return SUB or an implicit '^' */
 
-// -----------------------------------------------------------------------
-// main exports
-
-void
-rcerror(char *fmt, ...)
-{
-    va_list args;
-
-    pfmt(errio, "rc:");
-    if (shell->cmd.io)
-        pfmt(errio, "%s:%d ", shell->cmd.name, shell->cmd.line);
-
-    va_start(args, fmt);
-    vpfmt(errio, fmt, args);
-    va_end(args);
-
-    pfmt(errio, "\n");
-
-    flush(&errio);
-    lastword = lastdol = 0;
-    while (lastc != '\n' && lastc != EOF)
-        advance();
-    /* for debugging only */
-    abort();
-}
-
-/* word is only modified in the event of a lexed word */
 int
-lex(Tree **node)
+yylex(void)	/* returns the next token code; yylval.tree is the token's tree node, or 0 when it has none */
 {
-    int c;
-    char *w = buf;
-    /* 
-     * NOTE: 
-     * we inject tokens into the lexer based on context if last token = word:
-     * if we see a (, then we interpret that as a subscript
-     * otherwise, if the next character is the first char of a word, we return a ^ operator.
-     */ 
-    if(lastword){
-        lastword=0;
-        c = lookahead();
-        if(c=='('){
-            advance();
-            return Tlparen;
-        }
-        if(quotechr(c))
-            return Tcarot;
-    }
-
-    skipws();
-    switch(c=advance()) {
-    case EOF:
-        lastdol = 0;
-        return EOF;
-    case '$':
-        lastdol = 1;
-        if(nextis('#'))
-            return Tcount;
-        if (nextis('"'))
-            return Tquote;
-        return Tdol;
-    case '&':
-        lastdol = 0;
-        if(nextis('&'))
-            return Tandand;
-        return Tand;
-
-    case '!':
-        return Tbang;
-    case '@':
-        return Tsubshell;
-    case '~':
-        return Ttwiddle;
-
-    case '|':
-        lastdol = 0;
-        if(nextis('|')){
-            skipnl();
-            return Toror;
-        }
-        (*node) = newtree();
-        (*node)->type = Tpipe;
-        (*node)->redir.fd[0] = 0;
-        (*node)->redir.fd[1] = 1;
-        goto redir;
-    case '>':
-        (*node) = newtree();
-        (*node)->type = Tredir;
-        if (nextis(c))
-            (*node)->redir.type = Rappend;
-        else
-            (*node)->redir.type = Rwrite;
-        (*node)->redir.fd[0] = 1;
-        goto redir;
-    case '<':
-        (*node) = newtree();
-        (*node)->type = Tredir;
-        if(nextis(c))
-            (*node)->redir.type = Rhere;
-        else if(nextis('>'))
-            (*node)->redir.type = Rrdwr;
-        else
-            (*node)->redir.type = Rread;
-        (*node)->redir.fd[0] = 0;
-        /* fallthrough */
-    redir:
-        if(nextis('[')) {
-            c = advance();
-            if(c < '0' || '9' < c) {
-            redirerr:
-                rcerror("incorrect redirection syntax");
-                return EOF;
-            }
-            (*node)->redir.fd[0] = 0;
-            do {
-                (*node)->redir.fd[0] = 10*(*node)->redir.fd[0]+(c-'0');
-                c = advance();
-            } while('0'<=c && c<='9');
-
-            if(c == '=') {
-                if((*node)->type == Tredir)
-                    (*node)->type = Tdup;
-                c = advance();
-                if('0'<=c && c<='9') {
-                    (*node)->redir.type  = Rdupfd;
-                    (*node)->redir.fd[1] = (*node)->redir.fd[0];
-                    (*node)->redir.fd[0] = 0;
-                    do {
-                        (*node)->redir.fd[0] = 10*(*node)->redir.fd[0]+(c-'0');
-                        c = advance();
-                    } while('0'<=c && c<='9');
-                } else {
-                    if((*node)->type == Tpipe)
-                        goto redirerr;
-                    (*node)->redir.type = Rclose;
-                }
-            }
-            if (c != ']'
-            ||(*node)->type==Tdup && ((*node)->redir.type==Rhere || (*node)->redir.type==Rappend))
-                goto redirerr;
-        }
-        if ((c = ((*node)->type)) == Tpipe)
-            skipnl();
-        return c;
-
-    case '\'':
-        lastdol  = 0;
-        lastword = 1;
-        for(;;){
-            c = advance();
-            if(c==EOF)
-                break;
-            if(c=='\''){
-                if(lookahead()!='\'')
-                    break;
-                advance();
-            }
-            w = putrune(w, c);
-        }
-        *w = 0;
-        *node = wordnode(buf);
-        (*node)->quoted = 1;
-        return Tword;
-    }
-    if (!wordchr(c)) {
-        lastdol = 0;
-        return c;
-    }
-    for(;;){
-        if(c=='*'||c=='['||c=='?'||c==GLOB)
-            w = putbyte(w, GLOB);
-        w = putrune(w, c);
-        c = lookahead();
-        if(lastdol?!varchr(c):!wordchr(c))
-            break;
-        advance();
-    }
-    *w = 0;
-
-    if ((c = kwlookup(buf)) == -1) {
-        (*node) = wordnode(buf);
-        (*node)->type = c = Tword;
-        (*node)->quoted   = 0;
-        lastword = 1;
-    }
-
-    lastdol  = 0;
-    return c;
+	int c, d = nextc();	/* d: lookahead taken before any white space is skipped; used only when lastword is set */
+	char *w = tok;	/* write position in the token text buffer */
+	struct tree *t;
+	yylval.tree = 0;
+	/*
+	 * Embarrassing sneakiness:  if the last token read was a quoted or unquoted
+	 * WORD then we alter the meaning of what follows.  If the next character
+	 * is `(', we return SUB (a subscript paren) and consume the `('.  Otherwise,
+	 * if the next character is the first character of a simple or compound word,
+	 * we insert a `^' before it.
+	 */
+	if(lastword){
+		lastword = 0;
+		if(d=='('){
+			advance();
+			strcpy(tok, "( [SUB]");
+			return SUB;
+		}
+		if(wordchr(d) || d=='\'' || d=='`' || d=='$' || d=='"'){
+			strcpy(tok, "^");
+			return '^';	/* implicit concatenation: d itself is not consumed */
+		}
+	}
+	inquote = 0;
+	skipwhite();
+	switch(c = advance()){
+	case EOF:
+		lastdol = 0;
+		strcpy(tok, "EOF");
+		return EOF;
+	case '$':
+		lastdol = 1;
+		if(nextis('#')){
+			strcpy(tok, "$#");
+			return COUNT;
+		}
+		if(nextis('"')){
+			strcpy(tok, "$\"");
+			return '"';
+		}
+		strcpy(tok, "$");
+		return '$';
+	case '&':
+		lastdol = 0;
+		if(nextis('&')){
+			skipnl();
+			strcpy(tok, "&&");
+			return ANDAND;
+		}
+		strcpy(tok, "&");
+		return '&';
+	case '|':
+		lastdol = 0;
+		if(nextis(c)){
+			skipnl();
+			strcpy(tok, "||");
+			return OROR;
+		}	/* fall through: a single '|' is handled together with the redirections */
+	case '<':
+	case '>':
+		lastdol = 0;
+		/*
+		 * funny redirection tokens:
+		 *	redir:	arrow | arrow '[' fd ']'
+		 *	arrow:	'<' | '<<' | '>' | '>>' | '|'
+		 *	fd:	digit | digit '=' | digit '=' digit
+		 *	digit:	'0'|'1'|'2'|'3'|'4'|'5'|'6'|'7'|'8'|'9'
+		 * some possibilities are nonsensical and get a message.
+		 */
+		*w++=c;
+		t = newtree();
+		switch(c){
+		case '|':
+			t->type = PIPE;
+			t->fd0 = 1;
+			t->fd1 = 0;
+			break;
+		case '>':
+			t->type = REDIR;
+			if(nextis(c)){
+				t->rtype = APPEND;
+				*w++=c;
+			}
+			else t->rtype = WRITE;
+			t->fd0 = 1;
+			break;
+		case '<':
+			t->type = REDIR;
+			if(nextis(c)){
+				t->rtype = HERE;
+				*w++=c;
+			} else if (nextis('>')){
+				t->rtype = RDWR;
+				*w++='>';	/* was *w++=c, which recorded the "<>" token as "<<" in tok */
+			} else t->rtype = READ;
+			t->fd0 = 0;
+			break;
+		}
+		if(nextis('[')){
+			*w++='[';
+			c = advance();
+			if(c<'0' || '9'<c){
+				*w++=c;	/* keep the offending character in tok (presumably shown by yyerror) */
+			RedirErr:
+				*w = 0;
+				yyerror(t->type==PIPE?"pipe syntax"
+						:"redirection syntax");
+				return EOF;
+			}
+			t->fd0 = 0;
+			do{
+				t->fd0 = t->fd0*10+c-'0';
+				*w++=c;	/* each fd digit is stored once (the first used to be stored twice); NOTE(review): w is not bounds-checked against tok[NTOK] */
+				c = advance();
+			}while('0'<=c && c<='9');
+			if(c=='='){
+				*w++='=';
+				if(t->type==REDIR)
+					t->type = DUP;
+				c = advance();
+				if('0'<=c && c<='9'){
+					t->rtype = DUPFD;
+					t->fd1 = t->fd0;	/* the number before '=' moves to fd1; fd0 is re-read from the digits after '=' */
+					t->fd0 = 0;
+					do{
+						t->fd0 = t->fd0*10+c-'0';
+						*w++=c;	/* NOTE(review): same unchecked write into tok as above */
+						c = advance();
+					}while('0'<=c && c<='9');
+				}
+				else{
+					if(t->type==PIPE)
+						goto RedirErr;	/* "|[n=]" is rejected */
+					t->rtype = CLOSE;
+				}
+			}
+			if(c!=']'
+			|| t->type==DUP && (t->rtype==HERE || t->rtype==APPEND))
+				goto RedirErr;
+			*w++=']';
+		}
+		*w='\0';
+		yylval.tree = t;
+		if(t->type==PIPE)
+			skipnl();	/* newlines after a pipe are skipped */
+		return t->type;
+	case '\'':
+		lastdol = 0;
+		lastword = 1;
+		inquote = 1;	/* getnext() stops interpreting backslashes until the next yylex call */
+		for(;;){
+			c = advance();
+			if(c==EOF)
+				break;	/* an unterminated quote ends silently at end of input */
+			if(c=='\''){
+				if(nextc()!='\'')
+					break;
+				advance();	/* '' inside a quoted word stands for one literal quote */
+			}
+			w = addutf(w, c);
+		}
+		if(w!=0)	/* w is 0 after an overflow; addtok has terminated tok already */
+			*w='\0';
+		t = token(tok, WORD);
+		t->quoted = 1;
+		yylval.tree = t;
+		return t->type;
+	}
+	if(!wordchr(c)){
+		lastdol = 0;
+		tok[0] = c;
+		tok[1]='\0';
+		return c;	/* any other non-word character is its own token */
+	}
+	for(;;){
+		/* next line should have (char)c==GLOB, but ken's compiler is broken */
+		if(c=='*' || c=='[' || c=='?' || c==(unsigned char)GLOB)
+			w = addtok(w, GLOB);	/* a GLOB byte is stored in front of each of these characters */
+		w = addutf(w, c);
+		c = nextc();
+		if(lastdol?!idchr(c):!wordchr(c)) break;	/* after '$' the word ends at the first non-idchr */
+		advance();
+	}
+
+	lastword = 1;
+	lastdol = 0;
+	if(w!=0)
+		*w='\0';
+	t = klook(tok);
+	if(t->type!=WORD)
+		lastword = 0;	/* klook returned something other than a plain WORD: no implicit '^' or SUB after it */
+	t->quoted = 0;
+	yylval.tree = t;
+	return t->type;
 }
-- 
cgit v1.2.1