From 6f2cac23a7e759c464ef52103fac929e1eeb6b10 Mon Sep 17 00:00:00 2001 From: Nicholas Noll Date: Sat, 9 Oct 2021 10:30:50 -0700 Subject: feat(rc): added shell --- sys/cmd/rc/lex.c | 417 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 417 insertions(+) create mode 100644 sys/cmd/rc/lex.c (limited to 'sys/cmd/rc/lex.c') diff --git a/sys/cmd/rc/lex.c b/sys/cmd/rc/lex.c new file mode 100644 index 0000000..f6e2b4e --- /dev/null +++ b/sys/cmd/rc/lex.c @@ -0,0 +1,417 @@ +#include "rc.h" + +#define onebyte(c) ((c&0x80)==0x00) +#define twobyte(c) ((c&0xe0)==0xc0) +#define threebyte(c) ((c&0xf0)==0xe0) +#define fourbyte(c) ((c&0xf8)==0xf0) + +// ----------------------------------------------------------------------- +// globals + +static int lastc, nextc=EOF, lastdol, lastword, doprompt = 1; +static char buf[8*1024]; + +// ----------------------------------------------------------------------- +// utilities + +static uchar nwordc[256] = +{ + 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, + 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +}; + +int +wordchr(int c) +{ + return !nwordc[c] && c!=EOF; +} + + +static uchar nquotec[256] = +{ + 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +}; + +int +quotechr(char c) +{ + return !nquotec[c] && c!=EOF; +} + +static uchar nvarc[256] = +{ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, + 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, + 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +}; + + +int +varchr(char c) +{ + return !nvarc[c] && c!=EOF; +} + +static +void +prompt(void) +{ + shell->cmd.line++; + doprompt = 0; +} + +/* lookahead one byte */ +static +int +lookahead(void) +{ + int c; + + if(nextc != EOF) + return nextc; + if(shell->cmd.eof) + return EOF; + + if(doprompt) + prompt(); + + c = rchr(shell->cmd.io); + doprompt = c == '\n' || c == EOF; + + if(c == EOF) + shell->cmd.eof++; + + return nextc = c; +} + +/* consumes the lookahead */ +static +int +advance(void) +{ + int c = lookahead(); + lastc = nextc, nextc = EOF; + + return c; +} + +/* + * advance until we no longer hit horizontal space + * consumes all comments + */ +static +void +skipws(void) +{ + int c; + for(;;) { + c = lookahead(); + if(c=='#'){ + for(;;){ + c = lookahead(); + if(c=='\n' || c==EOF) + break; + advance(); + } + } + if(c==' ' || c=='\t') + advance(); + else + return; + } +} + +/* advance until we no longer hit any space */ +void +skipnl(void) +{ + int c; + for(;;) { + skipws(); + if ((c = lookahead()) != '\n') + return; + advance(); + } +} + +/* advance if next char is equal to c */ +static +int +nextis(int c) +{ + if(lookahead()==c) { + advance(); + return 1; + } + return 0; +} + +/* functions to append to our write buffer */ +static +char* +putbyte(char *s, int c) +{ + if(!s) + return s; + if(s == arrend(buf)){ + *s = 0; + rcerror("out of buffer space"); + return nil; + } + *s++ = c; + return s; +} + +static +char* +putrune(char *s, int c) +{ + s = putbyte(s, c); + if (onebyte(c)) + return s; + if (twobyte(c)) + return putbyte(s, advance()); + if (threebyte(c)) { + putbyte(s, advance()); + return putbyte(s, advance()); + } + if (fourbyte(c)) { + putbyte(s, advance()); + putbyte(s, advance()); + return putbyte(s, advance()); + } + rcerror("malformed utf8 stream"); + return nil; +} + +// ----------------------------------------------------------------------- +// main exports + +void +rcerror(char *fmt, ...) +{ + va_list args; + + pfmt(errio, "rc:"); + if (shell->cmd.io) + pfmt(errio, "%s:%d ", shell->cmd.name, shell->cmd.line); + + va_start(args, fmt); + vpfmt(errio, fmt, args); + va_end(args); + + pfmt(errio, "\n"); + + flush(&errio); + lastword = lastdol = 0; + while (lastc != '\n' && lastc != EOF) + advance(); + /* for debugging only */ + abort(); +} + +/* word is only modified in the event of a lexed word */ +int +lex(Tree **node) +{ + int c; + char *w = buf; + /* + * NOTE: + * we inject tokens into the lexer based on context if last token = word: + * if we see a (, then we interpret that as a subscript + * otherwise, if the next character is the first char of a word, we return a ^ operator. + */ + if(lastword){ + lastword=0; + c = lookahead(); + if(c=='('){ + advance(); + return Tlparen; + } + if(quotechr(c)) + return Tcarot; + } + + skipws(); + switch(c=advance()) { + case EOF: + lastdol = 0; + return EOF; + case '$': + lastdol = 1; + if(nextis('#')) + return Tcount; + if (nextis('"')) + return Tquote; + return Tdol; + case '&': + lastdol = 0; + if(nextis('&')) + return Tandand; + return Tand; + + case '!': + return Tbang; + case '@': + return Tsubshell; + case '~': + return Ttwiddle; + + case '|': + lastdol = 0; + if(nextis('|')){ + skipnl(); + return Toror; + } + (*node) = newtree(); + (*node)->type = Tpipe; + (*node)->redir.fd[0] = 0; + (*node)->redir.fd[1] = 1; + goto redir; + case '>': + (*node) = newtree(); + (*node)->type = Tredir; + if (nextis(c)) + (*node)->redir.type = Rappend; + else + (*node)->redir.type = Rwrite; + (*node)->redir.fd[0] = 1; + goto redir; + case '<': + (*node) = newtree(); + (*node)->type = Tredir; + if(nextis(c)) + (*node)->redir.type = Rhere; + else if(nextis('>')) + (*node)->redir.type = Rrdwr; + else + (*node)->redir.type = Rread; + (*node)->redir.fd[0] = 0; + /* fallthrough */ + redir: + if(nextis('[')) { + c = advance(); + if(c < '0' || '9' < c) { + redirerr: + rcerror("incorrect redirection syntax"); + return EOF; + } + (*node)->redir.fd[0] = 0; + do { + (*node)->redir.fd[0] = 10*(*node)->redir.fd[0]+(c-'0'); + c = advance(); + } while('0'<=c && c<='9'); + + if(c == '=') { + if((*node)->type == Tredir) + (*node)->type = Tdup; + c = advance(); + if('0'<=c && c<='9') { + (*node)->redir.type = Rdupfd; + (*node)->redir.fd[1] = (*node)->redir.fd[0]; + (*node)->redir.fd[0] = 0; + do { + (*node)->redir.fd[0] = 10*(*node)->redir.fd[0]+(c-'0'); + c = advance(); + } while('0'<=c && c<='9'); + } else { + if((*node)->type == Tpipe) + goto redirerr; + (*node)->redir.type = Rclose; + } + } + if (c != ']' + ||(*node)->type==Tdup && ((*node)->redir.type==Rhere || (*node)->redir.type==Rappend)) + goto redirerr; + } + if ((c = ((*node)->type)) == Tpipe) + skipnl(); + return c; + + case '\'': + lastdol = 0; + lastword = 1; + for(;;){ + c = advance(); + if(c==EOF) + break; + if(c=='\''){ + if(lookahead()!='\'') + break; + advance(); + } + w = putrune(w, c); + } + *w = 0; + *node = wordnode(buf); + (*node)->quoted = 1; + return Tword; + } + if (!wordchr(c)) { + lastdol = 0; + return c; + } + for(;;){ + if(c=='*'||c=='['||c=='?'||c==GLOB) + w = putbyte(w, GLOB); + w = putrune(w, c); + c = lookahead(); + if(lastdol?!varchr(c):!wordchr(c)) + break; + advance(); + } + *w = 0; + + if ((c = kwlookup(buf)) == -1) { + (*node) = wordnode(buf); + (*node)->type = c = Tword; + (*node)->quoted = 0; + lastword = 1; + } + + lastdol = 0; + return c; +} -- cgit v1.2.1