about | summary | refs | log | tree | commit | diff
path: root/sys/cmd/rc/lex.c
diff options
context:
space:
mode:
Diffstat (limited to 'sys/cmd/rc/lex.c')
-rw-r--r-- sys/cmd/rc/lex.c | 417
1 files changed, 417 insertions, 0 deletions
diff --git a/sys/cmd/rc/lex.c b/sys/cmd/rc/lex.c
new file mode 100644
index 0000000..f6e2b4e
--- /dev/null
+++ b/sys/cmd/rc/lex.c
@@ -0,0 +1,417 @@
+#include "rc.h"
+
+/*
+ * Classify a UTF-8 lead byte by the length of the sequence it introduces:
+ *   0xxxxxxx one byte, 110xxxxx two, 1110xxxx three, 11110xxx four.
+ * The argument is parenthesized so that expressions such as a|b expand
+ * with the intended precedence.
+ */
+#define onebyte(c)	(((c)&0x80)==0x00)
+#define twobyte(c)	(((c)&0xe0)==0xc0)
+#define threebyte(c)	(((c)&0xf0)==0xe0)
+#define fourbyte(c)	(((c)&0xf8)==0xf0)
+
+// -----------------------------------------------------------------------
+// globals
+
+static int lastc;		/* byte most recently consumed by advance() */
+static int nextc = EOF;		/* cached lookahead byte; EOF when nothing is cached */
+static int lastdol;		/* set once '$' has been lexed; selects varchr() for the following word */
+static int lastword;		/* set once a word has been lexed; enables ( and ^ injection in lex() */
+static int doprompt = 1;	/* set when the next read starts a new input line */
+static char buf[8*1024];	/* text of the word currently being lexed */
+
+// -----------------------------------------------------------------------
+// utilities
+
+/*
+ * nwordc[b] is nonzero for each byte b that may NOT appear in an
+ * unquoted word; every entry not listed is zero.
+ * The character constants assume an ASCII execution character set.
+ */
+static uchar nwordc[256] =
+{
+	['\0'] = 1, ['\t'] = 1, ['\n'] = 1,
+	[' ']  = 1, ['#']  = 1, ['$']  = 1, ['&']  = 1, ['\''] = 1,
+	['(']  = 1, [')']  = 1, [';']  = 1, ['<']  = 1, ['=']  = 1,
+	['>']  = 1, ['^']  = 1, ['`']  = 1, ['{']  = 1, ['|']  = 1,
+	['}']  = 1,
+};
+
+/*
+ * wordchr: nonzero if c may appear inside an unquoted word.
+ * c is a byte value or EOF; EOF is never a word character.
+ */
+int
+wordchr(int c)
+{
+	/* reject EOF before touching the table: it is not a valid index */
+	if(c == EOF)
+		return 0;
+	return !nwordc[(uchar)c];
+}
+
+
+/*
+ * nquotec[b] is nonzero for each byte b that does NOT trigger an implicit
+ * ^ when it directly follows a word; every entry not listed is zero.
+ * Unlike nwordc, the bytes $ ' and ` are left unmarked here.
+ * The character constants assume an ASCII execution character set.
+ */
+static uchar nquotec[256] =
+{
+	['\0'] = 1, ['\t'] = 1, ['\n'] = 1,
+	[' ']  = 1, ['#']  = 1, ['&']  = 1, ['(']  = 1, [')']  = 1,
+	[';']  = 1, ['<']  = 1, ['=']  = 1, ['>']  = 1,
+	['^']  = 1, ['{']  = 1, ['|']  = 1, ['}']  = 1,
+};
+
+/*
+ * quotechr: nonzero if c, seen directly after a word, starts another
+ * word-like item (so the lexer should inject a ^ between them).
+ *
+ * The parameter is a plain char, so EOF cannot be represented reliably:
+ * where char is signed the byte 0xff compares equal to EOF, and where it
+ * is unsigned EOF arrives as 0xff.  Callers holding an int that may be
+ * EOF should test for it before calling.
+ */
+int
+quotechr(char c)
+{
+	if(c == EOF)
+		return 0;
+	/* index as an unsigned byte: a signed char is negative for bytes >= 0x80 */
+	return !nquotec[(uchar)c];
+}
+
+/*
+ * nvarc[b] is nonzero for each byte b that may NOT appear in a variable
+ * name.  Reading the rows below as ASCII, the unmarked (allowed) bytes are
+ * the digits, the upper- and lower-case letters, '_', 0x7f, and every
+ * byte from 0x80 up.
+ */
+static uchar nvarc[256] =
+{
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,
+ 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
+ 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+};
+
+
+/*
+ * varchr: nonzero if c may appear in a variable name.
+ *
+ * The parameter is a plain char, so EOF cannot be represented reliably:
+ * where char is signed the byte 0xff compares equal to EOF, and where it
+ * is unsigned EOF arrives as 0xff.  Callers holding an int that may be
+ * EOF should test for it before calling.
+ */
+int
+varchr(char c)
+{
+	if(c == EOF)
+		return 0;
+	/* index as an unsigned byte: a signed char is negative for bytes >= 0x80 */
+	return !nvarc[(uchar)c];
+}
+
+/* begin a new input line: clear the pending flag and bump the line counter */
+static void
+prompt(void)
+{
+	doprompt = 0;
+	shell->cmd.line += 1;
+}
+
+/*
+ * lookahead: return the next input byte without consuming it.
+ * A byte read here stays cached in nextc until advance() takes it.
+ * Once the input has reported EOF, no further reads are attempted.
+ */
+static int
+lookahead(void)
+{
+	int ch;
+
+	if(nextc == EOF){
+		if(shell->cmd.eof)
+			return EOF;
+		if(doprompt)
+			prompt();
+
+		ch = rchr(shell->cmd.io);
+		if(ch == EOF){
+			doprompt = 1;
+			shell->cmd.eof++;
+		}else
+			doprompt = (ch == '\n');
+		nextc = ch;
+	}
+	return nextc;
+}
+
+/*
+ * advance: consume and return the lookahead byte.
+ * The consumed byte is remembered in lastc and the cache is emptied.
+ */
+static int
+advance(void)
+{
+	int ch;
+
+	ch = lookahead();
+	lastc = nextc;
+	nextc = EOF;
+	return ch;
+}
+
+/*
+ * skipws: consume spaces and tabs.  A '#' comment is consumed up to, but
+ * not including, the newline or EOF that ends it.
+ */
+static void
+skipws(void)
+{
+	int ch;
+
+	for(;;){
+		ch = lookahead();
+		if(ch == ' ' || ch == '\t'){
+			advance();
+			continue;
+		}
+		if(ch == '#'){
+			/* drop the comment text; its terminator stays in the lookahead */
+			do{
+				advance();
+				ch = lookahead();
+			}while(ch != '\n' && ch != EOF);
+		}
+		return;
+	}
+}
+
+/* skipnl: consume blanks, comments and newlines until something else is next */
+void
+skipnl(void)
+{
+	skipws();
+	while(lookahead() == '\n'){
+		advance();
+		skipws();
+	}
+}
+
+/* nextis: if the lookahead byte equals c, consume it and return 1; else return 0 */
+static int
+nextis(int c)
+{
+	if(lookahead() != c)
+		return 0;
+	advance();
+	return 1;
+}
+
+/* functions to append to our write buffer */
+
+/*
+ * putbyte: store byte c at s (a position inside buf) and return the next
+ * free position.
+ *
+ * A nil s is passed through unchanged so that an earlier failure
+ * propagates.  The last byte of buf is reserved for the terminating NUL
+ * that lex() appends, so when s reaches it the buffer is terminated, the
+ * overflow is reported through rcerror(), and nil is returned.
+ */
+static char*
+putbyte(char *s, int c)
+{
+	char *last = &buf[sizeof(buf)-1];
+
+	if(!s)
+		return s;
+	if(s >= last){
+		/* terminate inside the array; never write past its end */
+		*last = 0;
+		rcerror("out of buffer space");
+		return nil;
+	}
+	*s++ = c;
+	return s;
+}
+
+/*
+ * putrune: append the UTF-8 sequence whose lead byte is c.
+ *
+ * c has already been consumed by the caller; the continuation bytes are
+ * pulled from the input with advance().  Returns the next free position
+ * in buf, or nil if the buffer overflowed or c is not a valid lead byte.
+ * Continuation bytes are copied without being validated.
+ */
+static char*
+putrune(char *s, int c)
+{
+	int more;
+
+	s = putbyte(s, c);
+	if(onebyte(c))
+		more = 0;
+	else if(twobyte(c))
+		more = 1;
+	else if(threebyte(c))
+		more = 2;
+	else if(fourbyte(c))
+		more = 3;
+	else{
+		rcerror("malformed utf8 stream");
+		return nil;
+	}
+
+	/* thread the write position through every call so no byte is overwritten */
+	while(more-- > 0)
+		s = putbyte(s, advance());
+	return s;
+}
+
+// -----------------------------------------------------------------------
+// main exports
+
+/*
+ * rcerror: print a formatted error message on errio and resynchronize.
+ *
+ * The message is prefixed with "rc:" and, when the current command has an
+ * io attached, with its name and line number.  The lexer's word/dollar
+ * context is then cleared and the rest of the current input line is
+ * discarded.  At present the function finishes by calling abort().
+ */
+void
+rcerror(char *fmt, ...)
+{
+	va_list ap;
+
+	pfmt(errio, "rc:");
+	if(shell->cmd.io)
+		pfmt(errio, "%s:%d ", shell->cmd.name, shell->cmd.line);
+
+	va_start(ap, fmt);
+	vpfmt(errio, fmt, ap);
+	va_end(ap);
+	pfmt(errio, "\n");
+
+	/* NOTE(review): pfmt receives errio but flush receives &errio; confirm flush's signature */
+	flush(&errio);
+
+	lastdol = 0;
+	lastword = 0;
+
+	/* throw away input up to and including the end of the current line */
+	for(;;){
+		if(lastc == '\n' || lastc == EOF)
+			break;
+		advance();
+	}
+
+	/* for debugging only */
+	abort();
+}
+
+/*
+ * lex: return the next token from the command input.
+ *
+ * *node is assigned only for tokens that carry data: a word gets a word
+ * node, and a pipe or redirection gets a fresh tree holding its
+ * redirection type and file descriptors.  For every other token *node is
+ * left untouched.
+ *
+ * Returns a T* token code, a keyword code from kwlookup(), the character
+ * itself for any other non-word character, or EOF at end of input and
+ * after a redirection syntax error.
+ */
+int
+lex(Tree **node)
+{
+	int c;
+	char *w = buf;
+	/*
+	 * NOTE:
+	 * we inject tokens into the lexer based on context if last token = word:
+	 * if we see a (, then we interpret that as a subscript
+	 * otherwise, if the next character is the first char of a word, we return a ^ operator.
+	 */
+	if(lastword){
+		lastword=0;
+		c = lookahead();
+		if(c=='('){
+			advance();
+			return Tlparen;
+		}
+		/* quotechr takes a char, so EOF has to be filtered out here */
+		if(c!=EOF && quotechr(c))
+			return Tcarot;
+	}
+
+	skipws();
+	switch(c=advance()) {
+	case EOF:
+		lastdol = 0;
+		return EOF;
+	case '$':
+		lastdol = 1;
+		if(nextis('#'))
+			return Tcount;
+		if (nextis('"'))
+			return Tquote;
+		return Tdol;
+	case '&':
+		lastdol = 0;
+		if(nextis('&'))
+			return Tandand;
+		return Tand;
+
+	case '!':
+		return Tbang;
+	case '@':
+		return Tsubshell;
+	case '~':
+		return Ttwiddle;
+
+	case '|':
+		lastdol = 0;
+		if(nextis('|')){
+			skipnl();
+			return Toror;
+		}
+		(*node) = newtree();
+		(*node)->type = Tpipe;
+		(*node)->redir.fd[0] = 0;
+		(*node)->redir.fd[1] = 1;
+		goto redir;
+	case '>':
+		(*node) = newtree();
+		(*node)->type = Tredir;
+		if (nextis(c))
+			(*node)->redir.type = Rappend;
+		else
+			(*node)->redir.type = Rwrite;
+		(*node)->redir.fd[0] = 1;
+		goto redir;
+	case '<':
+		(*node) = newtree();
+		(*node)->type = Tredir;
+		if(nextis(c))
+			(*node)->redir.type = Rhere;
+		else if(nextis('>'))
+			(*node)->redir.type = Rrdwr;
+		else
+			(*node)->redir.type = Rread;
+		(*node)->redir.fd[0] = 0;
+		/* fallthrough */
+	redir:
+		/* optional [fd], [fd=fd] or [fd=] suffix */
+		if(nextis('[')) {
+			c = advance();
+			if(c < '0' || '9' < c) {
+			redirerr:
+				rcerror("incorrect redirection syntax");
+				return EOF;
+			}
+			(*node)->redir.fd[0] = 0;
+			do {
+				(*node)->redir.fd[0] = 10*(*node)->redir.fd[0]+(c-'0');
+				c = advance();
+			} while('0'<=c && c<='9');
+
+			if(c == '=') {
+				if((*node)->type == Tredir)
+					(*node)->type = Tdup;
+				c = advance();
+				if('0'<=c && c<='9') {
+					/* [a=b]: the first number moves to fd[1], the second is read into fd[0] */
+					(*node)->redir.type = Rdupfd;
+					(*node)->redir.fd[1] = (*node)->redir.fd[0];
+					(*node)->redir.fd[0] = 0;
+					do {
+						(*node)->redir.fd[0] = 10*(*node)->redir.fd[0]+(c-'0');
+						c = advance();
+					} while('0'<=c && c<='9');
+				} else {
+					/* [a=]: close; not allowed on a pipe */
+					if((*node)->type == Tpipe)
+						goto redirerr;
+					(*node)->redir.type = Rclose;
+				}
+			}
+			if (c != ']'
+			|| ((*node)->type==Tdup && ((*node)->redir.type==Rhere || (*node)->redir.type==Rappend)))
+				goto redirerr;
+		}
+		if ((c = ((*node)->type)) == Tpipe)
+			skipnl();
+		return c;
+
+	case '\'':
+		lastdol = 0;
+		lastword = 1;
+		for(;;){
+			c = advance();
+			if(c==EOF)
+				break;
+			if(c=='\''){
+				/* a doubled quote stands for one literal quote */
+				if(lookahead()!='\'')
+					break;
+				advance();
+			}
+			w = putrune(w, c);
+		}
+		/* w is nil once putbyte/putrune has reported an error */
+		if(w)
+			*w = 0;
+		*node = wordnode(buf);
+		(*node)->quoted = 1;
+		return Tword;
+	}
+	if (!wordchr(c)) {
+		lastdol = 0;
+		return c;
+	}
+	for(;;){
+		/* glob metacharacters (and a literal GLOB byte) are prefixed with GLOB */
+		if(c=='*'||c=='['||c=='?'||c==GLOB)
+			w = putbyte(w, GLOB);
+		w = putrune(w, c);
+		c = lookahead();
+		/* stop at EOF without handing it to the character classifiers */
+		if(c==EOF || (lastdol?!varchr(c):!wordchr(c)))
+			break;
+		advance();
+	}
+	/* w is nil once putbyte/putrune has reported an error */
+	if(w)
+		*w = 0;
+
+	if ((c = kwlookup(buf)) == -1) {
+		(*node) = wordnode(buf);
+		(*node)->type = c = Tword;
+		(*node)->quoted = 0;
+		lastword = 1;
+	}
+
+	lastdol = 0;
+	return c;
+}