about summary refs log tree commit diff
path: root/src/cmd/rc/lex.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/cmd/rc/lex.c')
-rw-r--r-- src/cmd/rc/lex.c 394
1 files changed, 394 insertions, 0 deletions
diff --git a/src/cmd/rc/lex.c b/src/cmd/rc/lex.c
new file mode 100644
index 0000000..9ca2453
--- /dev/null
+++ b/src/cmd/rc/lex.c
@@ -0,0 +1,394 @@
+#include "rc.h"
+#include "parse.h"
+
+/* Forward declaration: yyerror() below calls advance() before it is defined. */
+static int advance(void);
+
+// -----------------------------------------------------------------------
+// lexer
+
+/*
+ * Lexer state: a two-slot character window over the input, flags that carry
+ * context from one token to the next, and the text of the current token.
+ */
+struct Lexer
+{
+ /* c[0]: character most recently consumed by advance();
+  * c[1]: lookahead filled by peekc(), where EOF also means "slot is empty" */
+ int c[2];
+ /* passed by address to prompt() in readc(); readc() sets it once a newline
+  * or EOF has been read */
+ ushort doprompt;
+ /* set after a word token; makes the next yylex() call inject Tindex or '^'
+  * when the word is directly followed by '(' or by the start of another word */
+ ushort hadword;
+ /* set after '$', '$#' or '$^'; the following word is then delimited with
+  * isidentchar() instead of iswordchar() */
+ ushort haddollar;
+ /* set to 1 when a single-quoted word is lexed and reset by yylex() before it
+  * scans a new token; not read anywhere in the visible code */
+ ushort inquote;
+ /* NUL-terminated text of the current token; quoted by yyerror() and handed
+  * to token() and iskeyword() */
+ char buf[BUFSIZ];
+};
+
+/*
+ * The single lexer instance.  It starts with an empty lookahead slot
+ * (c[1] == EOF) and with doprompt set -- presumably so that prompt() fires
+ * before the very first read; confirm against prompt().
+ */
+static struct Lexer lexer = { .c={0, EOF}, .doprompt=1 };
+
+/*
+ * putN(...): overwrite lexer.buf with an N-byte, NUL-terminated token text.
+ *
+ * Each macro expands to a single do/while(0) statement without a trailing
+ * semicolon, so `putN(...);` is exactly one statement and stays correct in
+ * an unbraced if/else.  Every argument is parenthesized.
+ */
+#define put1(b) do{ lexer.buf[0] = (b); lexer.buf[1] = 0; }while(0)
+#define put2(b0,b1) do{ lexer.buf[0] = (b0); lexer.buf[1] = (b1); lexer.buf[2] = 0; }while(0)
+#define put3(b0,b1,b2) do{ lexer.buf[0] = (b0); lexer.buf[1] = (b1); lexer.buf[2] = (b2); lexer.buf[3] = 0; }while(0)
+
+/*
+ * Report a syntax error on shell.err and resynchronise the lexer.
+ *
+ * The message is prefixed with "rc:<line>: " and, unless the current token
+ * text is empty or begins with a newline, with the quoted token text.  The
+ * cross-token flags are then cleared and the rest of the current input line
+ * is consumed.
+ */
+void
+yyerror(const char *msg)
+{
+	int showtok;
+
+	showtok = lexer.buf[0] != 0 && lexer.buf[0] != '\n';
+
+	print(shell.err, "rc:%d: ", runner->line);
+	if(showtok)
+		print(shell.err, "%q: ", lexer.buf);
+	print(shell.err, "%s\n", msg);
+	flush(shell.err);
+
+	lexer.hadword = 0;
+	lexer.haddollar = 0;
+
+	/* drop input until the last consumed character is a newline or EOF */
+	for(;;){
+		if(lexer.c[0] == '\n' || lexer.c[0] == EOF)
+			break;
+		advance();
+	}
+}
+
+/*
+ * Read the next raw character of the command being run.
+ *
+ * Once runner->flag.eof is set, EOF is returned without touching the input.
+ * Otherwise prompt() is called first (the process exits with status 1 when
+ * it returns zero), one character is fetched with get() from
+ * runner->cmd.io, doprompt is raised after a newline or EOF, and an EOF is
+ * latched into runner->flag.eof.
+ *
+ * Returns the character read, or EOF.
+ */
+int
+readc(void)
+{
+	int c;
+
+	if(runner->flag.eof)
+		return EOF;
+
+	if(!prompt(&lexer.doprompt))
+		exit(1); // XXX: hack for signal handling right now...
+
+	c = get(runner->cmd.io);
+	lexer.doprompt = lexer.doprompt || c=='\n' || c==EOF;
+
+	if(c==EOF)
+		runner->flag.eof = 1;
+
+	return c;
+}
+
+/*
+ * Return the lookahead character without consuming it.  The lookahead slot
+ * is filled from readc() when it is empty (holds EOF).
+ */
+static
+int
+peekc(void)
+{
+	int next = lexer.c[1];
+
+	if(next == EOF){
+		next = readc();
+		lexer.c[1] = next;
+	}
+	return next;
+}
+
+/*
+ * Consume one character: the lookahead becomes the current character and
+ * the lookahead slot is emptied.  Returns the character consumed.
+ */
+static
+int
+advance(void)
+{
+	lexer.c[0] = peekc();
+	lexer.c[1] = EOF;
+
+	return lexer.c[0];
+}
+
+/* Consume a run of spaces and tabs; stop in front of anything else. */
+static
+void
+skipws(void)
+{
+	int next;
+
+	while((next = peekc()) == ' ' || next == '\t')
+		advance();
+}
+
+/* Consume a run of spaces, tabs and newlines; stop in front of anything else. */
+static
+void
+skipnl(void)
+{
+	int next;
+
+	while((next = peekc()) == ' ' || next == '\t' || next == '\n')
+		advance();
+}
+
+/*
+ * If the lookahead character equals c, consume it and return 1;
+ * otherwise leave the input untouched and return 0.
+ */
+static
+int
+nextis(int c)
+{
+	if(peekc() != c)
+		return 0;
+
+	advance();
+	return 1;
+}
+
+/*
+ * Append byte c at position buf inside lexer.buf.
+ *
+ * Returns the position after the stored byte.  A nil buf is passed through
+ * untouched, so a failure propagates through a chain of calls.  When no
+ * room is left, fatal() is called and nil is returned.
+ *
+ * The last byte of lexer.buf is never handed out: callers terminate the
+ * token with `*w = 0` after their final putbyte()/putrune(), and that store
+ * must still land inside the array.
+ */
+static
+char *
+putbyte(char *buf, int c)
+{
+	if(!buf)
+		return buf;
+
+	/* keep one slot free for the terminating NUL written by the caller */
+	if(buf >= arrend(lexer.buf) - 1){
+		fatal("lexer: out of buffer space");
+		return nil;
+	}
+	*buf++ = c;
+	return buf;
+}
+
+/*
+ * Append one UTF-8 encoded character to the token text.
+ *
+ * c is the lead byte, already consumed by the caller.  It is stored first;
+ * then, depending on how the utf8 classifiers rate it, one to three further
+ * bytes are consumed with advance() and stored as well.  A lead byte that
+ * matches none of the classifiers is reported through fatal() and yields nil.
+ *
+ * Returns the position after the last stored byte, or nil.
+ */
+static
+char *
+putrune(char *buf, int c)
+{
+	int more;
+
+	buf = putbyte(buf, c);
+
+	if(utf8·onebyte(c))
+		more = 0;
+	else if(utf8·twobyte(c))
+		more = 1;
+	else if(utf8·threebyte(c))
+		more = 2;
+	else if(utf8·fourbyte(c))
+		more = 3;
+	else{
+		fatal("malformed utf8 stream");
+		return nil;
+	}
+
+	/* trailing bytes of the sequence */
+	while(more-- > 0)
+		buf = putbyte(buf, advance());
+
+	return buf;
+}
+
+// -----------------------------------------------------------------------
+// exported functions
+
+// TODO: turn into static tables
+/*
+ * Return 1 when c may appear in an unquoted word, 0 otherwise.  EOF, NUL,
+ * whitespace and the shell metacharacters listed below end a word.
+ */
+int
+iswordchar(int c)
+{
+	if(c == EOF)
+		return 0;
+
+	/* strchr() also matches the terminating NUL, so c == 0 is rejected */
+	return strchr("\n \t#;&|^$=`'{}()<>", c) ? 0 : 1;
+}
+
+/*
+ * Return 1 when c may appear in a variable name following '$', 0 otherwise.
+ * Everything up to and including the space character (EOF and NUL included)
+ * is rejected, as is the punctuation listed below.
+ */
+int
+isidentchar(int c)
+{
+	if(c <= ' ')
+		return 0;
+
+	return strchr("!\"#$%&'()+,-./:;<=>?@[\\]^`{|}~", c) ? 0 : 1;
+}
+
+/*
+ * Return the next token to the parser.
+ *
+ * The token text is left NUL-terminated in lexer.buf (yyerror() quotes it).
+ * yylval.tree is nil for punctuation and operators, and points to a fresh
+ * node for redirections/pipes (maketree()) and for words (token()).
+ *
+ * Two tokens are synthesised without consuming a visible operator: directly
+ * after a word, '(' yields Tindex and the start of another word yields '^'.
+ *
+ * Redirection forms accepted after '|', '>', '>>', '<', '<<' and '<>':
+ *   [fd]       use descriptor fd instead of the default
+ *   [fd0=fd1]  duplicate a descriptor (Tdup with Rdupfd)
+ *   [fd=]      close a descriptor     (Tdup with Rclose)
+ * The '=' forms are rejected for '>>' and '<<', and '[fd=]' is rejected for
+ * pipes.  A malformed bracket is reported through yyerror() and EOF is
+ * returned.
+ *
+ * Returns the token type, or EOF at end of input.
+ */
+int
+yylex(void)
+{
+	int c, d = peekc();
+	Tree *node;
+	char *w = lexer.buf;
+
+	yylval.tree = nil;
+
+	/* inject tokens */
+	if(lexer.hadword){
+		lexer.hadword = 0;
+		if(d=='('){
+			advance();
+			strcpy(lexer.buf, "( [Tindex]");
+			return Tindex;
+		}
+		if(iswordchar(d) || d=='\'' || d=='`' || d=='$' || d=='"'){
+			strcpy(lexer.buf, "^");
+			return '^';
+		}
+	}
+
+	lexer.inquote = 0;
+
+	skipws();
+	switch(c=advance()){
+	case EOF:
+		lexer.haddollar = 0;
+		put3('E','O','F');
+		return EOF;
+
+	case '$':
+		lexer.haddollar = 1;
+		if(nextis('#')){
+			put2('$','#');
+			return Tcount;
+		}
+		if(nextis('^')){
+			put2('$','^');
+			return Tjoin;
+		}
+		put1('$');
+		return '$';
+
+	case '@':
+		lexer.haddollar = 0;
+		put1('@');
+		return Tsubshell;
+
+	case '!':
+		lexer.haddollar = 0;
+		put1('!');
+		return Tbang;
+
+	case '&':
+		lexer.haddollar = 0;
+		if(nextis('&')){
+			put2('&','&');
+			return Tandand;
+		}
+		put1('&');
+		return '&';
+
+	case '|':
+		lexer.haddollar = 0;
+		if(nextis('|')){
+			put2('|','|');
+			return Toror;
+		}
+		node = maketree();
+		*w++ = '|';
+
+		/* default pipe: descriptor 1 on the left, descriptor 0 on the right */
+		node->type = Tpipe;
+		node->redir.fd[0] = 1;
+		node->redir.fd[1] = 0;
+		goto redir;
+
+	case '>':
+		lexer.haddollar = 0;
+		node = maketree();
+		*w++ = '>';
+		node->type = Tredir;
+
+		if(nextis('>')){
+			node->redir.type = Rappend;
+			*w++ = '>';
+		}else
+			node->redir.type = Rwrite;
+		node->redir.fd[0] = 1;
+		goto redir;
+
+	case '<':
+		lexer.haddollar = 0;
+		node = maketree();
+		*w++ = '<';
+		node->type = Tredir;
+
+		if(nextis('<')){
+			node->redir.type = Rhere;
+			*w++ = '<';
+		}else if(nextis('>')){
+			node->redir.type = Rrdwr;
+			*w++ = '>';
+		}else{
+			node->redir.type = Rread;
+		}
+		node->redir.fd[0] = 0;
+		/* fallthrough */
+	redir:
+		if(nextis('[')){
+			*w++='[';
+			c = advance();
+			if(c < '0' || '9' < c){
+				/* keep the offending character in the error text */
+				*w++ = c;
+			badredir:
+				*w = 0;
+				yyerror(node->type == Tpipe ? "pipe syntax" : "redirection syntax");
+				return EOF;
+			}
+			/* first descriptor; each digit is recorded in the token text once */
+			node->redir.fd[0] = 0;
+			do{
+				node->redir.fd[0] = 10*node->redir.fd[0]+(c-'0');
+				*w++ = c;
+				c = advance();
+			}while('0'<=c && c<='9');
+
+			/*
+			 * Only an explicit '=' turns the operator into a dup/close.
+			 * A plain [fd] keeps the redirection type chosen above and
+			 * is valid for pipes as well.
+			 */
+			if(c == '='){
+				*w++ = '=';
+				if(node->type==Tredir){
+					/*
+					 * '>>' and '<<' cannot be combined with '='.  Test
+					 * this before redir.type is replaced below.
+					 */
+					if(node->redir.type == Rhere || node->redir.type == Rappend)
+						goto badredir;
+					node->type = Tdup;
+				}
+				c = advance();
+				if(c < '0' || '9' < c){
+					/* [fd=] closes fd; meaningless for a pipe */
+					if(node->type == Tpipe)
+						goto badredir;
+					node->redir.type = Rclose;
+				}else{
+					/* [fd0=fd1]: the first number moves to fd[1], the second is read into fd[0] */
+					node->redir.type = Rdupfd;
+					node->redir.fd[1] = node->redir.fd[0];
+					node->redir.fd[0] = 0;
+					do{
+						node->redir.fd[0] = 10*node->redir.fd[0]+(c-'0');
+						*w++ = c;
+						c = advance();
+					}while('0'<=c && c<='9');
+				}
+			}
+			if(c != ']')
+				goto badredir;
+			*w++ = ']';
+		}
+		*w++ = 0;
+		yylval.tree = node;
+
+		return node->type;
+
+	case '\'':
+		lexer.hadword = 1;
+		lexer.inquote = 1;
+		lexer.haddollar = 0;
+		for(;;){
+			c = advance();
+			if(c==EOF)
+				break;
+
+			if(c=='\''){
+				/* '' inside a quoted word stands for one literal quote */
+				if(peekc()!='\'')
+					break;
+				advance();
+			}
+			w = putrune(w, c);
+		}
+		if(w)
+			*w = 0;
+		node = token(Tword, lexer.buf);
+		node->quoted = 1;
+
+		/* hand the word to the parser, exactly as the bare-word path does */
+		yylval.tree = node;
+		return node->type;
+
+	default:
+		;
+	}
+	if(!iswordchar(c)){
+		/* any other single non-word character is its own token */
+		put1(c);
+		lexer.haddollar = 0;
+		return c;
+	}
+
+	/* bare word; after '$' it ends at the first non-identifier character */
+	for(;;){
+		w = putrune(w, c);
+		c = peekc();
+		if(lexer.haddollar ? !isidentchar(c) : !iswordchar(c))
+			break;
+		advance();
+	}
+
+	lexer.hadword = 1;
+	lexer.haddollar = 0;
+	if(w)
+		*w = 0;
+
+	node = token(Tword, lexer.buf);
+	if((c=iskeyword(lexer.buf))){
+		/* keywords never take an implicit '^' or index after them */
+		node->type = c;
+		lexer.hadword = 0;
+	}
+
+	node->quoted = 0;
+
+	yylval.tree = node;
+	return node->type;
+}