From bf03074e346b004659196b6c17eee04dbffd3ac2 Mon Sep 17 00:00:00 2001 From: Nicholas Noll Date: Fri, 15 Oct 2021 16:18:02 -0700 Subject: feat(rc): working prototype of input->compile->print loop --- sys/cmd/rc/lex.c | 207 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 207 insertions(+) create mode 100644 sys/cmd/rc/lex.c (limited to 'sys/cmd/rc/lex.c') diff --git a/sys/cmd/rc/lex.c b/sys/cmd/rc/lex.c new file mode 100644 index 0000000..ec9e94d --- /dev/null +++ b/sys/cmd/rc/lex.c @@ -0,0 +1,207 @@ +#include "rc.h" +#include "parse.h" + +static int advance(void); +// ----------------------------------------------------------------------- +// lexer + +struct Lexer +{ + int c[2]; + ushort doprompt; + char buf[BUFSIZ]; +}; + +static struct Lexer lexer = { .c={0, EOF}, .doprompt=1 }; + +void +yyerror(char *msg) +{ + print(errio, "\nrc: "); + + if(lexer.buf[0] && lexer.buf[0]!='\n') + print(errio, "@ %q: ", lexer.buf); + + print(errio, "%s\n", msg); + flush(errio); + + while(lexer.c[0] !='\n' && lexer.c[0] != EOF) + advance(); +} + +int +readc(void) +{ + int c; + static int peek = EOF; + + if(peek!=EOF){ + c = peek; + peek = EOF; + return c; + } + if(shell->flag.eof) + return EOF; + + if(!prompt(&lexer.doprompt)) + exit(1); // XXX: hack for signal handling right now... + + c = get(shell->cmd.io); + + lexer.doprompt = lexer.doprompt || c=='\n' || c==EOF; + + if(c==EOF) + shell->flag.eof = 1; + + return c; +} + +static +int +peekc(void) +{ + if(lexer.c[1] == EOF) + lexer.c[1] = readc(); + + return lexer.c[1]; +} + +static +int +advance(void) +{ + int c = peekc(); + lexer.c[0] = lexer.c[1], lexer.c[1] = EOF; + + return c; +} + +static +void +skipws(void) +{ + int c; + for(;;){ + c = peekc(); + if(c== ' ' || c == '\t') + advance(); + else + return; + } +} + +static +void +skipnl(void) +{ + int c; + for(;;){ + c = peekc(); + if(c== ' ' || c == '\t' || c == '\n') + advance(); + else + return; + } +} + +static +int +nextis(int c) +{ + if(peekc()==c){ + advance(); + return 1; + } + return 0; +} + +static +char * +putbyte(char *buf, int c) +{ + if(!buf) + return buf; + + if(buf == arrend(lexer.buf)){ + fatal("lexer: out of buffer space"); + return nil; + } + *buf++ = c; + return buf; +} + +#define onebyte(c) ((c&0x80)==0x00) +#define twobyte(c) ((c&0xe0)==0xc0) +#define threebyte(c) ((c&0xf0)==0xe0) +#define fourbyte(c) ((c&0xf8)==0xf0) + +static +char * +putrune(char *buf, int c) +{ + buf = putbyte(buf, c); + if(onebyte(c)) + return buf; + if(twobyte(c)) + return putbyte(buf,c); + if(threebyte(c)){ + buf = putbyte(buf,c); + return putbyte(buf,c); + } + if(fourbyte(c)){ + buf = putbyte(buf,c); + buf = putbyte(buf,c); + return putbyte(buf,c); + } + fatal("malformed utf8 stream"); + + return nil; +} + +// ----------------------------------------------------------------------- +// exported functions + +int +iswordchar(int c) +{ + return !strchr("\n \t#;&|^$=`'{}()<>", c) && c!=EOF; +} + +int +yylex(void) +{ + int c; + Tree *node; + char *w = lexer.buf; + + yylval.tree = nil; + + skipws(); + switch(c=advance()){ + case EOF: + return EOF; + case '&': + lexer.buf[0] = '&'; + lexer.buf[1] = 0; + return '&'; + + default: + ; + } + if(!iswordchar(c)) + return c; + + for(;;){ + w = putrune(w, c); + c = peekc(); + if(!iswordchar(c)) + break; + advance(); + } + w[0] = 0; + + node = token(Tword, lexer.buf); + + yylval.tree = node; + return node->type; +} -- cgit v1.2.1