From 425ef692da7e74112f88f0b368f3286dba84f846 Mon Sep 17 00:00:00 2001 From: Nicholas Noll Date: Thu, 18 Jun 2020 19:45:40 -0700 Subject: feat: working parser for rc shell language --- sys/cmd/rc/parse.c | 430 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 430 insertions(+) create mode 100644 sys/cmd/rc/parse.c (limited to 'sys/cmd/rc/parse.c') diff --git a/sys/cmd/rc/parse.c b/sys/cmd/rc/parse.c new file mode 100644 index 0000000..d963c12 --- /dev/null +++ b/sys/cmd/rc/parse.c @@ -0,0 +1,430 @@ +#include "rc.h" + +// ----------------------------------------------------------------------- +// global data + +static int lasta, nexta=EOF; +static Tree *node; /* if token was lexed as a tree node (redirs and words), its here */ + +static uchar prectab[256] = { + [Kif] = 1, [Kfor] = 1, [Kswitch] = 1, [Kelse] = 1, + [Aandand] = 2, [Aoror] = 2, + [Kbang] = 3, [Ksubsh] = 3, + [Apipe] = 4, + [Acarot] = 5, + [Adol] = 6, [Acount] = 6, [Aquote] = 6, + [Asub] = 7, +}; + +// ----------------------------------------------------------------------- +// helpers + +static +int +lookahead(void) +{ + int tok; + + if (nexta != EOF) + return nexta; + + tok = lex(&node); + return nexta = tok; +} + +static +int +advance(void) +{ + int tok = lookahead(); + lasta = nexta, nexta = EOF; + node = nil; + + return tok; +} + +static +int +nextis(int tok) +{ + if (lookahead() == tok) { + advance(); + return 1; + } + return 0; +} + +// ----------------------------------------------------------------------- +// subparsers + +static Tree *word(void); +static Tree *comword(void); +static Tree *cmd(int prec); + +static +Tree* +body(void) +{ + int tok; + Tree *l, *r; + l = cmd(1); +loop: + switch((tok=lookahead())){ + case '&': + l = tree1('&', l); + /* fallthrough */ + case ';': case '\n': + advance(); + r = cmd(1); + l = tree2(';', l, r); + goto loop; + default: + ; + } + + return l; +} + +static +Tree* +brace(void) +{ + Tree *t; + + if (!nextis('{')) + rcerror("not a brace"); + t = tree1(Abrace, body()); + if (!nextis('}')) + rcerror("unmatched brace"); + + return t; +} + +static +Tree* +paren(void) +{ + Tree *t; + + if (!nextis('(')) + rcerror("not a paren"); + t = tree1(Aparen, body()); + if (!nextis(')')) + rcerror("unmatched paren"); + + return t; +} + +/* TODO: fill in */ +static +Tree* +heredoc(Tree* t) +{ + return t; +} + +static +Tree* +redir(void) +{ + int tok; + Tree *t; + + switch (tok = lookahead()) { + case Adup: + t = node; + advance(); + break; + case Aredir: + advance(); + t = hang1(node, (node->redir.type == Rhere) ? heredoc(word()) : word()); + break; + default: + t = nil; + } + + return t; +} + +static +Tree* +epilog(void) +{ + Tree *t, *tt; + + t = redir(); + while((tt = redir())) + t = hang2(t, t->child[0], tt); + + return t; +} + +static +Tree* +sword(void) +{ + int tok; + if (Kstart < (tok=lookahead()) && tok < Kend) + return node; + + return comword(); +} + +static +Tree* +word(void) +{ + int tok; + Tree *t; + + t = sword(); + while(nextis('^')) + t = tree2('^', t, sword()); + + return t; +} + + +static +Tree* +words(void) +{ + Tree *t, *tt; + t = word(); + while((tt=word())) + t = tree2(Awords, t, tt); + + return t; +} + +static +Tree* +comword(void) +{ + int tok; + Tree *t, *tt; + + switch(tok=lookahead()){ + case Adol: + advance(); + t = word(); + if(nextis('(')) { + t = tree2(Asub, t, words()); + if (!nextis(')')) + rcerror("malformed index expression"); + } + return tree1(Adol, t); + case Acount: + advance(); + return tree1(Acount, word()); + case Atick: + advance(); + return tree1(Atick, brace()); + case Alparen: + return paren(); + case Aredir: + advance(); + t = hang1(node, brace()); + t->type = Apipefd; + return t; + case Aword: + t = node; + advance(); + return t; + } + return nil; +} + +static +Tree* +first(void) +{ + int tok; + Tree *t; + + t = comword(); + while(nextis('^')) { + t = tree2('^', t, word()); + } + + return t; +} + +/* simple _or_ assignment */ +static +Tree* +simple_or_assign(void) +{ + int tok; + Tree *t, *tt; + + /* can't continue */ + if (!(t = first())) + return nil; + + /* is an assignment */ +assign: + if(nextis('=')) + return tree3(Aeq, t, word(), cmd(prectab[Kbang])); + + /* is a 'simple' */ +simple: + switch ((tok=lookahead())) { + case Aredir: + case Adup: + t = tree2(Aargs, t, redir()); + goto simple; + default: + if ((tt = word())) { + t = tree2(Aargs, t, tt); + goto simple; + } + /* fallthrough */ + } + + return simplehang(t); +} + +static +Tree* +opand(void) +{ + int tok; + Tree *t, *tt; + + switch(tok=lookahead()) { + case Kif: + advance(); + t = paren(); + skipnl(); + tt = cmd(prectab[Kif]); + t = tree2(Kif, t, tt); + return t; + + case Kelse: + advance(); + skipnl(); + t = tree1(Kelse, cmd(prectab[Kelse])); + return t; + + case Kfor: + advance(); + if (!nextis('(')) + rcerror("malformed for statement"); + t = word(); + if (nextis(Kin)) { + advance(); + tt = words(); + t = tree3(Kin, t, tt, nil); + } else + t = tree3(Kin, t, nil, nil); + skipnl(); + tt = cmd(prectab[Kfor]); + t->child[2] = tt; + return t; + + case Kswitch: + advance(); + t = word(); + skipnl(); + tt = brace(); + t = tree2(Kswitch, t, tt); + return t; + + case Kfunc: + advance(); + t = words(); + if ((tok=lookahead()) == '{') { + tt = brace(); + t = tree2(Kfunc, t, tt); + } else + t = tree1(Kfunc, t); + return t; + + case Ksubsh: + advance(); + t = tree1(Ksubsh, cmd(prectab[Ksubsh])); + return t; + + case Kbang: + advance(); + t = tree1(Kbang, cmd(prectab[Kbang])); + return t; + + case Ktwiddle: + advance(); + tt = word(); + t = tree2(Ktwiddle, tt, words()); + return t; + + case Albrace: + t = brace(); + tt = epilog(); + return epihang(t, tt); + + case Aredir: /* fallthrough */ + case Adup: + t = redir(); + tt = cmd(prectab[Kbang]); + t = hang2(t, t->child[0], tt); + return t; + } + + return simple_or_assign(); +} + +static +Tree * +cmd(int prec) +{ + int np, tok; + Tree *l, *r, *p; + + if (!(l = opand())) + return nil; + + for(;;) { + tok = lookahead(); + np = prectab[tok]; + if (np < prec) + break; + p = node; + advance(); + r = cmd(np+1); + if (tok == Apipe) + l = hang2(p, l, r); + else + l = tree2(tok, l, r); + } + + return l; +} + +// ----------------------------------------------------------------------- +// main function + +int +parse(void) +{ + int tok; + Tree *t, *tt; + + t = cmd(1); +loop: + switch(tok=lookahead()) { + case '&': + t = tree1('&', t); + /* fallthrough */ + case ';': + advance(); + tt = cmd(1); + t = tree2(';', t, tt); + goto loop; + case '\n': case EOF: + pfmt(errio, "%t", t); + break; + default: + rcerror("unrecognized token: %d", tok); + } + return 0; +} -- cgit v1.2.1