From ce05175372a9ddca1a225db0765ace1127a39293 Mon Sep 17 00:00:00 2001 From: Nicholas Date: Fri, 12 Nov 2021 09:22:01 -0800 Subject: chore: simplified organizational structure --- sys/libsre/lex.c | 246 ------------------------------------------------------- 1 file changed, 246 deletions(-) delete mode 100644 sys/libsre/lex.c (limited to 'sys/libsre/lex.c') diff --git a/sys/libsre/lex.c b/sys/libsre/lex.c deleted file mode 100644 index f4c6ac2..0000000 --- a/sys/libsre/lex.c +++ /dev/null @@ -1,246 +0,0 @@ -#include "sre.h" - -static -State * -state(Machine *m, int t) -{ - if (m->state >= m->statestk + arrlen(m->statestk)) - panicf("regexp vm: out of state space"); - - m->state->type = t; - m->state->l = nil; - m->state->r = nil; - - return m->state++; -} - -static -int -poptor(Parser *p) -{ - if (p->optor <= p->optorstk) - panicf("regexp parser: opand stack underflow"); - - return *--p->optor; -} - -static -void -pushtor(Parser *p, int t) -{ - if (p->optor >= arrend(p->optorstk)) - panicf("regexp parser: opand stack overflow"); - - *p->optor++ = t; -} - -static -void -pushand(Parser *p, State *beg, State *end) -{ - if (p->node >= arrend(p->nodestk)) - panicf("regexp parser: opand stack overflow"); - - p->node->beg = beg; - p->node->end = end; - - p->node++; -} - -static -Node * -popand(Parser *p) -{ - if (p->node <= p->nodestk) - panicf("regexp parser: opand stack underflow"); - - return --p->node; -} - -static -void -operateuntil(Parser *p, int prec) -{ - Node *o1, *o2, *t; - State *s1, *s2; - - while (prec == Trparen || p->optor[-1] >= prec) { - switch (poptor(p)) { - case Tor: - o1 = popand(p); - o2 = popand(p); - - s1 = state(p->mach, Tor); - s2 = state(p->mach, Tnop); - s1->l = o1->beg; - s1->r = o2->beg; - - o1->end->out = s2; - o2->end->out = s2; - - pushand(p, s1, s2); - break; - - case Tcat: - o1 = popand(p); - o2 = popand(p); - - o1->end->out = o2->beg; - pushand(p, o1->beg, o2->end); - break; - - case Tstar: - o1 = popand(p); - s1 = state(p->mach, Tor); - o1->end->out = s1; - s1->l = o1->beg; - pushand(p, s1, s1); - break; - - case Tplus: - o1 = popand(p); - s1 = state(p->mach, Tor); - o1->end->out = s1; - s1->l = o1->beg; - pushand(p, o1->beg, s1); - break; - - case Tqmark: - o1 = popand(p); - s1 = state(p->mach, Tor); - s2 = state(p->mach, Tnop); - s1->l = o1->beg; - s1->r = s2; - o1->end->out = s2; - pushand(p, s1, s2); - break; - - default: - panicf("unsupported regexp operator"); - } - } -} - -static -void -operator(Parser *p, int t) -{ - operateuntil(p, t); - pushtor(p, t); - p->wasopand = (t != Tstar && t != Tqmark && t != Tplus && t != Trparen); -} - -static -void -operand(Parser *p, int t) -{ - State *new; - if (p->wasopand) - operator(p, Tcat); - - new = state(p->mach, t); - pushand(p, new, new); - p->wasopand = true; -} - -#define cinc 20 -int -lex(Parser *p) -{ - int c, t; - byte *class; - long n, cap; - - c = *p->re++; - switch (c) { - case '\\': - if (*p->re) - if ((c = *p->re++) == '\n') - c = '\n'; - break; - case '\0': - c = Tend, - --p->re; - break; - case '*': - c = Tstar; - break; - case '?': - c = Tqmark; - break; - case '+': - c = Tplus; - break; - case '|': - c = Tor; - break; - case '.': - c = Tany; - break; - case '(': - c = Tlparen; - break; - case ')': - c = Trparen; - break; - case '^': - c = Tbol; - break; - case '$': - c = Teol; - break; - case '[': - goto charclass; - default: - ; - } - return c; - -charclass: - panicf("to implement"); -} - -#undef cinc - -static -State* -optimize(State *entry) -{ - State *curr, *next; - for (curr=entry; curr->type != Tend; curr++) { - next = curr->out; - while (next->type == Tnop) - next = next->out; - curr->out = next; - } - - return entry; -} - -void -sre·compile(Machine *mach, byte *regexp) -{ - int tok; - Parser p; - Node *prog; - - p = (Parser) { - .re = regexp, - .mach = mach, - .node = p.nodestk, - }; - - pushtor(&p, Tstart - 1); - while ((tok = lex(&p)) != Tend) { - if ((tok & isoptor) == Toperator) - operator(&p, tok); - else - operand(&p, tok); - } - operateuntil(&p, Tstart); - operand(&p, Tend); - operateuntil(&p, Tstart); - - prog = popand(&p); - mach->entry = optimize(prog->beg); -} -- cgit v1.2.1