#include "sre.h" static State * state(Machine *m, int t) { if (m->state >= m->statestk + arrlen(m->statestk)) panicf("regexp vm: out of state space"); m->state->type = t; m->state->l = nil; m->state->r = nil; return m->state++; } static int poptor(Parser *p) { if (p->optor <= p->optorstk) panicf("regexp parser: opand stack underflow"); return *--p->optor; } static void pushtor(Parser *p, int t) { if (p->optor >= arrend(p->optorstk)) panicf("regexp parser: opand stack overflow"); *p->optor++ = t; } static void pushand(Parser *p, State *beg, State *end) { if (p->node >= arrend(p->nodestk)) panicf("regexp parser: opand stack overflow"); p->node->beg = beg; p->node->end = end; p->node++; } static Node * popand(Parser *p) { if (p->node <= p->nodestk) panicf("regexp parser: opand stack underflow"); return --p->node; } static void operateuntil(Parser *p, int prec) { Node *o1, *o2, *t; State *s1, *s2; while (prec == Trparen || p->optor[-1] >= prec) { switch (poptor(p)) { case Tor: o1 = popand(p); o2 = popand(p); s1 = state(p->mach, Tor); s2 = state(p->mach, Tnop); s1->l = o1->beg; s1->r = o2->beg; o1->end->out = s2; o2->end->out = s2; pushand(p, s1, s2); break; case Tcat: o1 = popand(p); o2 = popand(p); o1->end->out = o2->beg; pushand(p, o1->beg, o2->end); break; case Tstar: o1 = popand(p); s1 = state(p->mach, Tor); o1->end->out = s1; s1->l = o1->beg; pushand(p, s1, s1); break; case Tplus: o1 = popand(p); s1 = state(p->mach, Tor); o1->end->out = s1; s1->l = o1->beg; pushand(p, o1->beg, s1); break; case Tqmark: o1 = popand(p); s1 = state(p->mach, Tor); s2 = state(p->mach, Tnop); s1->l = o1->beg; s1->r = s2; o1->end->out = s2; pushand(p, s1, s2); break; default: panicf("unsupported regexp operator"); } } } static void operator(Parser *p, int t) { operateuntil(p, t); pushtor(p, t); p->wasopand = (t != Tstar && t != Tqmark && t != Tplus && t != Trparen); } static void operand(Parser *p, int t) { State *new; if (p->wasopand) operator(p, Tcat); new = state(p->mach, t); pushand(p, new, new); p->wasopand = true; } #define cinc 20 int lex(Parser *p) { int c, t; byte *class; long n, cap; c = *p->re++; switch (c) { case '\\': if (*p->re) if ((c = *p->re++) == '\n') c = '\n'; break; case '\0': c = Tend, --p->re; break; case '*': c = Tstar; break; case '?': c = Tqmark; break; case '+': c = Tplus; break; case '|': c = Tor; break; case '.': c = Tany; break; case '(': c = Tlparen; break; case ')': c = Trparen; break; case '^': c = Tbol; break; case '$': c = Teol; break; case '[': goto charclass; default: ; } return c; charclass: panicf("to implement"); } #undef cinc static State* optimize(State *entry) { State *curr, *next; for (curr=entry; curr->type != Tend; curr++) { next = curr->out; while (next->type == Tnop) next = next->out; curr->out = next; } return entry; } void sre·compile(Machine *mach, byte *regexp) { int tok; Parser p; Node *prog; p = (Parser) { .re = regexp, .mach = mach, .node = p.nodestk, }; pushtor(&p, Tstart - 1); while ((tok = lex(&p)) != Tend) { if ((tok & isoptor) == Toperator) operator(&p, tok); else operand(&p, tok); } operateuntil(&p, Tstart); operand(&p, Tend); operateuntil(&p, Tstart); prog = popand(&p); mach->entry = optimize(prog->beg); }