aboutsummaryrefslogtreecommitdiff
path: root/sys/libsre/lex.c
diff options
context:
space:
mode:
Diffstat (limited to 'sys/libsre/lex.c')
-rw-r--r--sys/libsre/lex.c246
1 files changed, 0 insertions, 246 deletions
diff --git a/sys/libsre/lex.c b/sys/libsre/lex.c
deleted file mode 100644
index f4c6ac2..0000000
--- a/sys/libsre/lex.c
+++ /dev/null
@@ -1,246 +0,0 @@
-#include "sre.h"
-
-static
-State *
-state(Machine *m, int t)
-{
- if (m->state >= m->statestk + arrlen(m->statestk))
- panicf("regexp vm: out of state space");
-
- m->state->type = t;
- m->state->l = nil;
- m->state->r = nil;
-
- return m->state++;
-}
-
-static
-int
-poptor(Parser *p)
-{
- if (p->optor <= p->optorstk)
- panicf("regexp parser: opand stack underflow");
-
- return *--p->optor;
-}
-
-static
-void
-pushtor(Parser *p, int t)
-{
- if (p->optor >= arrend(p->optorstk))
- panicf("regexp parser: opand stack overflow");
-
- *p->optor++ = t;
-}
-
-static
-void
-pushand(Parser *p, State *beg, State *end)
-{
- if (p->node >= arrend(p->nodestk))
- panicf("regexp parser: opand stack overflow");
-
- p->node->beg = beg;
- p->node->end = end;
-
- p->node++;
-}
-
-static
-Node *
-popand(Parser *p)
-{
- if (p->node <= p->nodestk)
- panicf("regexp parser: opand stack underflow");
-
- return --p->node;
-}
-
-static
-void
-operateuntil(Parser *p, int prec)
-{
- Node *o1, *o2, *t;
- State *s1, *s2;
-
- while (prec == Trparen || p->optor[-1] >= prec) {
- switch (poptor(p)) {
- case Tor:
- o1 = popand(p);
- o2 = popand(p);
-
- s1 = state(p->mach, Tor);
- s2 = state(p->mach, Tnop);
- s1->l = o1->beg;
- s1->r = o2->beg;
-
- o1->end->out = s2;
- o2->end->out = s2;
-
- pushand(p, s1, s2);
- break;
-
- case Tcat:
- o1 = popand(p);
- o2 = popand(p);
-
- o1->end->out = o2->beg;
- pushand(p, o1->beg, o2->end);
- break;
-
- case Tstar:
- o1 = popand(p);
- s1 = state(p->mach, Tor);
- o1->end->out = s1;
- s1->l = o1->beg;
- pushand(p, s1, s1);
- break;
-
- case Tplus:
- o1 = popand(p);
- s1 = state(p->mach, Tor);
- o1->end->out = s1;
- s1->l = o1->beg;
- pushand(p, o1->beg, s1);
- break;
-
- case Tqmark:
- o1 = popand(p);
- s1 = state(p->mach, Tor);
- s2 = state(p->mach, Tnop);
- s1->l = o1->beg;
- s1->r = s2;
- o1->end->out = s2;
- pushand(p, s1, s2);
- break;
-
- default:
- panicf("unsupported regexp operator");
- }
- }
-}
-
-static
-void
-operator(Parser *p, int t)
-{
- operateuntil(p, t);
- pushtor(p, t);
- p->wasopand = (t != Tstar && t != Tqmark && t != Tplus && t != Trparen);
-}
-
-static
-void
-operand(Parser *p, int t)
-{
- State *new;
- if (p->wasopand)
- operator(p, Tcat);
-
- new = state(p->mach, t);
- pushand(p, new, new);
- p->wasopand = true;
-}
-
-#define cinc 20
-int
-lex(Parser *p)
-{
- int c, t;
- byte *class;
- long n, cap;
-
- c = *p->re++;
- switch (c) {
- case '\\':
- if (*p->re)
- if ((c = *p->re++) == '\n')
- c = '\n';
- break;
- case '\0':
- c = Tend,
- --p->re;
- break;
- case '*':
- c = Tstar;
- break;
- case '?':
- c = Tqmark;
- break;
- case '+':
- c = Tplus;
- break;
- case '|':
- c = Tor;
- break;
- case '.':
- c = Tany;
- break;
- case '(':
- c = Tlparen;
- break;
- case ')':
- c = Trparen;
- break;
- case '^':
- c = Tbol;
- break;
- case '$':
- c = Teol;
- break;
- case '[':
- goto charclass;
- default:
- ;
- }
- return c;
-
-charclass:
- panicf("to implement");
-}
-
-#undef cinc
-
-static
-State*
-optimize(State *entry)
-{
- State *curr, *next;
- for (curr=entry; curr->type != Tend; curr++) {
- next = curr->out;
- while (next->type == Tnop)
- next = next->out;
- curr->out = next;
- }
-
- return entry;
-}
-
-void
-sre·compile(Machine *mach, byte *regexp)
-{
- int tok;
- Parser p;
- Node *prog;
-
- p = (Parser) {
- .re = regexp,
- .mach = mach,
- .node = p.nodestk,
- };
-
- pushtor(&p, Tstart - 1);
- while ((tok = lex(&p)) != Tend) {
- if ((tok & isoptor) == Toperator)
- operator(&p, tok);
- else
- operand(&p, tok);
- }
- operateuntil(&p, Tstart);
- operand(&p, Tend);
- operateuntil(&p, Tstart);
-
- prog = popand(&p);
- mach->entry = optimize(prog->beg);
-}