#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define MAX_ENDS 10
#define S_SPLIT 256
#define S_MATCH 257
/* Operators, ordered by ascending precedence */
typedef enum ops {
OP_START, /* Sentinel, implicit */
OP_END, /* Implicit */
OP_CHAR,
OP_RPAR,
OP_LPAR,
OP_OR,
OP_CONCAT, /* Implicit */
OP_STAR,
} optype_t;
struct state {
int c;
struct state *next;
struct state *next2;
};
struct expr {
struct state *start;
struct state **ptrs[MAX_ENDS];
int nptrs;
};
int tokval;
struct expr exprstack[10];
struct expr *exprp = exprstack;
optype_t opstack[10];
optype_t *opp = opstack;
int need_concat;
static struct state *
new_state(int c, struct state *next, struct state *next2)
{
struct state *s;
s
= malloc(sizeof(struct state
));
s->c = c;
s->next = next;
s->next2 = next2;
return (s);
}
void
patch(struct expr *e, struct state *s)
{
int i;
for (i = 0; i < e->nptrs; i++)
*e->ptrs[i] = s;
}
optype_t
next_token(char **s)
{
char c;
c = *(*s)++;
switch (c) {
case '\0':
return (OP_END);
case '(':
return (OP_LPAR);
case ')':
return (OP_RPAR);
case '*':
return (OP_STAR);
case '|':
return (OP_OR);
case '\\':
tokval = *(*s)++;
return (OP_CHAR);
default:
tokval = c;
return (OP_CHAR);
}
}
void
push_expr(struct state *start, struct state **ptr)
{
struct expr *e;
e = exprp++;
e->start = start;
*e->ptrs = ptr;
e->nptrs = 1;
}
void
push_expr_arr(struct state *start, struct state **ptrs[], int n)
{
struct expr *e;
e = exprp++;
e->start = start;
e->nptrs = n;
memcpy(e
->ptrs
, ptrs
, n
* sizeof(struct state
*));
}
struct expr *
pop_expr()
{
return (--exprp);
}
void
push_op(optype_t op)
{
*opp++ = op;
}
optype_t
pop_op()
{
*opp = OP_START;
return (*--opp);
}
void
evalopsuntil(optype_t op)
{
struct state *new;
struct expr *e1, *e2;
optype_t cur;
int i;
while (*(opp-1) >= op) {
cur = pop_op();
if (cur == OP_STAR) {
e1 = pop_expr();
new = new_state(S_SPLIT, e1->start, NULL);
patch(e1, new);
push_expr(new, &new->next2);
} else if (cur == OP_CONCAT) {
e2 = pop_expr();
e1 = pop_expr();
patch(e1, e2->start);
push_expr_arr(e1->start, e2->ptrs, e2->nptrs);
} else if (cur == OP_OR) {
e2 = pop_expr();
e1 = pop_expr();
new = new_state(S_SPLIT, e1->start, e2->start);
/* XXX overflow */
for (i = 0; i < e2->nptrs; i++)
e1->ptrs[e1->nptrs + i] = e2->ptrs[i];
push_expr_arr(new, e1->ptrs, e1->nptrs + e2->nptrs);
} else if (cur == OP_START)
break;
}
}
void
operator(optype_t op)
{
if (*(opp - 1) >= op) {
evalopsuntil(op);
if (op != OP_RPAR)
push_op(op);
} else
push_op(op);
if (op == OP_RPAR && need_concat)
operator(OP_CONCAT);
if (op == OP_STAR || op == OP_RPAR)
need_concat = 1;
else
need_concat = 0;
}
struct state *
compile(char *s)
{
struct state *new, *start;
struct expr *e;
int op;
need_concat = 0;
start = NULL;
opp = opstack;
exprp = exprstack;
push_op(OP_START);
while (1) {
op = next_token(&s);
if (op == OP_END)
break;
if (op == OP_CHAR) {
if (need_concat)
operator(OP_CONCAT);
new = new_state(tokval, NULL, NULL);
push_expr(new, &new->next);
need_concat = 1;
} else
operator(op);
}
evalopsuntil(OP_START);
e = pop_expr();
new = new_state(S_MATCH, NULL, NULL);
patch(e, new);
return (e->start);
}
int
main(void)
{
compile("a(b|c)*d");
compile("abcd");
compile("a(b|c)d");
compile("a(bb)*a");
compile("a(bc)d");
compile("ab*cd");
compile("a(bc)*d");
compile("a(b|c)d");
compile("a(bc|de)fg");
return (0);
}