Commit d0af8f93 authored by Tollef Fog Heen's avatar Tollef Fog Heen

Switch regex engine to PCRE

We no longer use POSIX style regexes, but rather PCRE regexes.  This
introduces a hard dependency on libpcre.

Test suite passes, but only lightly tested.

The regex functions live in their own source file so that it is easy to
revisit this decision if somebody wants to use a non-PCRE engine for some reason.

git-svn-id: http://www.varnish-cache.org/svn/trunk/varnish-cache@4334 d4fa192b-c00b-0410-8231-f00ffab90ce4
parent e7012fd1
......@@ -50,13 +50,13 @@ SVNID("$Id$")
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <regex.h>
#include "shmlog.h"
#include "cli.h"
#include "cli_priv.h"
#include "cache.h"
#include "hash_slinger.h"
#include "vre.h"
#include "cache_ban.h"
......@@ -116,7 +116,7 @@ BAN_Free(struct ban *b)
bt = VTAILQ_FIRST(&b->tests);
VTAILQ_REMOVE(&b->tests, bt, list);
if (bt->flags & BAN_T_REGEXP)
regfree(&bt->re);
VRE_free(&bt->re);
if (bt->dst != NULL)
free(bt->dst);
if (bt->src != NULL)
......@@ -137,10 +137,13 @@ ban_cond_str(const struct ban_test *bt, const char *p)
if (p == NULL)
return(!(bt->flags & BAN_T_NOT));
if (bt->flags & BAN_T_REGEXP)
i = regexec(&bt->re, p, 0, NULL, 0);
else
if (bt->flags & BAN_T_REGEXP) {
i = VRE_exec(bt->re, p, strlen(p), 0, 0, NULL, 0);
if (i >= 0)
i = 0;
} else {
i = strcmp(bt->dst, p);
}
if (bt->flags & BAN_T_NOT)
return (!i);
return (i);
......@@ -199,15 +202,13 @@ ban_cond_obj_http(const struct ban_test *bt, const struct object *o,
static int
ban_parse_regexp(struct cli *cli, struct ban_test *bt, const char *a3)
{
int i;
char buf[512];
i = regcomp(&bt->re, a3, REG_EXTENDED | REG_ICASE | REG_NOSUB);
if (i) {
(void)regerror(i, &bt->re, buf, sizeof buf);
regfree(&bt->re);
VSL(SLT_Debug, 0, "REGEX: <%s>", buf);
cli_out(cli, "%s", buf);
const char *error;
int erroroffset;
bt->re = VRE_compile(a3, 0, &error, &erroroffset);
if (bt->re == NULL) {
VSL(SLT_Debug, 0, "REGEX: <%s>", error);
cli_out(cli, "%s", error);
cli_result(cli, CLIS_PARAM);
return (-1);
}
......
......@@ -44,7 +44,7 @@ struct ban_test {
int flags;
#define BAN_T_REGEXP (1 << 0)
#define BAN_T_NOT (1 << 1)
regex_t re;
vre_t *re;
char *dst;
char *src;
};
......
......@@ -40,47 +40,47 @@ SVNID("$Id$")
#include <string.h>
#include <ctype.h>
#include <stdlib.h>
#include <regex.h>
#include "shmlog.h"
#include "vrt.h"
#include "vre.h"
#include "vcl.h"
#include "cache.h"
void
VRT_re_init(void **rep, const char *re, int sub)
VRT_re_init(void **rep, const char *re)
{
regex_t *t;
vre_t *t;
const char *error;
int erroroffset;
t = calloc(sizeof *t, 1);
XXXAN(t);
/* This was already check-compiled by the VCL compiler */
AZ(regcomp(t, re, REG_EXTENDED | REG_ICASE | (sub ? 0 : REG_NOSUB)));
t = VRE_compile(re, 0, &error, &erroroffset);
AN(t);
*rep = t;
}
void
VRT_re_fini(void *rep)
{
if (rep != NULL)
regfree(rep);
VRE_free((vre_t**)&rep);
}
int
VRT_re_match(const char *s, void *re)
{
regex_t *t;
vre_t *t;
int i;
if (s == NULL)
return (0);
AN(re);
t = re;
i = regexec(t, s, 0, NULL, 0);
if (i == 0)
i = VRE_exec(t, s, strlen(s), 0, 0, NULL, 0);
if (i >= 0)
return (1);
assert(i == REG_NOMATCH);
assert(i == VRE_ERROR_NOMATCH);
return (0);
}
......@@ -88,8 +88,8 @@ const char *
VRT_regsub(const struct sess *sp, int all, const char *str, void *re,
const char *sub)
{
regmatch_t pm[10];
regex_t *t;
int ovector[30];
vre_t *t;
int i, l;
txt res;
char *b0;
......@@ -100,10 +100,11 @@ VRT_regsub(const struct sess *sp, int all, const char *str, void *re,
if (str == NULL)
return ("");
t = re;
i = regexec(t, str, 10, pm, 0);
memset(&ovector, 0, sizeof(ovector));
i = VRE_exec(t, str, strlen(str), 0, 0, ovector, 30);
/* If it didn't match, we can return the original string */
if (i == REG_NOMATCH)
if (i == VRE_ERROR_NOMATCH)
return(str);
u = WS_Reserve(sp->http->ws, 0);
......@@ -112,8 +113,7 @@ VRT_regsub(const struct sess *sp, int all, const char *str, void *re,
do {
/* Copy prefix to match */
Tadd(&res, str, pm[0].rm_so);
Tadd(&res, str, ovector[0]);
for (s = sub ; *s != '\0'; s++ ) {
if (*s != '\\' || s[1] == '\0') {
if (res.b < res.e)
......@@ -123,19 +123,20 @@ VRT_regsub(const struct sess *sp, int all, const char *str, void *re,
s++;
if (isdigit(*s)) {
x = *s - '0';
l = pm[x].rm_eo - pm[x].rm_so;
Tadd(&res, str + pm[x].rm_so, l);
l = ovector[2*x+1] - ovector[2*x];
Tadd(&res, str + ovector[2*x], l);
continue;
} else {
if (res.b < res.e)
*res.b++ = *s;
}
}
str += pm[0].rm_eo;
str += ovector[1];
if (!all)
break;
i = regexec(t, str, 10, pm, 0);
} while (i != REG_NOMATCH);
memset(&ovector, 0, sizeof(ovector));
i = VRE_exec(t, str, strlen(str), 0, 0, ovector, 30);
} while (i != VRE_ERROR_NOMATCH);
/* Copy suffix to match */
l = strlen(str) + 1;
......
......@@ -36,7 +36,6 @@ SVNID("$Id$")
#include <errno.h>
#include <fcntl.h>
#include <regex.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
......@@ -51,6 +50,7 @@ SVNID("$Id$")
#include "libvarnish.h"
#include "shmlog.h"
#include "vre.h"
#include "varnishapi.h"
static int b_flag, c_flag;
......@@ -80,7 +80,7 @@ static enum shmlogtag last[65536];
#define F_MATCH (1 << 1)
static int match_tag = -1;
static regex_t match_re;
static vre_t *match_re;
static void
h_order_finish(int fd)
......@@ -132,7 +132,7 @@ h_order(void *priv, enum shmlogtag tag, unsigned fd, unsigned len,
assert(ob[fd] != NULL);
}
if (tag == match_tag &&
!regexec(&match_re, ptr, 0, NULL, 0))
VRE_exec(match_re, ptr, len, 0, 0, NULL, 0) > 0)
flg[fd] |= F_MATCH;
if ((tag == SLT_BackendOpen || tag == SLT_SessionOpen ||
......@@ -198,6 +198,8 @@ static void
do_order(struct VSL_data *vd, int argc, char **argv)
{
int i;
const char *error;
int erroroffset;
if (argc == 2) {
match_tag = name2tag(argv[0]);
......@@ -205,11 +207,9 @@ do_order(struct VSL_data *vd, int argc, char **argv)
fprintf(stderr, "Tag \"%s\" unknown\n", argv[0]);
exit(2);
}
i = regcomp(&match_re, argv[1], REG_EXTENDED | REG_NOSUB);
if (i) {
char buf[BUFSIZ];
regerror(i, &match_re, buf, sizeof buf);
fprintf(stderr, "%s\n", buf);
match_re = VRE_compile(argv[1], 0, &error, &erroroffset);
if (match_re == NULL) {
fprintf(stderr, "Invalid regex: %s\n", error);
exit(2);
}
}
......
......@@ -75,6 +75,37 @@ AC_SUBST(NET_LIBS)
AC_CHECK_LIBM
AC_SUBST(LIBM)
# Locate libpcre: prefer pkg-config, otherwise fall back to pcre-config.
PKG_PROG_PKG_CONFIG
# The variable must be quoted: with an empty, unquoted value the test
# degenerates to "test -n", which is always true, and the pcre-config
# fallback below would never be reached.
if test -n "$PKG_CONFIG"; then
	PKG_CHECK_MODULES([PCRE], [libpcre])
else
	AC_CHECK_PROG(PCRE_CONFIG, pcre-config, pcre-config)
	AC_ARG_WITH(pcre-config,
		AS_HELP_STRING([--with-pcre-config=PATH],
			[Location of PCRE pcre-config (auto)]),
		[pcre_config="$withval"],
		[pcre_config=""])

	# An explicitly requested pcre-config overrides whatever was found in PATH.
	if test "x$pcre_config" != "x" ; then
		AC_MSG_CHECKING(for $pcre_config)

		if test -f "$pcre_config" ; then
			PCRE_CONFIG=$pcre_config
			AC_MSG_RESULT(yes)
		else
			AC_MSG_RESULT(no - searching PATH)
		fi
	fi
	if test "x$PCRE_CONFIG" = "x"; then
		AC_CHECK_PROGS(PCRE_CONFIG, pcre-config)
	fi
	# libpcre is a hard dependency: stop here rather than running an
	# empty command and silently producing empty flags.
	if test "x$PCRE_CONFIG" = "x"; then
		AC_MSG_ERROR([libpcre not found: install pkg-config or pcre-config, or use --with-pcre-config=PATH])
	fi
	PCRE_CFLAGS=`$PCRE_CONFIG --cflags`
	PCRE_LIBS=`$PCRE_CONFIG --libs`
fi
AC_SUBST(PCRE_CFLAGS)
AC_SUBST(PCRE_LIBS)
# Checks for header files.
AC_HEADER_STDC
AC_HEADER_SYS_WAIT
......
/*-
* Copyright (c) 2009 Redpill Linpro AS
* All rights reserved.
*
* Author: Tollef Fog Heen <tfheen@redpill-linpro.com>
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $Id$
*
* Regular expression support
*
*/
/* Opaque regex handle; the structure is defined in vre.c. */
struct vre;
typedef struct vre vre_t;
/*
 * Returned by VRE_exec() when the subject does not match.
 * Mirrors the corresponding PCRE error code.
 */
#define VRE_ERROR_NOMATCH (-1)
/* Compile options; these mirror the corresponding PCRE option bits. */
#define VRE_CASELESS 0x00000001
/*
 * Compile a pattern.  Arguments: pattern, options, and two out-parameters
 * receiving the error message and error offset.  Returns NULL on failure.
 */
vre_t *VRE_compile(const char *, int, const char **, int *);
/*
 * Run a compiled pattern.  Arguments: handle, subject, subject length,
 * start offset, options, output vector and its size in ints.  The output
 * vector may be NULL if the caller does not need match offsets.
 * Callers treat a result >= 0 as a match.
 */
int VRE_exec(const vre_t *, const char *, int, int, int, int *, int);
/* Release a handle and set the caller's pointer to NULL. */
void VRE_free(vre_t **);
......@@ -136,7 +136,7 @@ struct vrt_ref {
void VRT_acl_log(const struct sess *, const char *msg);
/* Regexp related */
void VRT_re_init(void **, const char *, int sub);
void VRT_re_init(void **, const char *);
void VRT_re_fini(void *);
int VRT_re_match(const char *, void *re);
const char *VRT_regsub(const struct sess *sp, int all, const char *,
......
# $Id$
INCLUDES = -I$(top_srcdir)/include
INCLUDES = -I$(top_srcdir)/include @PCRE_CFLAGS@
lib_LTLIBRARIES = libvarnish.la
......@@ -23,12 +23,13 @@ libvarnish_la_SOURCES = \
vev.c \
vlu.c \
vpf.c \
vre.c \
vsb.c \
vsha256.c \
vss.c \
vtmpfile.c
libvarnish_la_LIBADD = ${RT_LIBS} ${NET_LIBS} ${LIBM}
libvarnish_la_LIBADD = ${RT_LIBS} ${NET_LIBS} ${LIBM} @PCRE_LIBS@
DISTCLEANFILES = svn_version.c
svn_version.c: FORCE
......
/*-
* Copyright (c) 2006-2009 Redpill Linpro AS
* All rights reserved.
*
* Author: Tollef Fog Heen <tfheen@redpill-linpro.com>
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <pcre.h>
#include "libvarnish.h"
#include "miniobj.h"
#include "vre.h"
/*
 * Concrete definition of the opaque vre_t handle: a magic-tagged
 * wrapper around a compiled PCRE pattern.
 */
struct vre {
/* Object tag verified by the CHECK_OBJ*() macros against VRE_MAGIC. */
unsigned magic;
#define VRE_MAGIC 0xe83097dc
/* Compiled pattern as returned by pcre_compile(); released with pcre_free(). */
pcre *re;
};
/*
 * Compile a regular expression.
 *
 * pattern   - the expression source text
 * options   - option bits handed unchanged to pcre_compile()
 * errptr    - out: reset to NULL, then filled in by pcre_compile()
 * erroffset - out: reset to 0, then filled in by pcre_compile()
 *
 * Returns a new handle, to be released with VRE_free(), or NULL if
 * pcre_compile() rejected the pattern.
 */
vre_t *
VRE_compile(const char *pattern, int options,
    const char **errptr, int *erroffset)
{
	vre_t *handle;

	*errptr = NULL;
	*erroffset = 0;

	ALLOC_OBJ(handle, VRE_MAGIC);
	AN(handle);

	handle->re = pcre_compile(pattern, options, errptr, erroffset, NULL);
	if (handle->re != NULL)
		return (handle);

	/* Compilation failed: drop the wrapper and report failure. */
	VRE_free(&handle);
	return (NULL);
}
/*
 * Match a subject string against a compiled expression.
 *
 * code        - handle from VRE_compile(); must be a valid object
 * subject     - text to match
 * length      - number of bytes of subject to consider
 * startoffset - offset in subject at which matching starts
 * options     - option bits handed unchanged to pcre_exec()
 * ovector     - out: match offsets, or NULL if the caller does not
 *               want them
 * ovecsize    - number of ints in ovector (ignored when ovector is NULL)
 *
 * Returns the result of pcre_exec() unchanged.
 */
int
VRE_exec(const vre_t *code, const char *subject, int length,
    int startoffset, int options, int *ovector, int ovecsize)
{
	int scratch[30];

	CHECK_OBJ_NOTNULL(code, VRE_MAGIC);

	/* No caller-supplied vector: use a local one for pcre_exec(). */
	if (ovector == NULL) {
		ovector = scratch;
		ovecsize = sizeof(scratch) / sizeof(scratch[0]);
	}

	return (pcre_exec(code->re, NULL, subject, length,
	    startoffset, options, ovector, ovecsize));
}
/*
 * Release a handle obtained from VRE_compile().
 *
 * vv - address of the caller's handle pointer; must not be NULL and
 *      must point to a valid handle.  The caller's pointer is set to
 *      NULL so a stale handle cannot be reused.
 *
 * A handle whose compiled pattern is NULL (as happens on the
 * VRE_compile() failure path) is accepted.
 */
void
VRE_free(vre_t **vv)
{
	vre_t *v;

	/* Assert instead of dereferencing a NULL pointer, as VRE_exec() does. */
	AN(vv);
	v = *vv;
	*vv = NULL;
	CHECK_OBJ_NOTNULL(v, VRE_MAGIC);

	pcre_free(v->re);
	v->magic = 0;
	FREE_OBJ(v);
}
......@@ -40,13 +40,13 @@ SVNID("$Id$")
#include <errno.h>
#include <fcntl.h>
#include <limits.h>
#include <regex.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include "shmlog.h"
#include "vre.h"
#include "miniobj.h"
#include "varnishapi.h"
......@@ -84,8 +84,8 @@ struct VSL_data {
#define M_SELECT (1 << 3)
int regflags;
regex_t *regincl;
regex_t *regexcl;
vre_t *regincl;
vre_t *regexcl;
unsigned long skip;
unsigned long keep;
......@@ -170,7 +170,7 @@ VSL_New(void)
assert(VSL_S_BACKEND == M_BACKEND);
vd = calloc(sizeof *vd, 1);
assert(vd != NULL);
vd->regflags = REG_EXTENDED | REG_NOSUB;
vd->regflags = 0;
vd->magic = VSL_MAGIC;
vd->fd = -1;
return (vd);
......@@ -272,7 +272,6 @@ int
VSL_NextLog(struct VSL_data *vd, unsigned char **pp)
{
unsigned char *p;
regmatch_t rm;
unsigned u, l;
int i;
......@@ -315,19 +314,19 @@ VSL_NextLog(struct VSL_data *vd, unsigned char **pp)
if (vd->c_opt && !(vd->map[u] & M_CLIENT))
continue;
if (vd->regincl != NULL) {
rm.rm_so = 0;
rm.rm_eo = l;
i = regexec(vd->regincl,
(char *)p + SHMLOG_DATA, 1, &rm, 0);
if (i == REG_NOMATCH)
i = VRE_exec(vd->regincl,
(char *)p + SHMLOG_DATA,
SHMLOG_LEN(p) - SHMLOG_DATA, /* Length */
0, 0, NULL, 0);
if (i == VRE_ERROR_NOMATCH)
continue;
}
if (vd->regexcl != NULL) {
/* Exclude filter: the record must be tested against regexcl, not regincl. */
rm.rm_so = 0;
rm.rm_eo = l;
i = regexec(vd->regexcl,
(char *)p + SHMLOG_DATA, 1, &rm, 0);
if (i != REG_NOMATCH)
i = VRE_exec(vd->regexcl,
(char *)p + SHMLOG_DATA,
SHMLOG_LEN(p) - SHMLOG_DATA, /* Length */
0, 0, NULL, 0);
if (i != VRE_ERROR_NOMATCH)
continue;
}
*pp = p;
......@@ -412,9 +411,9 @@ vsl_r_arg(struct VSL_data *vd, const char *opt)
static int
vsl_IX_arg(struct VSL_data *vd, const char *opt, int arg)
{
int i;
regex_t **rp;
char buf[BUFSIZ];
vre_t **rp;
const char *error;
int erroroffset;
CHECK_OBJ_NOTNULL(vd, VSL_MAGIC);
if (arg == 'I')
......@@ -425,15 +424,9 @@ vsl_IX_arg(struct VSL_data *vd, const char *opt, int arg)
fprintf(stderr, "Option %c can only be given once", arg);
return (-1);
}
*rp = calloc(sizeof(regex_t), 1);
*rp = VRE_compile(opt, vd->regflags, &error, &erroroffset);
if (*rp == NULL) {
perror("malloc");
return (-1);
}
i = regcomp(*rp, opt, vd->regflags);
if (i) {
regerror(i, *rp, buf, sizeof buf);
fprintf(stderr, "%s", buf);
fprintf(stderr, "Illegal regex: %s\n", error);
return (-1);
}
return (1);
......@@ -547,7 +540,7 @@ VSL_Arg(struct VSL_data *vd, int arg, const char *opt)
case 'i': case 'x': return (vsl_ix_arg(vd, opt, arg));
case 'r': return (vsl_r_arg(vd, opt));
case 'I': case 'X': return (vsl_IX_arg(vd, opt, arg));
case 'C': vd->regflags = REG_ICASE; return (1);
case 'C': vd->regflags = VRE_CASELESS; return (1);
case 's': return (vsl_s_arg(vd, opt));
case 'k': return (vsl_k_arg(vd, opt));
default:
......
......@@ -198,7 +198,7 @@ unsigned vcc_UintVal(struct tokenlist *tl);
double vcc_DoubleVal(struct tokenlist *tl);
/* vcc_string.c */
char *vcc_regexp(struct tokenlist *tl, int sub);
char *vcc_regexp(struct tokenlist *tl);
int vcc_StringVal(struct tokenlist *tl);
void vcc_ExpectedStringval(struct tokenlist *tl);
......
......@@ -276,7 +276,7 @@ vcl_output_lang_h(struct vsb *sb)
vsb_cat(sb, "#define VRT_ACL_MAXADDR\t\t16\t/* max(IPv4, IPv6) */\n");
vsb_cat(sb, "\nvoid VRT_acl_log(const struct sess *, const char *ms");
vsb_cat(sb, "g);\n\n/* Regexp related */\nvoid VRT_re_init(void **,");
vsb_cat(sb, " const char *, int sub);\nvoid VRT_re_fini(void *);\n");
vsb_cat(sb, " const char *);\nvoid VRT_re_fini(void *);\n");
vsb_cat(sb, "int VRT_re_match(const char *, void *re);\n");
vsb_cat(sb, "const char *VRT_regsub(const struct sess *sp, int all,");
vsb_cat(sb, " const char *,\n void *, const char *);\n");
......
......@@ -225,7 +225,7 @@ Cond_String(const struct var *vp, struct tokenlist *tl)
tl->t->tok == '~' ? "" : "!");
vcc_NextToken(tl);
ExpectErr(tl, CSTR);
p = vcc_regexp(tl, 0);
p = vcc_regexp(tl);
ERRCHK(tl);
vcc_NextToken(tl);
Fb(tl, 1, "%s, %s)\n", vp->rname, p);
......
......@@ -34,7 +34,6 @@ SVNID("$Id$")
#include <stdio.h>
#include <string.h>
#include <regex.h>
#include "vsb.h"
......@@ -43,30 +42,30 @@ SVNID("$Id$")
#include "libvarnish.h"
#include "vrt.h"
#include "vre.h"
/*--------------------------------------------------------------------*/
char *
vcc_regexp(struct tokenlist *tl, int sub)
vcc_regexp(struct tokenlist *tl)
{
char buf[BUFSIZ], *p;
regex_t t;
int i;
vre_t *t;
const char *error;
int erroroffset;
Expect(tl, CSTR);
if (tl->err)
return (NULL);
memset(&t, 0, sizeof t);
i = regcomp(&t, tl->t->dec, REG_EXTENDED | (sub ? 0 : REG_NOSUB));
if (i != 0) {
(void)regerror(i, &t, buf, sizeof buf);
t = VRE_compile(tl->t->dec, 0, &error, &erroroffset);
if (t == NULL) {
vsb_printf(tl->sb,
"Regexp compilation error:\n\n%s\n\n", buf);
"Regexp compilation error:\n\n%s\n\n", error);
vcc_ErrWhere(tl, tl->t);
regfree(&t);
return (NULL);
}
regfree(&t);
VRE_free(&t);
sprintf(buf, "VGC_re_%u", tl->recnt++);
p = TlAlloc(tl, strlen(buf) + 1);
strcpy(p, buf);
......@@ -74,7 +73,7 @@ vcc_regexp(struct tokenlist *tl, int sub)
Fh(tl, 0, "static void *%s;\n", buf);
Fi(tl, 0, "\tVRT_re_init(&%s, ",buf);
EncToken(tl->fi, tl->t);
Fi(tl, 0, ", %d);\n", sub);
Fi(tl, 0, ");\n");
Ff(tl, 0, "\tVRT_re_fini(%s);\n", buf);
return (p);
}
......@@ -108,7 +107,7 @@ vcc_regsub(struct tokenlist *tl, int all)
Expect(tl, CSTR);
if (tl->err)
return (0);
p = vcc_regexp(tl, 1);
p = vcc_regexp(tl);
vcc_NextToken(tl);
Fb(tl, 0, ", %s, ", p);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment