Changes to work with pcre2 directly for match()

Based on the VRE_unpack() PR in
https://github.com/varnishcache/varnish-cache/pull/3657

As discussed with Geoff, we might still switch to VRE_capture()
once it is cooked, see https://github.com/varnishcache/varnish-cache/pull/3659
parent 96250e04
..
.. NB: This file is machine generated, DO NOT EDIT!
..
.. Edit vmod.vcc and run make instead
.. Edit ../src/vmod_re.vcc and run make instead
..
.. role:: ref(emphasis)
=======
VMOD re
vmod_re
=======
-------------------------------------------------------------------------
......
......@@ -29,30 +29,21 @@
#include "config.h"
#define PCRE2_CODE_UNIT_WIDTH 8
#include <stdlib.h>
#include <string.h>
#include "cache/cache.h"
#include "vcl.h"
#include "vre.h"
#include "vre_pcre2.h"
#include "vsb.h"
#include "vcc_if.h"
/* pcreapi(3):
*
* The first two-thirds of the vector is used to pass back captured
* substrings, each substring using a pair of integers. The remaining
* third of the vector is used as workspace by pcre_exec() while matching
* capturing subpatterns, and is not available for passing back
* information.
*
* XXX: if vre were to expose the pcre and pcre_extra objects, then we
* could use pcre_fullinfo() to determine the highest backref for each
* regex, and wouldn't need this arbitrary limit ...
*/
#define MAX_MATCHES 11
#define MAX_OV ((MAX_MATCHES) * 3)
#define MAX_OV_USED ((MAX_MATCHES) * 2)
#define MAX_OV ((MAX_MATCHES) * 2)
struct vmod_re_regex {
unsigned magic;
......@@ -65,7 +56,7 @@ typedef struct ov_s {
unsigned magic;
#define OV_MAGIC 0x844bfa39
const char *subject;
int ovector[MAX_OV_USED];
int ovector[MAX_OV];
} ov_t;
static void
......@@ -82,12 +73,32 @@ errmsg(VRT_CTX, const char *fmt, ...)
va_end(args);
}
/*
 * Compile a regular expression with VRE_compile().
 *
 * pattern, options and erroffset are handed through to VRE_compile()
 * unchanged. On success the compiled regex is returned and errbuf is not
 * touched. On failure NULL is returned and the text that VRE_error()
 * produces for the error code is rendered into errbuf (errbufsz bytes)
 * through a VSB set up over that buffer, so callers can print errbuf as
 * a string.
 */
static vre_t *
re_compile(const char *pattern, unsigned options, char *errbuf,
    size_t errbufsz, int *erroffset)
{
	/*
	 * Deliberately an automatic variable: a function-static pointer
	 * would be a single object shared by every caller, so two callers
	 * running at the same time could hand back each other's result.
	 */
	vre_t *vre;
	struct vsb vsb[1];
	int errcode;

	vre = VRE_compile(pattern, options, &errcode, erroffset, 1);
	if (vre != NULL)
		return (vre);

	/* Compilation failed: turn the error code into text in errbuf. */
	AN(VSB_init(vsb, errbuf, errbufsz));
	AZ(VRE_error(vsb, errcode));
	AZ(VSB_finish(vsb));
	VSB_fini(vsb);
	return (NULL);
}
VCL_VOID
vmod_regex__init(VRT_CTX, struct vmod_re_regex **rep, const char *vcl_name,
VCL_STRING pattern, VCL_INT limit, VCL_INT limit_recursion)
{
struct vmod_re_regex *re;
vre_t *vre;
char errbuf[VRE_ERROR_LEN];
int erroffset;
const char *error;
......@@ -109,10 +120,11 @@ vmod_regex__init(VRT_CTX, struct vmod_re_regex **rep, const char *vcl_name,
return;
}
if ((vre = VRE_compile(pattern, 0, &error, &erroffset)) == NULL) {
vre = re_compile(pattern, 0, errbuf, sizeof errbuf, &erroffset);
if (vre == NULL) {
VRT_fail(ctx, "vmod re: error compiling regex \"%s\" in %s "
"constructor: %s (at offset %d)", pattern, vcl_name,
error, erroffset);
errbuf, erroffset);
return;
}
......@@ -120,7 +132,7 @@ vmod_regex__init(VRT_CTX, struct vmod_re_regex **rep, const char *vcl_name,
AN(re);
re->vre = vre;
re->vre_limits.match = limit;
re->vre_limits.match_recursion = limit_recursion;
re->vre_limits.depth = limit_recursion;
*rep = re;
}
......@@ -144,8 +156,12 @@ match(VRT_CTX, const vre_t *vre, VCL_STRING subject, struct vmod_priv *task,
const struct vre_limits *vre_limits)
{
ov_t *ov;
int s, nov[MAX_OV];
size_t cp, len;
int i, r = 0, s, nov[MAX_OV];
size_t cp;
pcre2_match_context *re_ctx;
pcre2_match_data *data;
pcre2_code *re;
PCRE2_SIZE *ovector;
AN(vre);
if (subject == NULL)
......@@ -167,30 +183,60 @@ match(VRT_CTX, const vre_t *vre, VCL_STRING subject, struct vmod_priv *task,
CAST_OBJ_NOTNULL(ov, task->priv, OV_MAGIC);
}
len = strlen(subject);
s = VRE_exec(vre, subject, len, 0, 0, nov, MAX_OV, vre_limits);
if (s <= VRE_ERROR_NOMATCH) {
if (s < VRE_ERROR_NOMATCH)
// BEGIN duplication with vre
re = VRE_unpack(vre);
AN(re);
data = pcre2_match_data_create_from_pattern(re, NULL);
if (data == NULL) {
VRT_fail(ctx, "vmod_re: failed to create match data");
return 0;
}
// END duplication with vre
// BEGIN unneeded overhead (unless we get access to re_ctx also)
re_ctx = pcre2_match_context_create(NULL);
if (re_ctx == NULL) {
VRT_fail(ctx, "vmod_re: failed to create context");
goto out;
}
pcre2_set_depth_limit(re_ctx, vre_limits->depth);
pcre2_set_match_limit(re_ctx, vre_limits->match);
// END unneeded overhead
s = pcre2_match(re, (PCRE2_SPTR)subject, PCRE2_ZERO_TERMINATED, 0,
0, data, re_ctx);
if (s <= PCRE2_ERROR_NOMATCH) {
if (s < PCRE2_ERROR_NOMATCH)
VSLb(ctx->vsl, SLT_VCL_Error,
"vmod re: regex match returned %d", s);
return 0;
goto out;
}
if (s == 0) {
if (s > MAX_MATCHES) {
VSLb(ctx->vsl, SLT_VCL_Error,
"vmod re: capturing substrings exceed max %d",
MAX_MATCHES - 1);
s = MAX_MATCHES;
}
ovector = pcre2_get_ovector_pointer(data);
assert (s <= pcre2_get_ovector_count(data));
task->len = sizeof(*ov);
ov->subject = subject;
memset(ov->ovector, -1, sizeof(ov->ovector));
cp = s;
cp *= 2 * sizeof(*nov);
assert(cp <= sizeof(ov->ovector));
memcpy(ov->ovector, nov, cp);
return 1;
for (i = 0; i < s * 2; i++)
ov->ovector[i] = ovector[i];
for ( ; i < MAX_OV; i++)
ov->ovector[i] = -1;
r = 1;
out: // XXX goto because this might be throw-away code
AN(data);
pcre2_match_data_free(data);
if (re_ctx != NULL)
pcre2_match_context_free(re_ctx);
return (r);
}
static VCL_STRING
......@@ -223,7 +269,7 @@ backref(VRT_CTX, VCL_INT refnum, VCL_STRING fallback,
CAST_OBJ_NOTNULL(ov, task->priv, OV_MAGIC);
refnum *= 2;
assert(refnum + 1 < MAX_OV_USED);
assert(refnum + 1 < MAX_OV);
if (ov->ovector[refnum] == -1)
return fallback;
......@@ -255,9 +301,9 @@ get_limits(const struct vmod_re_regex *re, struct vre_limits *limits,
limits->match = re->vre_limits.match;
if (limit_recursion > 0)
limits->match_recursion = limit_recursion;
limits->depth = limit_recursion;
else
limits->match_recursion = re->vre_limits.match_recursion;
limits->depth = re->vre_limits.depth;
return (limits);
}
......@@ -308,8 +354,8 @@ vmod_match_dyn(VRT_CTX, struct vmod_priv *task, VCL_STRING pattern,
VCL_STRING subject, VCL_INT limit, VCL_INT limit_recursion)
{
vre_t *vre;
char errbuf[VRE_ERROR_LEN];
int erroffset;
const char *error;
VCL_BOOL dyn_return;
struct vre_limits limits;
......@@ -329,14 +375,14 @@ vmod_match_dyn(VRT_CTX, struct vmod_priv *task, VCL_STRING pattern,
}
limits.match = limit;
limits.match_recursion = limit_recursion;
limits.depth = limit_recursion;
task->len = 0;
vre = VRE_compile(pattern, 0, &error, &erroffset);
vre = re_compile(pattern, 0, errbuf, sizeof errbuf, &erroffset);
if (vre == NULL) {
VSLb(ctx->vsl, SLT_VCL_Error,
"vmod re: error compiling regex \"%s\": %s (position %d)",
pattern, error, erroffset);
pattern, errbuf, erroffset);
return 0;
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment