Changes to work with pcre2 directly for match()

Based on the VRE_unpack() PR in
https://github.com/varnishcache/varnish-cache/pull/3657

As discussed with Geoff, we might still switch to VRE_capture()
once it is ready; see https://github.com/varnishcache/varnish-cache/pull/3659
parent 96250e04
.. ..
.. NB: This file is machine generated, DO NOT EDIT! .. NB: This file is machine generated, DO NOT EDIT!
.. ..
.. Edit vmod.vcc and run make instead .. Edit ../src/vmod_re.vcc and run make instead
.. ..
.. role:: ref(emphasis) .. role:: ref(emphasis)
======= =======
VMOD re vmod_re
======= =======
------------------------------------------------------------------------- -------------------------------------------------------------------------
......
...@@ -29,30 +29,21 @@ ...@@ -29,30 +29,21 @@
#include "config.h" #include "config.h"
#define PCRE2_CODE_UNIT_WIDTH 8
#include <stdlib.h> #include <stdlib.h>
#include <string.h> #include <string.h>
#include "cache/cache.h" #include "cache/cache.h"
#include "vcl.h" #include "vcl.h"
#include "vre.h" #include "vre.h"
#include "vre_pcre2.h"
#include "vsb.h"
#include "vcc_if.h" #include "vcc_if.h"
/* pcreapi(3):
*
* The first two-thirds of the vector is used to pass back captured
* substrings, each substring using a pair of integers. The remaining
* third of the vector is used as workspace by pcre_exec() while matching
* capturing subpatterns, and is not available for passing back
* information.
*
* XXX: if vre were to expose the pcre and pcre_extra objects, then we
* could use pcre_fullinfo() to determine the highest backref for each
* regex, and wouldn't need this arbitrary limit ...
*/
#define MAX_MATCHES 11 #define MAX_MATCHES 11
#define MAX_OV ((MAX_MATCHES) * 3) #define MAX_OV ((MAX_MATCHES) * 2)
#define MAX_OV_USED ((MAX_MATCHES) * 2)
struct vmod_re_regex { struct vmod_re_regex {
unsigned magic; unsigned magic;
...@@ -65,7 +56,7 @@ typedef struct ov_s { ...@@ -65,7 +56,7 @@ typedef struct ov_s {
unsigned magic; unsigned magic;
#define OV_MAGIC 0x844bfa39 #define OV_MAGIC 0x844bfa39
const char *subject; const char *subject;
int ovector[MAX_OV_USED]; int ovector[MAX_OV];
} ov_t; } ov_t;
static void static void
...@@ -82,12 +73,32 @@ errmsg(VRT_CTX, const char *fmt, ...) ...@@ -82,12 +73,32 @@ errmsg(VRT_CTX, const char *fmt, ...)
va_end(args); va_end(args);
} }
/*
 * Compile `pattern` via VRE_compile(), forwarding `options` and `erroffset`.
 *
 * Returns the compiled regex on success.  On failure returns NULL after
 * rendering the message for the reported error code into `errbuf`
 * (`errbufsz` bytes) through a VSB wrapped around that buffer.
 */
static vre_t *
re_compile(const char *pattern, unsigned options, char *errbuf,
    size_t errbufsz, int *erroffset)
{
	/*
	 * Deliberately an automatic variable: a function-scope `static`
	 * would be a single slot shared by every call, so concurrent callers
	 * could overwrite each other's result before it is returned.
	 */
	vre_t *vre;
	struct vsb vsb[1];
	int errcode;

	vre = VRE_compile(pattern, options, &errcode, erroffset, 1);
	if (vre != NULL)
		return (vre);

	/* Compilation failed: format the error text into the caller's buffer. */
	AN(VSB_init(vsb, errbuf, errbufsz));
	AZ(VRE_error(vsb, errcode));
	AZ(VSB_finish(vsb));
	VSB_fini(vsb);
	return (NULL);
}
VCL_VOID VCL_VOID
vmod_regex__init(VRT_CTX, struct vmod_re_regex **rep, const char *vcl_name, vmod_regex__init(VRT_CTX, struct vmod_re_regex **rep, const char *vcl_name,
VCL_STRING pattern, VCL_INT limit, VCL_INT limit_recursion) VCL_STRING pattern, VCL_INT limit, VCL_INT limit_recursion)
{ {
struct vmod_re_regex *re; struct vmod_re_regex *re;
vre_t *vre; vre_t *vre;
char errbuf[VRE_ERROR_LEN];
int erroffset; int erroffset;
const char *error; const char *error;
...@@ -109,10 +120,11 @@ vmod_regex__init(VRT_CTX, struct vmod_re_regex **rep, const char *vcl_name, ...@@ -109,10 +120,11 @@ vmod_regex__init(VRT_CTX, struct vmod_re_regex **rep, const char *vcl_name,
return; return;
} }
if ((vre = VRE_compile(pattern, 0, &error, &erroffset)) == NULL) { vre = re_compile(pattern, 0, errbuf, sizeof errbuf, &erroffset);
if (vre == NULL) {
VRT_fail(ctx, "vmod re: error compiling regex \"%s\" in %s " VRT_fail(ctx, "vmod re: error compiling regex \"%s\" in %s "
"constructor: %s (at offset %d)", pattern, vcl_name, "constructor: %s (at offset %d)", pattern, vcl_name,
error, erroffset); errbuf, erroffset);
return; return;
} }
...@@ -120,7 +132,7 @@ vmod_regex__init(VRT_CTX, struct vmod_re_regex **rep, const char *vcl_name, ...@@ -120,7 +132,7 @@ vmod_regex__init(VRT_CTX, struct vmod_re_regex **rep, const char *vcl_name,
AN(re); AN(re);
re->vre = vre; re->vre = vre;
re->vre_limits.match = limit; re->vre_limits.match = limit;
re->vre_limits.match_recursion = limit_recursion; re->vre_limits.depth = limit_recursion;
*rep = re; *rep = re;
} }
...@@ -144,8 +156,12 @@ match(VRT_CTX, const vre_t *vre, VCL_STRING subject, struct vmod_priv *task, ...@@ -144,8 +156,12 @@ match(VRT_CTX, const vre_t *vre, VCL_STRING subject, struct vmod_priv *task,
const struct vre_limits *vre_limits) const struct vre_limits *vre_limits)
{ {
ov_t *ov; ov_t *ov;
int s, nov[MAX_OV]; int i, r = 0, s, nov[MAX_OV];
size_t cp, len; size_t cp;
pcre2_match_context *re_ctx;
pcre2_match_data *data;
pcre2_code *re;
PCRE2_SIZE *ovector;
AN(vre); AN(vre);
if (subject == NULL) if (subject == NULL)
...@@ -167,30 +183,60 @@ match(VRT_CTX, const vre_t *vre, VCL_STRING subject, struct vmod_priv *task, ...@@ -167,30 +183,60 @@ match(VRT_CTX, const vre_t *vre, VCL_STRING subject, struct vmod_priv *task,
CAST_OBJ_NOTNULL(ov, task->priv, OV_MAGIC); CAST_OBJ_NOTNULL(ov, task->priv, OV_MAGIC);
} }
len = strlen(subject); // BEGIN duplication with vre
s = VRE_exec(vre, subject, len, 0, 0, nov, MAX_OV, vre_limits); re = VRE_unpack(vre);
if (s <= VRE_ERROR_NOMATCH) { AN(re);
if (s < VRE_ERROR_NOMATCH) data = pcre2_match_data_create_from_pattern(re, NULL);
if (data == NULL) {
VRT_fail(ctx, "vmod_re: failed to create match data");
return 0;
}
// END duplication with vre
// BEGIN unneeded overhead (unless we get access to re_ctx also)
re_ctx = pcre2_match_context_create(NULL);
if (re_ctx == NULL) {
VRT_fail(ctx, "vmod_re: failed to create context");
goto out;
}
pcre2_set_depth_limit(re_ctx, vre_limits->depth);
pcre2_set_match_limit(re_ctx, vre_limits->match);
// END unneeded overhead
s = pcre2_match(re, (PCRE2_SPTR)subject, PCRE2_ZERO_TERMINATED, 0,
0, data, re_ctx);
if (s <= PCRE2_ERROR_NOMATCH) {
if (s < PCRE2_ERROR_NOMATCH)
VSLb(ctx->vsl, SLT_VCL_Error, VSLb(ctx->vsl, SLT_VCL_Error,
"vmod re: regex match returned %d", s); "vmod re: regex match returned %d", s);
return 0; goto out;
} }
if (s == 0) { if (s > MAX_MATCHES) {
VSLb(ctx->vsl, SLT_VCL_Error, VSLb(ctx->vsl, SLT_VCL_Error,
"vmod re: capturing substrings exceed max %d", "vmod re: capturing substrings exceed max %d",
MAX_MATCHES - 1); MAX_MATCHES - 1);
s = MAX_MATCHES; s = MAX_MATCHES;
} }
ovector = pcre2_get_ovector_pointer(data);
assert (s <= pcre2_get_ovector_count(data));
task->len = sizeof(*ov); task->len = sizeof(*ov);
ov->subject = subject; ov->subject = subject;
memset(ov->ovector, -1, sizeof(ov->ovector)); for (i = 0; i < s * 2; i++)
cp = s; ov->ovector[i] = ovector[i];
cp *= 2 * sizeof(*nov); for ( ; i < MAX_OV; i++)
assert(cp <= sizeof(ov->ovector)); ov->ovector[i] = -1;
memcpy(ov->ovector, nov, cp);
return 1; r = 1;
out: // XXX goto because this might be throw-away code
AN(data);
pcre2_match_data_free(data);
if (re_ctx != NULL)
pcre2_match_context_free(re_ctx);
return (r);
} }
static VCL_STRING static VCL_STRING
...@@ -223,7 +269,7 @@ backref(VRT_CTX, VCL_INT refnum, VCL_STRING fallback, ...@@ -223,7 +269,7 @@ backref(VRT_CTX, VCL_INT refnum, VCL_STRING fallback,
CAST_OBJ_NOTNULL(ov, task->priv, OV_MAGIC); CAST_OBJ_NOTNULL(ov, task->priv, OV_MAGIC);
refnum *= 2; refnum *= 2;
assert(refnum + 1 < MAX_OV_USED); assert(refnum + 1 < MAX_OV);
if (ov->ovector[refnum] == -1) if (ov->ovector[refnum] == -1)
return fallback; return fallback;
...@@ -255,9 +301,9 @@ get_limits(const struct vmod_re_regex *re, struct vre_limits *limits, ...@@ -255,9 +301,9 @@ get_limits(const struct vmod_re_regex *re, struct vre_limits *limits,
limits->match = re->vre_limits.match; limits->match = re->vre_limits.match;
if (limit_recursion > 0) if (limit_recursion > 0)
limits->match_recursion = limit_recursion; limits->depth = limit_recursion;
else else
limits->match_recursion = re->vre_limits.match_recursion; limits->depth = re->vre_limits.depth;
return (limits); return (limits);
} }
...@@ -308,8 +354,8 @@ vmod_match_dyn(VRT_CTX, struct vmod_priv *task, VCL_STRING pattern, ...@@ -308,8 +354,8 @@ vmod_match_dyn(VRT_CTX, struct vmod_priv *task, VCL_STRING pattern,
VCL_STRING subject, VCL_INT limit, VCL_INT limit_recursion) VCL_STRING subject, VCL_INT limit, VCL_INT limit_recursion)
{ {
vre_t *vre; vre_t *vre;
char errbuf[VRE_ERROR_LEN];
int erroffset; int erroffset;
const char *error;
VCL_BOOL dyn_return; VCL_BOOL dyn_return;
struct vre_limits limits; struct vre_limits limits;
...@@ -329,14 +375,14 @@ vmod_match_dyn(VRT_CTX, struct vmod_priv *task, VCL_STRING pattern, ...@@ -329,14 +375,14 @@ vmod_match_dyn(VRT_CTX, struct vmod_priv *task, VCL_STRING pattern,
} }
limits.match = limit; limits.match = limit;
limits.match_recursion = limit_recursion; limits.depth = limit_recursion;
task->len = 0; task->len = 0;
vre = VRE_compile(pattern, 0, &error, &erroffset); vre = re_compile(pattern, 0, errbuf, sizeof errbuf, &erroffset);
if (vre == NULL) { if (vre == NULL) {
VSLb(ctx->vsl, SLT_VCL_Error, VSLb(ctx->vsl, SLT_VCL_Error,
"vmod re: error compiling regex \"%s\": %s (position %d)", "vmod re: error compiling regex \"%s\": %s (position %d)",
pattern, error, erroffset); pattern, errbuf, erroffset);
return 0; return 0;
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment