For body matches, use temporary space on heap rather than workspace

The main reason is an upcoming improvement which needs workspace.

Besides, using workspace here probably offers no significant
advantage, because regular expression matching and copying are
already expensive, and the need to copy should be the exception.
parent 7668930a
......@@ -463,12 +463,21 @@ struct re_iter_priv {
size_t len;
};
/*
 * Drop the temporary heap buffer held by the iterator state.
 *
 * reip is checked with CHECK_OBJ_NOTNULL against RE_ITER_PRIV_MAGIC.
 * Its buf member is handed to free() and set to NULL, and len is reset
 * to 0. free(NULL) is a no-op, so calling this with no buffer is fine.
 *
 * ret is returned unchanged, which lets callers write
 * "return (reip_free(reip, code));" on their exit paths.
 */
static int
reip_free(struct re_iter_priv *reip, int ret)
{
	CHECK_OBJ_NOTNULL(reip, RE_ITER_PRIV_MAGIC);

	/* forget the recorded length, then release the buffer itself */
	reip->len = 0;
	free(reip->buf);
	reip->buf = NULL;

	return (ret);
}
static int v_matchproto_(objiterate_f)
match_iter_f(void *priv, unsigned flush, const void *ptr, ssize_t len)
{
struct re_iter_priv *reip;
VCL_STRING subject;
void *p;
ov_t *ov;
int i;
......@@ -481,8 +490,10 @@ match_iter_f(void *priv, unsigned flush, const void *ptr, ssize_t len)
#endif
/* already have a match ? */
if (reip->s > PCRE2_ERROR_NOMATCH)
if (reip->s > PCRE2_ERROR_NOMATCH) {
AZ(reip->buf);
return (0);
}
if (reip->s == PCRE2_ERROR_NOMATCH) {
AZ(reip->startoffset);
......@@ -493,11 +504,8 @@ match_iter_f(void *priv, unsigned flush, const void *ptr, ssize_t len)
}
else if (reip->s == PCRE2_ERROR_PARTIAL) {
AN(reip->buf);
if (reip->len + len > WS_ReservationSize(reip->ctx->ws)) {
errmsg(reip->ctx, "vmod re: insufficient workspace "
"while iterating (append)");
return (-1);
}
reip->buf = realloc(reip->buf, reip->len + len);
AN(reip->buf);
memcpy(reip->buf + reip->len, ptr, len);
reip->len += len;
......@@ -527,15 +535,11 @@ match_iter_f(void *priv, unsigned flush, const void *ptr, ssize_t len)
return (0);
if (reip->s == PCRE2_ERROR_PARTIAL) {
if (len > WS_ReservationSize(reip->ctx->ws)) {
errmsg(reip->ctx, "vmod re: insufficient workspace "
"for partial copy");
return (-1);
}
reip->buf = WS_Reservation(reip->ctx->ws);
AZ(reip->buf);
reip->buf = malloc(len);
AN(reip->buf);
memcpy(reip->buf, ptr, len);
reip->len = len;
reip->startoffset = ov->ovector[0];
return (0);
......@@ -543,42 +547,40 @@ match_iter_f(void *priv, unsigned flush, const void *ptr, ssize_t len)
reip->options |= PCRE2_NOTBOL;
if (reip->s < PCRE2_ERROR_PARTIAL)
return (1);
if (reip->s < PCRE2_ERROR_PARTIAL) {
return (reip_free(reip, 1));
}
if (reip->s == PCRE2_ERROR_NOMATCH) {
reip->startoffset = 0;
reip->buf = NULL;
reip->len = 0;
return (0);
return (reip_free(reip, 0));
}
assert(reip->s > PCRE2_ERROR_NOMATCH);
if (reip->buf == NULL && (flush & OBJ_ITER_FLUSH) == 0) {
/* no need to copy */
WS_Release(reip->ctx->ws, 0);
/* no buffer && no flush -> no copy needed */
if (reip->buf == NULL && (flush & OBJ_ITER_FLUSH) == 0)
return (0);
}
len = (ssize_t)ov->ovector[1] - (ssize_t)ov->ovector[0];
/*
* we have a match, and we either can not hold on to the memory (because
* OBJ_ITER_FLUSH), or it is in our temporary buffer, which we want to
* free. Copy the match out to workspace
*/
assert(ov->ovector[0] >= 0);
assert(ov->ovector[1] >= ov->ovector[0]);
len = (typeof(len))ov->ovector[1] - (typeof(len))ov->ovector[0];
assert(len >= 0);
if (reip->buf == NULL && len > WS_ReservationSize(reip->ctx->ws)) {
ov->subject = (len == 0) ? "" :
WS_Copy(reip->ctx->ws, subject + ov->ovector[0], len);
if (ov->subject == NULL) {
errmsg(reip->ctx, "vmod re: insufficient workspace "
"for match copy");
WS_Release(reip->ctx->ws, 0);
return (-1);
"for match copy");
return (reip_free(reip, -1));
}
ov->subject = p = WS_Reservation(reip->ctx->ws);
if (reip->buf == NULL)
memcpy(p, subject + ov->ovector[0], len);
else
memmove(p, subject + ov->ovector[0], len);
WS_Release(reip->ctx->ws, len);
/* we have copied subject from start of match, fix all offsets */
len = ov->ovector[0];
for (i = 0; i < reip->s * 2; i++) {
......@@ -586,7 +588,7 @@ match_iter_f(void *priv, unsigned flush, const void *ptr, ssize_t len)
ov->ovector[i] -= len;
}
return (0);
return (reip_free(reip, 0));
}
VCL_BOOL
......@@ -620,11 +622,6 @@ vmod_regex_match_body(VRT_CTX, struct vmod_re_regex *re, VCL_ENUM which,
if (task->priv == NULL)
return (0);
if (! WS_ReserveAll(ctx->ws)) {
errmsg(ctx, "vmod re: no workspace reservation possible");
return (0);
}
INIT_OBJ(reip, RE_ITER_PRIV_MAGIC);
reip->options = PCRE2_PARTIAL_HARD;
reip->s = PCRE2_ERROR_NOMATCH;
......@@ -667,6 +664,8 @@ vmod_regex_match_body(VRT_CTX, struct vmod_re_regex *re, VCL_ENUM which,
if (reip->s == PCRE2_ERROR_PARTIAL)
(void) match_iter_f(reip, OBJ_ITER_END, "", 0);
AZ(reip->buf);
assert(reip->s != PCRE2_ERROR_PARTIAL);
if (reip->s > PCRE2_ERROR_NOMATCH) {
......@@ -676,7 +675,6 @@ vmod_regex_match_body(VRT_CTX, struct vmod_re_regex *re, VCL_ENUM which,
// error or no match
memset(reip, 0, sizeof *reip);
WS_Release(ctx->ws, 0);
return (0);
}
......
......@@ -204,7 +204,7 @@ Description
method to reduce memory requirements. In particular, unlike
implementations in other vmods, this implementation does _not_
read the full body object into a contiguous memory region. It
might, however, require up to roughly as much workspace as all
might, however, require as much temporary heap space as all
body segments which the match found by the pattern spans.
Under ideal conditions, when the pattern spans only a single
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment