subreqs_from_body - simplified and memory-saving request creation

parent 7d378554
......@@ -24,6 +24,7 @@ stamp-h1
# test suite
zfr_iter_test
*.log
*.trs
......
......@@ -7,12 +7,21 @@ vmod_LTLIBRARIES = \
libvmod_zipflow_la_LDFLAGS = $(VMOD_LDFLAGS) $(ZLIB_LIBS)
libvmod_zipflow_la_SOURCES = \
vmod_zipflow.c
vmod_zipflow.c \
zfr_iter.c \
zfr_iter.h
nodist_libvmod_zipflow_la_SOURCES = \
../foreign/zipflow/zipflow.c \
vcc_zipflow_if.c \
vcc_zipflow_if.h
noinst_PROGRAMS = zfr_iter_test
zfr_iter_test_SOURCES = zfr_iter.c zfr_iter.h
zfr_iter_test_CFLAGS = $(AM_CFLAGS) -DTEST_DRIVER
zfr_iter_test_LDFLAGS = $(VMOD_LDFLAGS) $(ZLIB_LIBS) \
$(VARNISHSRC)/lib/libvarnish/libvarnish.la
@BUILD_VMOD_ZIPFLOW@
# Test suite
......@@ -27,11 +36,13 @@ AM_VTC_LOG_FLAGS = \
-p vmod_path="$(abs_builddir)/.libs:$(vmoddir):$(VARNISHAPI_VMODDIR)"
TESTS = \
zfr_iter_test \
vtc/vmod_zipflow.vtc \
vtc/empty.vtc \
vtc/coverage.vtc \
vtc/sub.vtc \
vtc/sub-coalesce.vtc
vtc/sub-coalesce.vtc \
vtc/sub-body.vtc
# Documentation
......
......@@ -13,6 +13,7 @@
// we can not change external interfaces / code
-efile(766, *_if.c)
-emacro(776, RUP2) // macro is not for int
-emacro(835, VCT*)
// must always be included to ensure sanity
-efile(766, config.h)
......
......@@ -38,11 +38,12 @@
#include <vtim.h>
#include <VSC_main.h>
#include "vcc_zipflow_if.h"
#include "zlib.h"
#include "zipflow.h"
#include "vcc_zipflow_if.h"
#include "zfr_iter.h"
static void __attribute__((constructor))
assert_zlib(void)
{
......@@ -88,6 +89,9 @@ VSTAILQ_HEAD(zipflow_head, zipflow_request);
struct zipflow_top {
unsigned magic;
#define ZIPFLOW_TOP_MAGIC 0x5743145e
unsigned body_req:1;
unsigned body_resp:1;
struct zfr_iter_s *zis;
struct zipflow_head head;
ZIP *zip;
struct req *req;
......@@ -146,14 +150,12 @@ static const struct vdp vdp_zipsub[1] = {{
.priv1 = NULL
}};
static struct zipflow_request *
new_zipflow_request(VRT_CTX, struct zipflow_top *zft)
static void
init_zipflow_request(struct zipflow_request *zfr, struct zipflow_top *zft)
{
struct zipflow_request *zfr;
WS_TOP_ALLOC_OBJ(ctx, zfr, ZIPFLOW_REQUEST_MAGIC);
if (zfr == NULL)
return (NULL);
CHECK_OBJ_NOTNULL(zfr, ZIPFLOW_REQUEST_MAGIC);
CHECK_OBJ_NOTNULL(zft, ZIPFLOW_TOP_MAGIC);
VSTAILQ_INSERT_TAIL(&zft->head, zfr, list);
......@@ -164,6 +166,18 @@ new_zipflow_request(VRT_CTX, struct zipflow_top *zft)
zfr->atime = -1;
zfr->mtime = -1;
zfr->top = zft;
}
/*
 * Allocate a zipflow_request with WS_TOP_ALLOC_OBJ() and hand it to
 * init_zipflow_request() for zft.
 *
 * Returns the new request, or NULL if the allocation failed (in which
 * case init_zipflow_request() is not called).
 */
static struct zipflow_request *
new_zipflow_request(VRT_CTX, struct zipflow_top *zft)
{
	struct zipflow_request *zfr;

	WS_TOP_ALLOC_OBJ(ctx, zfr, ZIPFLOW_REQUEST_MAGIC);
	if (zfr != NULL)
		init_zipflow_request(zfr, zft);
	return (zfr);
}
......@@ -227,6 +241,45 @@ vmod_subreq(VRT_CTX, VCL_STRING uri, VCL_STRING host)
zfr->uri = uri;
}
/*
 * VCL: zipflow.subreqs_from_body(ENUM which)
 *
 * Flag the top level zipflow state so that sub requests are created from
 * the request body (req_body) or from the response body (resp_body).
 *
 * Fails the VCL transaction when called from a sub request or, for
 * req_body, when the request body is not cached. For resp_body, bundling
 * is also switched off on the current zipflow request.
 */
VCL_VOID
vmod_subreqs_from_body(VRT_CTX, VCL_ENUM which)
{
	struct zipflow_request *cur;
	struct zipflow_top *top;

	CHECK_OBJ_NOTNULL(ctx, VRT_CTX_MAGIC);

	if (vmod_is_subreq(ctx)) {
		VRT_fail(ctx, "subreqs_from_body can not be "
		    "called from a subrequest");
		return;
	}

	cur = get_zipflow_request(ctx);
	if (cur == NULL)
		return;
	top = cur->top;
	CHECK_OBJ_NOTNULL(top, ZIPFLOW_TOP_MAGIC);

	// because $Restrict
	AN(ctx->req);

	if (which == VENUM(resp_body)) {
		top->body_resp = 1;
		cur->bundle = 0;
		return;
	}

	/* only req_body is left as a valid choice */
	if (which != VENUM(req_body))
		WRONG("which");

	if (ctx->req->req_body_status != BS_CACHED) {
		VRT_fail(ctx, "subreqs_from_body can only "
		    "be used with a cached request body "
		    "(see std.cache_req_body())");
		return;
	}
	top->body_req = 1;
}
VCL_BOOL
vmod_is_subreq(VRT_CTX)
{
......@@ -418,6 +471,57 @@ vmod_event(VRT_CTX, struct vmod_priv *priv, enum vcl_event_e e)
return (0);
}
/*
* ============================================================
* Iterators: Iterate over a body to create sub requests
*/
static void
zfr_include(struct req *preq, struct zipflow_request *zfr);
/*
 * zfr_iter_f callback: issue a sub request for url u on host h.
 *
 * priv is the struct zipflow_top. The request object is kept on this
 * function's stack instead of being allocated. Requests already queued
 * on zft->head are passed to zfr_include() first; then the stack request
 * is queued (and must be first in the queue) and the queue is drained
 * again, so the queue is empty when this function returns.
 *
 * NOTE(review): this relies on zfr_include() (not visible here) removing
 * the request it is given from zft->head - otherwise the loops below
 * would not terminate.
 */
static void
zfr_iter_include(void *priv, const char *u, const char *h)
{
	struct zipflow_request onstack[1], *pending;
	struct zipflow_top *zft;

	CAST_OBJ_NOTNULL(zft, priv, ZIPFLOW_TOP_MAGIC);

	/* flush whatever has been queued before this token */
	for (pending = VSTAILQ_FIRST(&zft->head);
	    pending != NULL;
	    pending = VSTAILQ_FIRST(&zft->head))
		zfr_include(zft->req, pending);

	INIT_OBJ(onstack, ZIPFLOW_REQUEST_MAGIC);
	init_zipflow_request(onstack, zft);
	onstack->host = h;
	onstack->uri = u;
	assert(onstack == VSTAILQ_FIRST(&zft->head));

	/* run our request and anything which got queued while it ran */
	for (pending = VSTAILQ_FIRST(&zft->head);
	    pending != NULL;
	    pending = VSTAILQ_FIRST(&zft->head))
		zfr_include(zft->req, pending);
}
/*
 * If sub requests from the request body were asked for (zft->body_req),
 * iterate over the request body with zfr_iter() and issue a sub request
 * for each token through zfr_iter_include().
 *
 * The result of VRB_Iterate() is deliberately ignored.
 */
static void
zft_iter_req_body(struct zipflow_top *zft)
{
	struct zfr_iter_s iter[1];
	struct req *req;

	CHECK_OBJ_NOTNULL(zft, ZIPFLOW_TOP_MAGIC);
	req = zft->req;
	CHECK_OBJ_NOTNULL(req, REQ_MAGIC);

	if (!zft->body_req)
		return;

	INIT_OBJ(iter, ZFR_ITER_MAGIC);
	iter->zft = zft;
	iter->func = zfr_iter_include;
	iter->priv = zft;

	(void) VRB_Iterate(req->wrk, req->vsl, req,
	    zfr_iter, (void *)iter);
	/* flushes a trailing token, if any, and frees the iterator buffer */
	zfr_iter_fini(iter);
}
/*
* ============================================================
* VDP
......@@ -499,6 +603,19 @@ vdp_zipflow_init(VRT_CTX, struct vdp_ctx *vdc, void **priv, struct objcore *oc)
return (1);
}
if (zft->body_resp) {
zfr->bundle = 0;
WS_TOP_ALLOC_OBJ(ctx, zft->zis, ZFR_ITER_MAGIC);
if (zft->zis == NULL) {
VSLb(vdc->vsl, SLT_Error, "zipflow: workspace "
"overflow for body_resp");
return (1);
}
zft->zis->zft = zft;
zft->zis->func = zfr_iter_include;
zft->zis->priv = zft;
}
AZ(zft->req);
zft->req = vdc->req;
AZ(zft->zip);
......@@ -544,15 +661,17 @@ vdp_zipflow_fini(struct vdp_ctx *vdc, void **priv)
AN(priv);
zfr = *priv;
*priv = NULL;
if (zfr != NULL) {
CHECK_OBJ(zfr, ZIPFLOW_REQUEST_MAGIC);
zft = zfr->top;
CHECK_OBJ_NOTNULL(zft, ZIPFLOW_TOP_MAGIC);
r = zip_close(zft->zip);
if (r)
VSLb(vdc->vsl, SLT_Error, "zip_close returned %d", r);
memset(zfr, 0, sizeof *zfr);
}
if (zfr == NULL)
return (0);
CHECK_OBJ(zfr, ZIPFLOW_REQUEST_MAGIC);
zft = zfr->top;
CHECK_OBJ_NOTNULL(zft, ZIPFLOW_TOP_MAGIC);
r = zip_close(zft->zip);
if (r)
VSLb(vdc->vsl, SLT_Error, "zip_close returned %d", r);
memset(zfr, 0, sizeof *zfr);
return (0);
}
......@@ -583,9 +702,6 @@ vdp_zipsub_bytes(struct vdp_ctx *vdc, enum vdp_action act, void **priv,
return (0);
}
static void
zfr_include(struct req *preq, struct zipflow_request *zfr);
static int
vdp_zipflow_bytes(struct vdp_ctx *vdc, enum vdp_action act, void **priv,
const void *ptr, ssize_t len)
......@@ -594,12 +710,26 @@ vdp_zipflow_bytes(struct vdp_ctx *vdc, enum vdp_action act, void **priv,
struct zipflow_top *zft;
int r, c;
r = vdp_zipsub_bytes(vdc, act, priv, ptr, len);
AN(priv);
CAST_OBJ_NOTNULL(zfr, *priv, ZIPFLOW_REQUEST_MAGIC);
zft = zfr->top;
if (zft->zis != NULL) {
r = zfr_iter(zft->zis, (act == VDP_END) ? OBJ_ITER_END : 0,
ptr, len);
}
else
r = vdp_zipsub_bytes(vdc, act, priv, ptr, len);
if (r == 0 && act != VDP_END)
return (r);
TAKE_OBJ_NOTNULL(zfr, priv, ZIPFLOW_REQUEST_MAGIC);
zft = zfr->top;
*priv = NULL;
if (zft->zis != NULL) {
zfr_iter_fini(zft->zis);
zft->zis = NULL;
}
memset(zfr, 0, sizeof *zfr);
CHECK_OBJ_NOTNULL(zft, ZIPFLOW_TOP_MAGIC);
......@@ -608,6 +738,8 @@ vdp_zipflow_bytes(struct vdp_ctx *vdc, enum vdp_action act, void **priv,
zfr_include(zft->req, zfr);
}
zft_iter_req_body(zft);
c = zip_close(zft->zip);
if (c)
VSLb(vdc->vsl, SLT_Error, "zip_close returned %d", r);
......
......@@ -47,7 +47,7 @@ processing.
If *host* is omitted (default), it is taken from the parent request.
This function can be called any number of times to add multiple
files, it can eben be called from a sub request, which is to say that
files, it can even be called from a sub request, which is to say that
more files can be added while requests for files are processed.
The sub request can be identified using `zipflow.is_subreq()`_. In the
......@@ -57,6 +57,47 @@ used to control how zipflow handles the body.
Only sub requests with response status 200 will be included in the
resulting zip file.
$Function VOID subreqs_from_body(ENUM {req_body, resp_body } which)
$Restrict client
*Note* this function should eventually be superseded by something
more versatile.
Parse the given body for tokens in one of the following formats,
separated by any whitespace (``\\r\\n\\t\\s``)
* ``http://``\ *host*\ *url*
* ``https://``\ *host*\ *url*
* ``//``\ *host*\ *url*
* ``url``
with *host* containing any non-whitespace character except for ``/``
and *url* starting with ``/`` and run a sub request for each token as
if ``subreq(``\ *url*\ ``, ``\ *host*\ ``)`` was invoked, but without
using any workspace memory.
This function can only be called from the top level, that is, not from
a sub request.
For ``which = resp_body``, bundling of the current body is turned
off.
For ``which = req_body``, ``std.cache_req_body()`` must have been
called earlier.
The order of subrequests is:
* `zipflow.subreq()`_ initiated by the top level request
* ``resp_body``
* each subreq followed by any `zipflow.subreq()`_ initiated from it
* ``req_body``
* each subreq followed by any `zipflow.subreq()`_ initiated from it
$Function BOOL is_subreq()
$Restrict client
......
varnishtest "vmod-zipflow sub requests from bodies"
# Exercises zipflow.subreqs_from_body() with both the response body
# (the synthetic body set in synth_top) and the cached request body.
feature cmd "type curl && type unzip"
varnish v1 -vcl {
import zipflow;
import std;
backend proforma none;
sub vcl_recv {
# subreqs_from_body(req_body) requires a cached request body
std.cache_req_body(1M);
return (synth(200));
}
sub synth_top {
zipflow.subreq("/fromvcl");
synthetic(" /FIRST/FROM/RESP/file /file1");
zipflow.subreqs_from_body(resp_body);
zipflow.subreqs_from_body(req_body);
set resp.filters += " zipflow";
}
sub synth_sub {
synthetic("sub " + req.url);
}
sub vcl_synth {
if (zipflow.is_subreq()) {
call synth_sub;
zipflow.meta(name=req.url);
# sub requests may rename their entry and queue further sub requests
if (req.url ~ "/file1") {
zipflow.meta(name="file1.changed");
zipflow.subreq("/file3");
} else
if (req.url ~ "/file3") {
zipflow.subreq("/file4");
}
} else {
call synth_top;
}
return (deliver);
}
} -start
client c1 {
txreq -body {
/REQ/first/file
http://thishost/path/file1
https://thishost/another/file2
//thishost/file3
}
rxresp
expect resp.status == 200
# no request body: presumably there is no cached body then, so
# subreqs_from_body(req_body) fails the transaction (TODO confirm)
txreq
rxresp
expect resp.status == 500
} -run
# all default
shell "curl --data-raw \"/REQ/CURL/first http://thishost/path/file1 https://thishost/another/file2 //thishost/xxx/file3\" -so t.zip -H 'Host: ${v1_addr}' http://${v1_addr}:${v1_port}/ && unzip -Z t.zip"
/*-
* Copyright 2022,2023 UPLEX Nils Goroll Systemoptimierung
* All rights reserved
*
* Author: Nils Goroll <nils.goroll@uplex.de>
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* iterate over a body and call callback for anything which looks like a url
*/
#include "config.h"
#include <string.h> // memset() for INIT_OBJ()
#include <stdlib.h> // strtoul()
#include <stdio.h>
#include <cache/cache.h>
#include <vct.h>
#include "zfr_iter.h"
/*
 * Advance *p by l bytes. Always returns 1, so the call can be the last
 * operand of an && chain.
 *
 * Kept as a function because flexelint does not understand
 * (p += strlen(s)) written in place.
 */
static inline int
pincr(const char **p, size_t l)
{
	const char *next = *p + l;

	*p = next;
	return (1);
}
/*
 * tok(p, e, s): if at least strlen(s) bytes are available between p and e
 * and they equal the string s, advance p past them and evaluate to true.
 * Otherwise evaluate to false and leave p unchanged.
 *
 * p is evaluated several times and is modified through pincr(&p, ...),
 * so it must be a plain pointer variable without side effects.
 */
#define tok(p, e, s) ( \
pdiff(p,e) >= strlen(s) && \
strncmp(p, s, strlen(s)) == 0 && \
pincr(&p, strlen(s)) \
)
/*
 * Body iterator callback: scan one chunk of a body for url tokens.
 *
 * priv   struct zfr_iter_s with the carry-over buffer and the callback
 * flush  iterator flags, OBJ_ITER_END marks the last chunk
 * ptr    chunk data; only alen bytes are read, no NUL terminator needed
 * alen   number of bytes at ptr, must be >= 0
 *
 * Tokens are separated by whitespace as per vct_islws() and look like
 * [http[s]:]//host/url or /url. For each complete token, zis->func is
 * called with NUL terminated copies of the url and the host (NULL if the
 * token has no host). The copies are freed right after the call.
 *
 * A token touching the end of the chunk may continue in the next chunk,
 * so it is only reported once whitespace follows or OBJ_ITER_END is set.
 * Bytes not consumed yet are kept in zis->buf for the next call.
 *
 * Scanning stops at the first token which is not (yet) valid: no leading
 * '/' on the url, an empty host, or a host without a url. Whatever follows
 * is carried over and dropped when OBJ_ITER_END is seen.
 *
 * Always returns 0. Allocation failures are asserted.
 */
int
zfr_iter(void *priv, unsigned flush, const void *ptr, ssize_t alen)
{
	const char *pp, *p, *e, *h, *u;
	struct zfr_iter_s *zis;
	char *hh, *uu;
	size_t l;

	CAST_OBJ_NOTNULL(zis, priv, ZFR_ITER_MAGIC);
	AZ(zis->end_seen);
	zis->end_seen = ((flush & OBJ_ITER_END) != 0);

#ifdef DEBUG
	printf(
	    "buf %zu >%.*s<\t"
	    "ptr %zd >%.*s<\n",
	    zis->len, (int)zis->len, zis->buf != NULL ? zis->buf : "",
	    alen, (int)alen, (const char *)ptr);
#endif

	if (zis->len > 0)
		AN(zis->buf);

	if (alen == 0 && zis->len == 0)
		return (0);

	assert(alen >= 0);
	l = (size_t) alen;

	if (zis->len == 0)
		p = ptr;
	else {
		/* append to the carry-over and parse the joined data */
		zis->buf = realloc(zis->buf, zis->len + l);
		AN(zis->buf);
		memcpy(zis->buf + zis->len, ptr, l);
		zis->len += l;
		p = zis->buf;
		l = zis->len;
	}
	AN(p);
	pp = p;		/* start of the data not consumed yet */
	e = p + l;
	while (p < e) {
		if (vct_islws(*p)) {
			p++;
			continue;
		}
		if (tok(p, e, "https://") ||
		    tok(p, e, "http://")) {
			/* step back onto the "//" introducing the host */
			p -= 2;
			// no need to keep this prefix in the buffer
			pp = p;
		}
		h = NULL;
		if (tok(p, e, "//")) {
			h = u = p;
			p -= 2;
			/*
			 * The host ends at the first '/', but it must not
			 * run across whitespace into the next token.
			 */
			while (u < e && *u != '/' && !vct_islws(*u))
				u++;
		} else
			u = p;
		while (p < e && !vct_islws(*p))
			p++;
		/*
		 * Not a (complete) token: the host may have hit the end of
		 * the data (never read *e), may be empty, or the url does
		 * not start with '/'.
		 */
		if (u >= e || *u != '/' || (h != NULL && u == h))
			break;
		if (p < e || flush & OBJ_ITER_END) {
			/* match! */
			hh = NULL;
			if (h) {
				assert(u > h);
				hh = strndup(h, pdiff(h, u));
				AN(hh);
			}
			assert(p > u);
			uu = strndup(u, pdiff(u, p));
			AN(uu);
			AN(zis->func);
			zis->func(zis->priv, uu, hh);
			free(hh);
			free(uu);
			pp = p;
		}
	}
	assert(e >= pp);
	l = pdiff(pp, e);
	if (flush & OBJ_ITER_END || l == 0) {
		zis->len = 0;
		return (0);
	}
	if (zis->len)
		/* pp points into zis->buf, regions may overlap */
		memmove(zis->buf, pp, l);
	else {
		zis->buf = realloc(zis->buf, l);
		AN(zis->buf);
		memcpy(zis->buf, pp, l);
	}
	zis->len = l;
	return (0);
}
/*
 * Finish an iteration: if bytes are still buffered and no chunk with
 * OBJ_ITER_END has been seen, run zfr_iter() once more with OBJ_ITER_END
 * and no data. Then free the buffer and reset buf and len.
 */
void
zfr_iter_fini(struct zfr_iter_s *zis)
{
	int pending;

	CHECK_OBJ_NOTNULL(zis, ZFR_ITER_MAGIC);

	pending = (zis->len > 0 && zis->end_seen == 0);
	if (pending)
		(void) zfr_iter(zis, OBJ_ITER_END, "", (ssize_t)0);

	free(zis->buf);
	zis->buf = NULL;
	zis->len = 0;
}
#ifdef TEST_DRIVER
#include <stdio.h>
/*
 * One test case: the url and host the callback is expected to see, and
 * the NULL terminated list of prefixes to build input tokens from.
 */
struct expect {
const char *u, *h, **pfx;
};
/* prefixes for tokens with a host, each without and with a leading space */
const char *pfx_host[7] = {
"https://",
"http://",
"//",
" https://",
" http://",
" //",
NULL
};
/* the only prefix for tokens without a host: none */
const char *pfx_nohost[2] = {
"",
NULL
};
/* test cases, terminated by an entry with .u == NULL */
static struct expect testcase[7] = {
{ .u = "/url", .h = "host", .pfx = pfx_host },
{ .u = "/", .h = "host", .pfx = pfx_host },
{ .u = "///", .h = "ho-t", .pfx = pfx_host },
{ .u = "/url", .h = NULL, .pfx = pfx_nohost },
{ .u = "/", .h = NULL, .pfx = pfx_nohost },
{ .u = "/a", .h = NULL, .pfx = pfx_nohost },
{ NULL }
};
/* private data of cb_want(): expectation and number of callbacks seen */
struct cb_want_priv {
unsigned magic;
#define CBW_MAGIC 0x79d8b905
unsigned count;
const struct expect *e;
};
/* zfr_iter_f callback for input which must not produce any token */
static void
cb_not(void *priv, const char *u, const char *h)
{
WRONG("not to be called");
}
/*
 * zfr_iter_f callback checking url u and host h against the expectation
 * in priv (struct cb_want_priv) and counting the call.
 */
static void
cb_want(void *priv, const char *u, const char *h)
{
	struct cb_want_priv *want;
	const struct expect *exp;

	CAST_OBJ_NOTNULL(want, priv, CBW_MAGIC);
	exp = want->e;

	if (exp->u != NULL)
		AZ(strcmp(exp->u, u));
	else
		AZ(u);

	if (exp->h != NULL)
		AZ(strcmp(exp->h, h));
	else
		AZ(h);

	want->count++;
}
/*
 * Feed s to zfr_iter() in chunks of every size from 1 to strlen(s) - 1
 * and check that each pass calls back exactly n times with the url and
 * host from want.
 *
 * The last chunk of each pass (never empty) is sent with OBJ_ITER_END.
 */
static void
t_steps(const struct expect *want, unsigned n, const char *s)
{
	struct cb_want_priv wp[1];
	struct zfr_iter_s zis[1];
	size_t l, step;
	const char *t;

	INIT_OBJ(zis, ZFR_ITER_MAGIC);
	INIT_OBJ(wp, CBW_MAGIC);
	wp->e = want;
	zis->priv = wp;
	zis->func = cb_want;

	for (step = 1; step < strlen(s); step++) {
		wp->count = 0;
		/* the iterator is reused, so re-arm it for this pass */
		zis->end_seen = 0;
		for (t = s, l = strlen(t);
		     l > step;
		     l -= step, t += step)
			(void) zfr_iter(zis, 0, t, (ssize_t)step);
		AN(l);
		(void) zfr_iter(zis, OBJ_ITER_END, t, (ssize_t)l);
		AN(zis->end_seen);
		assert(wp->count == n);
	}

	/* release the carry-over buffer zfr_iter() may have allocated */
	zfr_iter_fini(zis);
}
/*
 * Test driver: for each test case, build an input string token by token
 * and run t_steps() on it after each token was added.
 */
int
main(void) {
const struct expect *want;
struct zfr_iter_s zis[1];
const char **p, *t;
char s[256], u[64];
unsigned n;
size_t l;
// base test of tok()
strcpy(s, "https://");
t = s;
assert(tok(t, strchr(s, '\0'), "https://"));
for (want = testcase; want->u != NULL; want++) {
/* an empty chunk on a fresh iterator must not call back */
INIT_OBJ(zis, ZFR_ITER_MAGIC);
zis->priv = NULL;
zis->func = cb_not;
zfr_iter(zis, 0, "", 0);
/* n counts the tokens appended to s so far */
n = 0;
*s = '\0';
for (p = want->pfx; *p != NULL; p++) {
AN(p);
AN(*p);
/* tab separated token: url only for the empty prefix, else
 * prefix + host + url */
if (**p == '\0')
l = snprintf(u, sizeof u, "\t%s", want->u);
else {
l = snprintf(u, sizeof u, "\t%s%s%s",
*p, want->h, want->u);
}
assert(l < sizeof u);
/* stop adding tokens once s would overflow */
if (sizeof s - strlen(s) - 1 < l)
break;
(void) strcat(s, u);
n++;
t_steps(want, n, s);
}
}
zfr_iter_fini(zis);
printf("OK\n");
return (0);
}
#endif
/*-
* Copyright 2022,2023 UPLEX Nils Goroll Systemoptimierung
* All rights reserved
*
* Author: Nils Goroll <nils.goroll@uplex.de>
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* iterate over a body and call callback for anything which looks like a url
*/
#ifndef ZFR_ITER_H_INCLUDED
#define ZFR_ITER_H_INCLUDED

#include <sys/types.h>	// size_t, ssize_t

struct zipflow_top;

/*
 * Callback invoked by zfr_iter() for each token found.
 *
 * priv  the priv member of the struct zfr_iter_s
 * u     url of the token
 * h     host of the token, NULL if it has none
 */
typedef void zfr_iter_f(void *priv, const char *u, const char *h);

struct zfr_iter_s {
	unsigned		magic;
#define ZFR_ITER_MAGIC		0x2d560bd8
	/* set by zfr_iter() once a chunk with OBJ_ITER_END was seen */
	unsigned		end_seen;
	/* set by the vmod, not read by zfr_iter.c */
	struct zipflow_top	*zft;
	/* bytes carried over from previous chunks, allocated by zfr_iter() */
	char			*buf;
	size_t			len;
	/* callback and its private pointer */
	zfr_iter_f		*func;
	void			*priv;
};

/* flush a pending token if the end was not seen yet and free the buffer */
void zfr_iter_fini(struct zfr_iter_s *zis);

/* iterator callback, priv must point to a struct zfr_iter_s */
int zfr_iter(void *priv, unsigned flush,
    const void *ptr, ssize_t len);

#endif /* ZFR_ITER_H_INCLUDED */
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment