Add form parsing

parent 33336160
......@@ -47,6 +47,7 @@ TESTS = \
vtc/sub.vtc \
vtc/sub-coalesce.vtc \
vtc/sub-body.vtc \
vtc/sub-body-form.vtc \
vtc/ex1.vtc \
vtc/ex2.vtc \
vtc/ex3.vtc
......
......@@ -107,6 +107,7 @@ struct zipflow_top {
#define ZIPFLOW_TOP_MAGIC 0x5743145e
unsigned body_req:1;
unsigned body_resp:1;
objiterate_f *zfr_f;
struct zfr_iter_s *zis;
struct zipflow_head head;
ZIP *zip;
......@@ -268,7 +269,7 @@ vmod_subreq(VRT_CTX, VCL_STRING uri, VCL_STRING host)
}
VCL_VOID
vmod_subreqs_from_body(VRT_CTX, VCL_ENUM which)
vmod_subreqs_from_body(VRT_CTX, VCL_ENUM which, VCL_ENUM format)
{
struct zipflow_request *zfr;
struct zipflow_top *zft;
......@@ -304,6 +305,13 @@ vmod_subreqs_from_body(VRT_CTX, VCL_ENUM which)
zfr->bundle = 0;
} else
WRONG("which");
if (format == VENUM(whitespace))
zft->zfr_f = zfr_iter_ws;
else if (format == VENUM(form))
zft->zfr_f = zfr_iter_form;
else
WRONG("format");
}
VCL_BOOL
......@@ -542,8 +550,9 @@ zft_iter_req_body(struct zipflow_top *zft)
zis->priv = zft;
if (zft->body_req) {
AN(zft->zfr_f);
(void) VRB_Iterate(req->wrk, req->vsl, req,
zfr_iter_ws, (void *)zis);
zft->zfr_f, (void *)zis);
zfr_iter_fini(zis);
}
}
......@@ -764,7 +773,8 @@ vdp_zipflow_bytes(struct vdp_ctx *vdc, enum vdp_action act, void **priv,
zft = zfr->top;
if (zft->zis != NULL) {
r = zfr_iter_ws(zft->zis, (act == VDP_END) ? OBJ_ITER_END : 0,
AN(zft->zfr_f);
r = zft->zfr_f(zft->zis, (act == VDP_END) ? OBJ_ITER_END : 0,
ptr, len);
}
else
......
......@@ -12,7 +12,7 @@ import zipflow \[as name\] \[from \"path\"\]
VOID subreq(STRING url, STRING host)
VOID subreqs_from_body(ENUM which)
VOID subreqs_from_body(ENUM which, ENUM format)
BOOL is_subreq()
......@@ -147,15 +147,28 @@ handles the body.
Only sub requests with response status 200 will be included in the
resulting zip file.
## VOID subreqs_from_body(ENUM {req_body, resp_body} which) {#zipflow.subreqs_from_body()}
## VOID subreqs_from_body(ENUM which, ENUM format) {#zipflow.subreqs_from_body()}
VOID subreqs_from_body(
ENUM {req_body, resp_body} which,
ENUM {whitespace, form} format=whitespace
)
Restricted to: `client`.
*Note* this function should eventually be superseded with something more
versatile.
Parse the given body for tokens in one of the following formats,
separated by any whitespace (`\\r\\n\\t\\s`)
The given body is parsed according to the *format* argument, which can
be:
- `whitespace` (default) for separation of URLs by any whitespace
(`\\r\\n\\t\\s`), or
- `form` for `application/x-www-form-urlencoded`
(`key=value&key=value`\...): keys are ignored and only the
percent-decoded values are considered.
The tokens are expected to be in format
- `http://`*hosturl*
- `https://`*hosturl*
......
......@@ -134,15 +134,25 @@ used to control how zipflow handles the body.
Only sub requests with response status 200 will be included in the
resulting zip file.
$Function VOID subreqs_from_body(ENUM {req_body, resp_body } which)
$Function VOID subreqs_from_body(ENUM {req_body, resp_body } which,
ENUM {whitespace, form} format="whitespace")
$Restrict client
*Note* this function should eventually be superseded with something
more versatile.
Parse the given body for tokens in one of the following formats,
separated by any whitespace (``\\r\\n\\t\\s``)
The given body is parsed according to the *format* argument, which can
be:
* ``whitespace`` (default) for separation of URLs by any whitespace
(``\\r\\n\\t\\s``), or
* ``form`` for ``application/x-www-form-urlencoded``
(``key=value&key=value``\ ...): keys are ignored and only the
percent-decoded values are considered.
The tokens are expected to be in format
* ``http://``\ *host*\ *url*
* ``https://``\ *host*\ *url*
......
varnishtest "vmod-zipflow sub requests from bodies post form"
feature cmd "type curl && type unzip"
varnish v1 -vcl {
import zipflow;
import std;
backend proforma none;
sub vcl_recv {
std.cache_req_body(1M);
return (synth(200));
}
sub synth_top {
zipflow.subreq("/fromvcl");
zipflow.subreq("/404");
synthetic(" aaa=/FIRST/FROM/RESP/file&bbb=/file1");
zipflow.subreqs_from_body(resp_body, format=form);
zipflow.subreqs_from_body(req_body, format=form);
set resp.filters = "zipflow";
}
sub synth_sub {
set resp.http.is-subreq = zipflow.is_subreq();
synthetic("sub " + req.url);
}
sub vcl_synth {
if (req.url == "/404") {
set resp.status = 404;
set resp.body = "404";
}
else if (zipflow.is_subreq()) {
call synth_sub;
zipflow.meta(name=req.url);
if (req.url ~ "/file1") {
zipflow.meta(name="file1.changed");
zipflow.subreq("/file3");
} else
if (req.url ~ "/file3") {
zipflow.subreq("/file4");
}
}
else {
call synth_top;
}
return (deliver);
}
} -start
logexpect l1 -v v1 -g request -q "ReqURL ~ \"/REQ/first/file\"" {
fail add * ReqURL "valid"
expect * * ReqURL "/fromvcl"
expect * * ReqURL "/404"
expect * * ReqURL "/FIRST/FROM/RESP/file"
expect * * ReqURL "/file1"
expect * * ReqURL "/file3"
expect * * ReqURL "/file4"
expect * * ReqURL "/REQ/first/file"
expect * * ReqURL "/path/file1"
expect * * ReqURL "/file3"
expect * * ReqURL "/file4"
expect * * ReqURL "/another/file2"
expect * * ReqURL "/file3"
expect * * ReqURL "/file4"
fail clear
} -start
client c1 {
txreq -body {
foo=/REQ/first/file
&foo=http://thishost/path/file1&
in valid
https://invalid
// https://
&bazz=https://thishost/another/file2
&zrrr=//thishost/file3
}
rxresp
expect resp.status == 200
txreq
rxresp
expect resp.status == 500
} -run
logexpect l1 -wait
# all default
shell "curl --data-raw \"/REQ/CURL/first http://thishost/path/file1 https://thishost/another/file2 //thishost/xxx/file3\" -so t.zip -H 'Host: ${v1_addr}' http://${v1_addr}:${v1_port}/ && unzip -Z t.zip"
......@@ -204,13 +204,117 @@ zfr_iter_fini(struct zfr_iter_s *zis)
zis->len = 0;
}
/*
 * Map one hexadecimal digit to its numeric value.
 *
 * Returns 0..15 for [0-9a-fA-F] and -1 for any other character.
 */
static int
zfr_form_hexval(char c)
{

	if (c >= '0' && c <= '9')
		return (c - '0');
	if (c >= 'a' && c <= 'f')
		return (0xa + (c - 'a'));
	if (c >= 'A' && c <= 'F')
		return (0xa + (c - 'A'));
	return (-1);
}

/*
 * Iterate over application/x-www-form-urlencoded
 * by converting to whitespace format and passing to zfr_iter_ws().
 *
 * Keys (everything up to '=') are discarded. Value bytes are forwarded
 * to zfr_iter_ws() with %xx escapes decoded, and each '&' is forwarded
 * as a single space so that values become whitespace separated tokens.
 * The parser state lives in zis->form, so the input may be split across
 * calls at arbitrary positions, including in the middle of an escape.
 *
 * priv:	struct zfr_iter_s * (magic checked)
 * flush:	objiterate flags; OBJ_ITER_END marks the last chunk
 * ptr, alen:	chunk of the body, alen must be >= 0
 *
 * Returns 0 on success, -2 / -3 if the first / second character after
 * '%' is not a hexadecimal digit, or whatever non-zero value
 * zfr_iter_ws() returned.
 *
 * Notes:
 * - '+' is not translated to a space.
 * - A %xx escape truncated by the end of the body is dropped.
 * - NOTE(review): %00 and escaped whitespace are forwarded decoded;
 *   confirm zfr_iter_ws() copes with embedded NUL bytes.
 */
int
zfr_iter_form(void *priv, unsigned flush, const void *ptr, ssize_t alen)
{
	struct zfr_iter_form_s *zfrm;
	struct zfr_iter_s *zis;
	const char *pp, *p, *e;
	unsigned end;
	size_t l;
	int hv, r = 0;

	CAST_OBJ_NOTNULL(zis, priv, ZFR_ITER_MAGIC);
	zfrm = &zis->form;

	assert(alen >= 0);
	l = (size_t) alen;

	/* an empty chunk may come without a buffer: no pointer arithmetic */
	assert(l == 0 || ptr != NULL);
	pp = ptr;
	e = (l > 0) ? pp + l : pp;

	while (pp < e) {
		switch (zfrm->state) {
		case FRMST_KEY:
			/* keys are discarded, '=' starts the value */
			if (*pp == '=')
				zfrm->state = FRMST_VAL;
			break;
		case FRMST_VAL:
			/* forward the longest run of literal bytes at once */
			p = pp;
			while (p < e && *p != '%' && *p != '&')
				p++;

			if (p != pp) {
				/* only the final run of the chunk carries flush */
				end = (p == e) ? flush : 0;
				r = zfr_iter_ws(priv, end,
				    pp, (ssize_t)pdiff(pp, p));
				if (r)
					return (r);
				pp = p;
			}

			if (pp == e)
				continue;	/* chunk consumed, leave loop */
			else if (*pp == '%')
				zfrm->state = FRMST_PERC;
			else if (*pp == '&') {
				/* end of value: emit a token separator */
				r = zfr_iter_ws(priv, 0,
				    " ", (ssize_t)1);
				if (r)
					return (r);
				zfrm->state = FRMST_KEY;
			}
			else
				WRONG("impossible form val");
			break;
		case FRMST_PERC:
			/* high nibble of %xx */
			hv = zfr_form_hexval(*pp);
			if (hv < 0)
				return (-2);
			zfrm->hex = (unsigned char)(hv << 4);
			zfrm->state = FRMST_PERC1;
			break;
		case FRMST_PERC1:
			/* low nibble of %xx completes the byte */
			hv = zfr_form_hexval(*pp);
			if (hv < 0)
				return (-3);
			zfrm->hex |= (unsigned char)hv;
			/*
			 * pp < e always holds here, so this can not be
			 * recognized as the final call; the end of the
			 * body is signalled by the empty call below.
			 */
			r = zfr_iter_ws(priv, 0,
			    &zfrm->hex, (ssize_t)1);
			if (r)
				return (r);
			zfrm->state = FRMST_VAL;
			break;
		default:
			WRONG("zfrm->state");
		}
		pp++;
	}

	assert(pp == e);
	AZ(r);

	if ((flush & OBJ_ITER_END) != 0) {
		/* make sure zfr_iter_ws() gets to see the end of the body */
		if (zis->end_seen == 0)
			r = zfr_iter_ws(priv, flush, "", (ssize_t)0);
		/*
		 * The body is complete: start the next one with a key
		 * rather than inheriting a value or half an escape.
		 */
		zfrm->state = FRMST_KEY;
	}
	return (r);
}
#ifdef TEST_DRIVER
#include <stdio.h>
struct expect {
const char *u, *h, **pfx;
const char *eu, *u, *h, **pfx;
};
/* testcase whitespace
* -------------
*/
static const char *pfx_ws_host[7] = {
"https://",
"http://",
......@@ -227,7 +331,7 @@ static const char *pfx_ws_nohost[2] = {
};
// incomplete tokens to skip
static const char *invalid[10] = {
static const char *invalid_ws[10] = {
// lone prefixes
"https://",
"http:///",
......@@ -254,6 +358,53 @@ static struct expect tc_ws[9] = {
{ NULL }
};
/* testcase form
* -------------
*/
/*
 * URL-encoded prefixes which are followed by a host name.
 * The hex digits deliberately mix lower and upper case; the second half
 * repeats the first with a leading blank.
 */
static const char *pfx_form_host[7] = {
	"https%3a%2F%2F",	/* https:// */
	"http%3a%2F%2F",	/* http:// */
	"%2F%2F",		/* // */

	" https%3a%2F%2F",	/* same, after a blank */
	" http%3a%2F%2F",
	" %2F%2F",

	NULL			/* end of list */
};
/* Prefix list for test cases without a host: only the empty prefix. */
static const char *pfx_form_nohost[2] = {
	"",	/* no scheme, no host */
	NULL	/* end of list */
};
/*
 * URL-encoded tokens which are incomplete and are expected to be
 * skipped. The decoded form is given next to each entry.
 */
static const char *invalid_form[10] = {
	/* prefixes without a usable host / url */
	"https%3a%2F%2F",	/* https:// */
	"http%3a%2F%2F%2F",	/* http:/// */
	"%2F%2F",		/* // */
	"%2F%2F%2F",		/* /// */
	"%2F%2F%2Fa",		/* ///a */
	"%2F%2Fa",		/* //a */

	/* partial tokens */
	"http",
	"invalid",
	"a",

	NULL			/* end of list */
};
/*
 * Form test cases:
 *   .u   url as written into the form body (may be url-encoded)
 *   .eu  url which cb_want_form() compares against
 *   .h   host, NULL for cases without a host
 *   .pfx NULL terminated list of prefixes to try
 * The list ends with an entry whose .u is NULL.
 */
static struct expect tc_form[10] = {
	/* with host */
	{ .pfx = pfx_form_host,   .h = "host", .u = "/url",      .eu = "/url" },
	{ .pfx = pfx_form_host,   .h = "host", .u = "%2Furl",    .eu = "/url" },
	{ .pfx = pfx_form_host,   .h = "host", .u = "%2F",       .eu = "/" },
	{ .pfx = pfx_form_host,   .h = "ho-t", .u = "%2F%2F%2F", .eu = "///" },
	{ .pfx = pfx_form_host,   .h = "h",    .u = "%2F",       .eu = "/" },
	{ .pfx = pfx_form_host,   .h = "h",    .u = "%2F%2F%2F", .eu = "///" },

	/* without host */
	{ .pfx = pfx_form_nohost, .h = NULL,   .u = "%2Furl",    .eu = "/url" },
	{ .pfx = pfx_form_nohost, .h = NULL,   .u = "%2F",       .eu = "/" },
	{ .pfx = pfx_form_nohost, .h = NULL,   .u = "%2Fa",      .eu = "/a" },

	/* terminator */
	{ .eu = NULL, .u = NULL, .h = NULL, .pfx = NULL }
};
struct cb_want_priv {
unsigned magic;
#define CBW_MAGIC 0x79d8b905
......@@ -267,8 +418,10 @@ cb_not(void *priv, const char *u, const char *h)
WRONG("not to be called");
}
typedef void want_f(void *priv, const char *u, const char *h);
static void
cb_want(void *priv, const char *u, const char *h)
cb_want_ws(void *priv, const char *u, const char *h)
{
struct cb_want_priv *p;
const struct expect *e;
......@@ -294,7 +447,34 @@ cb_want(void *priv, const char *u, const char *h)
}
static void
t_steps(const struct expect *want, unsigned n, const char *s)
cb_want_form(void *priv, const char *u, const char *h)
{
	const struct expect *exp;
	struct cb_want_priv *wp;

	/*
	 * Callback for the form tests: check the reported url / host
	 * against the current expectation and count the call.
	 */
	CAST_OBJ_NOTNULL(wp, priv, CBW_MAGIC);
	exp = wp->e;

	/* the url is compared against .eu, not against .u */
	if (exp->eu != NULL) {
		AN(u);
		AZ(strcmp(exp->eu, u));
	} else {
		AZ(u);
	}

	/* a host must be reported if and only if one is expected */
	if (exp->h != NULL) {
		AN(h);
		AZ(strcmp(exp->h, h));
	} else {
		AZ(h);
	}

	wp->count++;
}
static void
t_steps(objiterate_f *f, want_f *wf,
const struct expect *want, unsigned n, const char *s)
{
struct cb_want_priv wp[1];
struct zfr_iter_s zis[1];
......@@ -306,16 +486,16 @@ t_steps(const struct expect *want, unsigned n, const char *s)
wp->e = want;
zis->priv = wp;
zis->func = cb_want;
zis->func = wf;
for (step = 1; step < strlen(s); step++) {
wp->count = 0;
zis->end_seen = 0;
for (t = s, l = strlen(t);
l > step;
l -= step, t += step)
zfr_iter_ws(zis, 0, t, step);
f(zis, 0, t, step);
AN(l);
zfr_iter_ws(zis, OBJ_ITER_END, t, l);
f(zis, OBJ_ITER_END, t, l);
AN(zis->end_seen);
assert(wp->count == n);
}
......@@ -325,7 +505,7 @@ int
main(void) {
const struct expect *want;
struct zfr_iter_s zis[1];
const char **p, *t, **inv = invalid;
const char **p, *t, **inv = invalid_ws;
char s[256], u[64];
unsigned n;
size_t l;
......@@ -346,7 +526,7 @@ main(void) {
for (p = want->pfx; *p != NULL; p++, inv++) {
AN(inv);
if (*inv == NULL)
inv = invalid;
inv = invalid_ws;
AN(p);
AN(*p);
if (**p == '\0') {
......@@ -365,7 +545,54 @@ main(void) {
(void) strcat(s, u);
n++;
t_steps(want, n, s);
t_steps(zfr_iter_ws, cb_want_ws, want, n, s);
if (*inv == NULL)
continue;
if (sizeof s - strlen(s) - 2 < strlen(*inv))
break;
(void) strcat(s, "\r");
(void) strcat(s, *inv);
t_steps(zfr_iter_ws, cb_want_ws, want, n, s);
inv++;
}
}
zfr_iter_fini(zis);
// form
inv = invalid_form;
for (want = tc_form; want->u != NULL; want++) {
INIT_OBJ(zis, ZFR_ITER_MAGIC);
zis->priv = NULL;
zis->func = cb_not;
zfr_iter_ws(zis, 0, "", 0);
n = 0;
*s = '\0';
for (p = want->pfx; *p != NULL; p++, inv++) {
AN(inv);
if (*inv == NULL)
inv = invalid_form;
AN(p);
AN(*p);
if (**p == '\0') {
l = snprintf(u, sizeof u, " %s\tfoo=%s&",
*inv, want->u);
}
else {
l = snprintf(u, sizeof u, " %s&bar=%s%s%s&",
*inv, *p, want->h, want->u);
}
assert(l < sizeof u);
inv++;
if (sizeof s - strlen(s) - 1 < l)
break;
(void) strcat(s, u);
n++;
t_steps(zfr_iter_form, cb_want_form, want, n, s);
if (*inv == NULL)
continue;
......@@ -374,7 +601,7 @@ main(void) {
break;
(void) strcat(s, "\r");
(void) strcat(s, *inv);
t_steps(want, n, s);
t_steps(zfr_iter_form, cb_want_form, want, n, s);
inv++;
}
}
......
......@@ -30,6 +30,18 @@
typedef void zfr_iter_f(void *priv, const char *u, const char *h);
/*
 * Parser states for application/x-www-form-urlencoded input.
 * FRMST_KEY has the value 0, so a zero-filled struct zfr_iter_form_s
 * starts out looking for a key.
 */
enum zfr_iter_form_e {
	/* discarding the key, looking for '=' */
	FRMST_KEY	= 0,
	/* inside the value, after '=' */
	FRMST_VAL	= 1,
	/* '%' seen, first hex digit is next */
	FRMST_PERC	= 2,
	/* '%x' seen, high nibble is kept in hex */
	FRMST_PERC1	= 3
};
/*
 * Form parser state which zfr_iter_form() keeps between calls; it is
 * embedded in struct zfr_iter_s as member "form".
 */
struct zfr_iter_form_s {
/* where the parser is within key=value / %xx */
enum zfr_iter_form_e state;
/* byte of a %xx escape being assembled (high nibble first) */
unsigned char hex;
};
struct zfr_iter_s {
unsigned magic;
#define ZFR_ITER_MAGIC 0x2d560bd8
......@@ -39,8 +51,11 @@ struct zfr_iter_s {
size_t len;
zfr_iter_f *func;
void *priv;
struct zfr_iter_form_s form;
};
void zfr_iter_fini(struct zfr_iter_s *zis);
int zfr_iter_ws(void *, unsigned flush,
const void *ptr, ssize_t len);
int zfr_iter_form(void *, unsigned flush,
const void *ptr, ssize_t len);
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment