Add form parsing

parent 33336160
...@@ -47,6 +47,7 @@ TESTS = \ ...@@ -47,6 +47,7 @@ TESTS = \
vtc/sub.vtc \ vtc/sub.vtc \
vtc/sub-coalesce.vtc \ vtc/sub-coalesce.vtc \
vtc/sub-body.vtc \ vtc/sub-body.vtc \
vtc/sub-body-form.vtc \
vtc/ex1.vtc \ vtc/ex1.vtc \
vtc/ex2.vtc \ vtc/ex2.vtc \
vtc/ex3.vtc vtc/ex3.vtc
......
...@@ -107,6 +107,7 @@ struct zipflow_top { ...@@ -107,6 +107,7 @@ struct zipflow_top {
#define ZIPFLOW_TOP_MAGIC 0x5743145e #define ZIPFLOW_TOP_MAGIC 0x5743145e
unsigned body_req:1; unsigned body_req:1;
unsigned body_resp:1; unsigned body_resp:1;
objiterate_f *zfr_f;
struct zfr_iter_s *zis; struct zfr_iter_s *zis;
struct zipflow_head head; struct zipflow_head head;
ZIP *zip; ZIP *zip;
...@@ -268,7 +269,7 @@ vmod_subreq(VRT_CTX, VCL_STRING uri, VCL_STRING host) ...@@ -268,7 +269,7 @@ vmod_subreq(VRT_CTX, VCL_STRING uri, VCL_STRING host)
} }
VCL_VOID VCL_VOID
vmod_subreqs_from_body(VRT_CTX, VCL_ENUM which) vmod_subreqs_from_body(VRT_CTX, VCL_ENUM which, VCL_ENUM format)
{ {
struct zipflow_request *zfr; struct zipflow_request *zfr;
struct zipflow_top *zft; struct zipflow_top *zft;
...@@ -304,6 +305,13 @@ vmod_subreqs_from_body(VRT_CTX, VCL_ENUM which) ...@@ -304,6 +305,13 @@ vmod_subreqs_from_body(VRT_CTX, VCL_ENUM which)
zfr->bundle = 0; zfr->bundle = 0;
} else } else
WRONG("which"); WRONG("which");
if (format == VENUM(whitespace))
zft->zfr_f = zfr_iter_ws;
else if (format == VENUM(form))
zft->zfr_f = zfr_iter_form;
else
WRONG("format");
} }
VCL_BOOL VCL_BOOL
...@@ -542,8 +550,9 @@ zft_iter_req_body(struct zipflow_top *zft) ...@@ -542,8 +550,9 @@ zft_iter_req_body(struct zipflow_top *zft)
zis->priv = zft; zis->priv = zft;
if (zft->body_req) { if (zft->body_req) {
AN(zft->zfr_f);
(void) VRB_Iterate(req->wrk, req->vsl, req, (void) VRB_Iterate(req->wrk, req->vsl, req,
zfr_iter_ws, (void *)zis); zft->zfr_f, (void *)zis);
zfr_iter_fini(zis); zfr_iter_fini(zis);
} }
} }
...@@ -764,7 +773,8 @@ vdp_zipflow_bytes(struct vdp_ctx *vdc, enum vdp_action act, void **priv, ...@@ -764,7 +773,8 @@ vdp_zipflow_bytes(struct vdp_ctx *vdc, enum vdp_action act, void **priv,
zft = zfr->top; zft = zfr->top;
if (zft->zis != NULL) { if (zft->zis != NULL) {
r = zfr_iter_ws(zft->zis, (act == VDP_END) ? OBJ_ITER_END : 0, AN(zft->zfr_f);
r = zft->zfr_f(zft->zis, (act == VDP_END) ? OBJ_ITER_END : 0,
ptr, len); ptr, len);
} }
else else
......
...@@ -12,7 +12,7 @@ import zipflow \[as name\] \[from \"path\"\] ...@@ -12,7 +12,7 @@ import zipflow \[as name\] \[from \"path\"\]
VOID subreq(STRING url, STRING host) VOID subreq(STRING url, STRING host)
VOID subreqs_from_body(ENUM which) VOID subreqs_from_body(ENUM which, ENUM format)
BOOL is_subreq() BOOL is_subreq()
...@@ -147,15 +147,28 @@ handles the body. ...@@ -147,15 +147,28 @@ handles the body.
Only sub requests with response status 200 will be included in the Only sub requests with response status 200 will be included in the
resulting zip file. resulting zip file.
## VOID subreqs_from_body(ENUM {req_body, resp_body} which) {#zipflow.subreqs_from_body()} ## VOID subreqs_from_body(ENUM which, ENUM format) {#zipflow.subreqs_from_body()}
VOID subreqs_from_body(
ENUM {req_body, resp_body} which,
ENUM {whitespace, form} format=whitespace
)
Restricted to: `client`. Restricted to: `client`.
*Note* this function should eventually be superseded with something more *Note* this function should eventually be superseded with something more
versatile. versatile.
Parse the given body for tokens in one of the following formats, The given body is parsed according to the *format* argument, which can
separated by any whitespace (`\\r\\n\\t\\s`) be:
- `whitespace` (default) for separation of URLs by any whitespace
(`\\r\\n\\t\\s`), or
- `form` for `application/x-www-form-urlencoded`
(`key=value&key=value`\...): keys are ignored and only the
url-decoded values are considered.
The tokens are expected to be in format
- `http://`*hosturl* - `http://`*hosturl*
- `https://`*hosturl* - `https://`*hosturl*
......
...@@ -134,15 +134,25 @@ used to control how zipflow handles the body. ...@@ -134,15 +134,25 @@ used to control how zipflow handles the body.
Only sub requests with response status 200 will be included in the Only sub requests with response status 200 will be included in the
resulting zip file. resulting zip file.
$Function VOID subreqs_from_body(ENUM {req_body, resp_body } which) $Function VOID subreqs_from_body(ENUM {req_body, resp_body } which,
ENUM {whitespace, form} format="whitespace")
$Restrict client $Restrict client
*Note* this function should eventually be superseded with something *Note* this function should eventually be superseded with something
more versatile. more versatile.
Parse the given body for tokens in one of the following formats, The given body is parsed according to the *format* argument, which can
separated by any whitespace (``\\r\\n\\t\\s``) be:
* ``whitespace`` (default) for separation of URLs by any whitespace
(``\\r\\n\\t\\s``), or
* ``form`` for ``application/x-www-form-urlencoded``
(``key=value&key=value``\ ...): keys are ignored and only the
url-decoded values are considered.
The tokens are expected to be in format
* ``http://``\ *host*\ *url* * ``http://``\ *host*\ *url*
* ``https://``\ *host*\ *url* * ``https://``\ *host*\ *url*
......
varnishtest "vmod-zipflow sub requests from bodies post form"
feature cmd "type curl && type unzip"
varnish v1 -vcl {
import zipflow;
import std;
backend proforma none;
# No real backend is used: the request body is cached (up to 1M) and
# every request, top level or sub request, is answered from vcl_synth.
sub vcl_recv {
std.cache_req_body(1M);
return (synth(200));
}
# Top-level response: add two sub requests from VCL, then let zipflow
# collect more from the synthetic response body and from the request
# body, both parsed with format=form, and enable the zipflow filter.
sub synth_top {
zipflow.subreq("/fromvcl");
zipflow.subreq("/404");
synthetic(" aaa=/FIRST/FROM/RESP/file&bbb=/file1");
zipflow.subreqs_from_body(resp_body, format=form);
zipflow.subreqs_from_body(req_body, format=form);
set resp.filters = "zipflow";
}
# Sub request response: the body is "sub " followed by the requested URL.
sub synth_sub {
set resp.http.is-subreq = zipflow.is_subreq();
synthetic("sub " + req.url);
}
sub vcl_synth {
# /404 answers with a non-200 status
if (req.url == "/404") {
set resp.status = 404;
set resp.body = "404";
}
else if (zipflow.is_subreq()) {
call synth_sub;
zipflow.meta(name=req.url);
# nested sub requests: URLs matching /file1 add /file3, which in turn
# adds /file4; /file1 entries are also renamed via zipflow.meta()
if (req.url ~ "/file1") {
zipflow.meta(name="file1.changed");
zipflow.subreq("/file3");
} else
if (req.url ~ "/file3") {
zipflow.subreq("/file4");
}
}
else {
call synth_top;
}
return (deliver);
}
} -start
# Watch the request group whose log contains ReqURL /REQ/first/file and
# check the sub request URLs in the order given below.
logexpect l1 -v v1 -g request -q "ReqURL ~ \"/REQ/first/file\"" {
# any ReqURL matching "valid" fails the test: c1's body contains the
# junk tokens "in valid" and "https://invalid"
fail add * ReqURL "valid"
# sub requests added from VCL
expect * * ReqURL "/fromvcl"
expect * * ReqURL "/404"
# values of the synthetic response body, including the nested
# /file3 and /file4 triggered by /file1
expect * * ReqURL "/FIRST/FROM/RESP/file"
expect * * ReqURL "/file1"
expect * * ReqURL "/file3"
expect * * ReqURL "/file4"
# values of the request body sent by c1
expect * * ReqURL "/REQ/first/file"
expect * * ReqURL "/path/file1"
expect * * ReqURL "/file3"
expect * * ReqURL "/file4"
expect * * ReqURL "/another/file2"
expect * * ReqURL "/file3"
expect * * ReqURL "/file4"
fail clear
} -start
# c1 posts a form body that mixes usable values with junk; logexpect l1
# fails on any ReqURL matching "valid", so the junk must not turn into
# sub requests.
client c1 {
txreq -body {
foo=/REQ/first/file
&foo=http://thishost/path/file1&
in valid
https://invalid
// https://
&bazz=https://thishost/another/file2
&zrrr=//thishost/file3
}
rxresp
expect resp.status == 200
# second request is sent without any body and is expected to fail
txreq
rxresp
expect resp.status == 500
} -run
logexpect l1 -wait
# all default
shell "curl --data-raw \"/REQ/CURL/first http://thishost/path/file1 https://thishost/another/file2 //thishost/xxx/file3\" -so t.zip -H 'Host: ${v1_addr}' http://${v1_addr}:${v1_port}/ && unzip -Z t.zip"
...@@ -204,13 +204,117 @@ zfr_iter_fini(struct zfr_iter_s *zis) ...@@ -204,13 +204,117 @@ zfr_iter_fini(struct zfr_iter_s *zis)
zis->len = 0; zis->len = 0;
} }
/*
 * Iterate over an application/x-www-form-urlencoded body
 * (key=value&key=value...) by converting it to the whitespace format
 * and passing it on to zfr_iter_ws().
 *
 * Keys are discarded. Values are percent-decoded and forwarded, and the
 * '&' ending a value is forwarded as a single space to terminate the
 * token. The parser state lives in zis->form, so keys, values and
 * percent escapes may be split across calls at any byte.
 *
 * priv:	the struct zfr_iter_s
 * flush:	flags of the objiterate_f call; OBJ_ITER_END marks the
 *		last chunk of the body
 * ptr, alen:	the chunk to parse, alen >= 0
 *
 * Returns 0 on success, any non-zero return value of zfr_iter_ws(), or
 *	-2	the first character after '%' is not a hex digit
 *	-3	the second character after '%' is not a hex digit
 *	-4	the body ended in the middle of a percent escape
 */
int
zfr_iter_form(void *priv, unsigned flush, const void *ptr, ssize_t alen)
{
	struct zfr_iter_form_s *zfrm;
	struct zfr_iter_s *zis;
	const char *pp, *p, *e;
	unsigned end;
	int truncated;
	int r = 0;

	CAST_OBJ_NOTNULL(zis, priv, ZFR_ITER_MAGIC);
	zfrm = &zis->form;

	assert(alen >= 0);
	pp = ptr;
	e = pp + (size_t)alen;

	while (pp < e) {
		switch (zfrm->state) {
		case FRMST_KEY:
			/* keys are discarded, only the '=' is of interest */
			if (*pp == '=')
				zfrm->state = FRMST_VAL;
			break;
		case FRMST_VAL:
			/* forward the longest run of literal bytes at once */
			p = pp;
			while (p < e && *p != '%' && *p != '&')
				p++;
			if (p != pp) {
				/* pass the flags on if the run ends the chunk */
				end = (p == e) ? flush : 0;
				r = zfr_iter_ws(priv, end,
				    pp, (ssize_t)pdiff(pp, p));
				if (r)
					return (r);
				pp = p;
			}
			if (pp == e)
				continue;	/* chunk consumed: loop ends */
			if (*pp == '%')
				zfrm->state = FRMST_PERC;
			else if (*pp == '&') {
				/* end of value: terminate the token */
				r = zfr_iter_ws(priv, 0,
				    " ", (ssize_t)1);
				if (r)
					return (r);
				zfrm->state = FRMST_KEY;
			}
			else
				WRONG("impossible form val");
			break;
		case FRMST_PERC:
			/* high nibble of the escaped byte */
			if (*pp >= '0' && *pp <= '9')
				zfrm->hex = (unsigned char)*pp - '0';
			else if (*pp >= 'a' && *pp <= 'f')
				zfrm->hex = 0xa + (unsigned char)*pp - 'a';
			else if (*pp >= 'A' && *pp <= 'F')
				zfrm->hex = 0xa + (unsigned char)*pp - 'A';
			else
				return (-2);
			zfrm->hex <<= 4;
			zfrm->state = FRMST_PERC1;
			break;
		case FRMST_PERC1:
			/* low nibble completes the byte */
			if (*pp >= '0' && *pp <= '9')
				zfrm->hex |= *pp - '0';
			else if (*pp >= 'a' && *pp <= 'f')
				zfrm->hex |= 0xa + *pp - 'a';
			else if (*pp >= 'A' && *pp <= 'F')
				zfrm->hex |= 0xa + *pp - 'A';
			else
				return (-3);
			/*
			 * Forwarded without flags: a pending OBJ_ITER_END
			 * is delivered by the call after the loop.
			 */
			r = zfr_iter_ws(priv, 0,
			    &zfrm->hex, (ssize_t)1);
			if (r)
				return (r);
			zfrm->state = FRMST_VAL;
			break;
		default:
			WRONG("zfrm->state");
		}
		pp++;
	}
	assert(pp == e);
	AZ(r);

	if ((flush & OBJ_ITER_END) == 0)
		return (0);

	/*
	 * End of body: a dangling "%" or "%x" is as malformed as a bad hex
	 * digit. Either way the next body parsed with this iterator has to
	 * start out looking for a key again.
	 */
	truncated = (zfrm->state == FRMST_PERC ||
	    zfrm->state == FRMST_PERC1);
	zfrm->state = FRMST_KEY;
	if (truncated)
		return (-4);

	/* make sure zfr_iter_ws() gets to see the end exactly once */
	if (zis->end_seen == 0)
		r = zfr_iter_ws(priv, flush, "", (ssize_t)0);
	return (r);
}
#ifdef TEST_DRIVER #ifdef TEST_DRIVER
#include <stdio.h> #include <stdio.h>
struct expect { struct expect {
const char *u, *h, **pfx; const char *eu, *u, *h, **pfx;
}; };
/* testcase whitespace
* -------------
*/
static const char *pfx_ws_host[7] = { static const char *pfx_ws_host[7] = {
"https://", "https://",
"http://", "http://",
...@@ -227,7 +331,7 @@ static const char *pfx_ws_nohost[2] = { ...@@ -227,7 +331,7 @@ static const char *pfx_ws_nohost[2] = {
}; };
// incomplete tokens to skip // incomplete tokens to skip
static const char *invalid[10] = { static const char *invalid_ws[10] = {
// lone prefixes // lone prefixes
"https://", "https://",
"http:///", "http:///",
...@@ -254,6 +358,53 @@ static struct expect tc_ws[9] = { ...@@ -254,6 +358,53 @@ static struct expect tc_ws[9] = {
{ NULL } { NULL }
}; };
/* testcase form
* -------------
*/
/*
 * Percent-encoded variants of the "https://", "http://" and "//" prefixes
 * put in front of host and URL when building form values, each also with
 * a leading blank. NULL-terminated.
 */
static const char *pfx_form_host[7] = {
	"https%3a%2F%2F", "http%3a%2F%2F", "%2F%2F",
	" https%3a%2F%2F", " http%3a%2F%2F", " %2F%2F",
	NULL
};

/* The only prefix for test cases without a host: none. NULL-terminated. */
static const char *pfx_form_nohost[2] = { "", NULL };

/*
 * Incomplete tokens which are expected to be skipped: prefixes with
 * nothing usable after them and partial words. NULL-terminated.
 */
static const char *invalid_form[10] = {
	/* lone prefixes */
	"https%3a%2F%2F", "http%3a%2F%2F%2F",
	"%2F%2F", "%2F%2F%2F", "%2F%2F%2Fa", "%2F%2Fa",
	/* partials */
	"http", "invalid", "a",
	NULL
};
/*
 * Form test cases. Per entry:
 *	.u	URL as written into the form value (may be percent-encoded)
 *	.eu	URL expected in the callback after decoding
 *	.h	host written after the prefix and expected in the callback,
 *		NULL for cases without a host
 *	.pfx	NULL-terminated list of prefixes to run the case with
 * The list ends with an all-zero entry.
 */
static struct expect tc_form[10] = {
	/* with host */
	{ .pfx = pfx_form_host, .h = "host", .u = "/url", .eu = "/url" },
	{ .pfx = pfx_form_host, .h = "host", .u = "%2Furl", .eu = "/url" },
	{ .pfx = pfx_form_host, .h = "host", .u = "%2F", .eu = "/" },
	{ .pfx = pfx_form_host, .h = "ho-t", .u = "%2F%2F%2F", .eu = "///" },
	{ .pfx = pfx_form_host, .h = "h", .u = "%2F", .eu = "/" },
	{ .pfx = pfx_form_host, .h = "h", .u = "%2F%2F%2F", .eu = "///" },
	/* without host */
	{ .pfx = pfx_form_nohost, .h = NULL, .u = "%2Furl", .eu = "/url" },
	{ .pfx = pfx_form_nohost, .h = NULL, .u = "%2F", .eu = "/" },
	{ .pfx = pfx_form_nohost, .h = NULL, .u = "%2Fa", .eu = "/a" },
	{ NULL }
};
struct cb_want_priv { struct cb_want_priv {
unsigned magic; unsigned magic;
#define CBW_MAGIC 0x79d8b905 #define CBW_MAGIC 0x79d8b905
...@@ -267,8 +418,10 @@ cb_not(void *priv, const char *u, const char *h) ...@@ -267,8 +418,10 @@ cb_not(void *priv, const char *u, const char *h)
WRONG("not to be called"); WRONG("not to be called");
} }
typedef void want_f(void *priv, const char *u, const char *h);
static void static void
cb_want(void *priv, const char *u, const char *h) cb_want_ws(void *priv, const char *u, const char *h)
{ {
struct cb_want_priv *p; struct cb_want_priv *p;
const struct expect *e; const struct expect *e;
...@@ -294,7 +447,34 @@ cb_want(void *priv, const char *u, const char *h) ...@@ -294,7 +447,34 @@ cb_want(void *priv, const char *u, const char *h)
} }
static void static void
t_steps(const struct expect *want, unsigned n, const char *s) cb_want_form(void *priv, const char *u, const char *h)
{
struct cb_want_priv *p;
const struct expect *e;
CAST_OBJ_NOTNULL(p, priv, CBW_MAGIC);
e = p->e;
if (e->eu == NULL)
AZ(u);
else {
AN(u);
AZ(strcmp(e->eu, u));
}
if (e->h == NULL)
AZ(h);
else {
AN(h);
AZ(strcmp(e->h, h));
}
p->count++;
}
static void
t_steps(objiterate_f *f, want_f *wf,
const struct expect *want, unsigned n, const char *s)
{ {
struct cb_want_priv wp[1]; struct cb_want_priv wp[1];
struct zfr_iter_s zis[1]; struct zfr_iter_s zis[1];
...@@ -306,16 +486,16 @@ t_steps(const struct expect *want, unsigned n, const char *s) ...@@ -306,16 +486,16 @@ t_steps(const struct expect *want, unsigned n, const char *s)
wp->e = want; wp->e = want;
zis->priv = wp; zis->priv = wp;
zis->func = cb_want; zis->func = wf;
for (step = 1; step < strlen(s); step++) { for (step = 1; step < strlen(s); step++) {
wp->count = 0; wp->count = 0;
zis->end_seen = 0; zis->end_seen = 0;
for (t = s, l = strlen(t); for (t = s, l = strlen(t);
l > step; l > step;
l -= step, t += step) l -= step, t += step)
zfr_iter_ws(zis, 0, t, step); f(zis, 0, t, step);
AN(l); AN(l);
zfr_iter_ws(zis, OBJ_ITER_END, t, l); f(zis, OBJ_ITER_END, t, l);
AN(zis->end_seen); AN(zis->end_seen);
assert(wp->count == n); assert(wp->count == n);
} }
...@@ -325,7 +505,7 @@ int ...@@ -325,7 +505,7 @@ int
main(void) { main(void) {
const struct expect *want; const struct expect *want;
struct zfr_iter_s zis[1]; struct zfr_iter_s zis[1];
const char **p, *t, **inv = invalid; const char **p, *t, **inv = invalid_ws;
char s[256], u[64]; char s[256], u[64];
unsigned n; unsigned n;
size_t l; size_t l;
...@@ -346,7 +526,7 @@ main(void) { ...@@ -346,7 +526,7 @@ main(void) {
for (p = want->pfx; *p != NULL; p++, inv++) { for (p = want->pfx; *p != NULL; p++, inv++) {
AN(inv); AN(inv);
if (*inv == NULL) if (*inv == NULL)
inv = invalid; inv = invalid_ws;
AN(p); AN(p);
AN(*p); AN(*p);
if (**p == '\0') { if (**p == '\0') {
...@@ -365,7 +545,54 @@ main(void) { ...@@ -365,7 +545,54 @@ main(void) {
(void) strcat(s, u); (void) strcat(s, u);
n++; n++;
t_steps(want, n, s); t_steps(zfr_iter_ws, cb_want_ws, want, n, s);
if (*inv == NULL)
continue;
if (sizeof s - strlen(s) - 2 < strlen(*inv))
break;
(void) strcat(s, "\r");
(void) strcat(s, *inv);
t_steps(zfr_iter_ws, cb_want_ws, want, n, s);
inv++;
}
}
zfr_iter_fini(zis);
// form
inv = invalid_form;
for (want = tc_form; want->u != NULL; want++) {
INIT_OBJ(zis, ZFR_ITER_MAGIC);
zis->priv = NULL;
zis->func = cb_not;
zfr_iter_ws(zis, 0, "", 0);
n = 0;
*s = '\0';
for (p = want->pfx; *p != NULL; p++, inv++) {
AN(inv);
if (*inv == NULL)
inv = invalid_form;
AN(p);
AN(*p);
if (**p == '\0') {
l = snprintf(u, sizeof u, " %s\tfoo=%s&",
*inv, want->u);
}
else {
l = snprintf(u, sizeof u, " %s&bar=%s%s%s&",
*inv, *p, want->h, want->u);
}
assert(l < sizeof u);
inv++;
if (sizeof s - strlen(s) - 1 < l)
break;
(void) strcat(s, u);
n++;
t_steps(zfr_iter_form, cb_want_form, want, n, s);
if (*inv == NULL) if (*inv == NULL)
continue; continue;
...@@ -374,7 +601,7 @@ main(void) { ...@@ -374,7 +601,7 @@ main(void) {
break; break;
(void) strcat(s, "\r"); (void) strcat(s, "\r");
(void) strcat(s, *inv); (void) strcat(s, *inv);
t_steps(want, n, s); t_steps(zfr_iter_form, cb_want_form, want, n, s);
inv++; inv++;
} }
} }
......
...@@ -30,6 +30,18 @@ ...@@ -30,6 +30,18 @@
typedef void zfr_iter_f(void *priv, const char *u, const char *h); typedef void zfr_iter_f(void *priv, const char *u, const char *h);
/*
 * States of the application/x-www-form-urlencoded parser.
 * FRMST_KEY is zero, so it is the state of a zero-initialized iterator.
 */
enum zfr_iter_form_e {
	FRMST_KEY	= 0,	/* discarding the key, looking for '=' */
	FRMST_VAL	= 1,	/* inside the value following '=' */
	FRMST_PERC	= 2,	/* '%' seen, first hex digit is next */
	FRMST_PERC1	= 3	/* "%x" seen, high nibble is in hex */
};

/* Form parser state carried from one chunk of the body to the next */
struct zfr_iter_form_s {
	enum zfr_iter_form_e	state;
	unsigned char		hex;	/* percent-escaped byte being built */
};
struct zfr_iter_s { struct zfr_iter_s {
unsigned magic; unsigned magic;
#define ZFR_ITER_MAGIC 0x2d560bd8 #define ZFR_ITER_MAGIC 0x2d560bd8
...@@ -39,8 +51,11 @@ struct zfr_iter_s { ...@@ -39,8 +51,11 @@ struct zfr_iter_s {
size_t len; size_t len;
zfr_iter_f *func; zfr_iter_f *func;
void *priv; void *priv;
struct zfr_iter_form_s form;
}; };
void zfr_iter_fini(struct zfr_iter_s *zis); void zfr_iter_fini(struct zfr_iter_s *zis);
int zfr_iter_ws(void *, unsigned flush, int zfr_iter_ws(void *, unsigned flush,
const void *ptr, ssize_t len); const void *ptr, ssize_t len);
/*
 * Same calling convention as zfr_iter_ws(), for bodies in
 * application/x-www-form-urlencoded format. The first argument is the
 * struct zfr_iter_s.
 */
int zfr_iter_form(void *, unsigned flush,
const void *ptr, ssize_t len);
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment