Fix url parsing for zipflow.subreqs_from_body()

parent 3d299460
......@@ -70,13 +70,22 @@ separated by any whitespace (``\\r\\n\\t\\s``)
* ``http://``\ *host*\ *url*
* ``https://``\ *host*\ *url*
* ``//``\ *host*\ *url*
* ``url``
* *url*
with *host* containing any non-whitespace character except for ``/``
and *url* starting with ``/`` and run a sub request for each token as
if ``subreq(``\ *url*\ ``, ``\ *host*\ ``)`` was invoked, but not
using any workspace memory.
.. _RFC 3986: https://www.ietf.org/rfc/rfc3986.txt
*host* may not be empty. The syntactic ambiguity prevents *urls*
starting with ``///``.
(Note: *host* and *url* are used in varnish-cache terminology; in `RFC
3986`_ parlance, *host* is called authority and *url* is a path. The
``//`` scheme is called a network-path reference.)
This function can only be called from the top level, that is, not from
a sub request.
......
......@@ -43,10 +43,32 @@ varnish v1 -vcl {
}
} -start
logexpect l1 -v v1 -g request -q "ReqURL ~ \"/REQ/first/file\"" {
fail add * ReqURL "valid"
expect * * ReqURL "/fromvcl"
expect * * ReqURL "/FIRST/FROM/RESP/file"
expect * * ReqURL "/file1"
expect * * ReqURL "/file3"
expect * * ReqURL "/file4"
expect * * ReqURL "/REQ/first/file"
expect * * ReqURL "/path/file1"
expect * * ReqURL "/file3"
expect * * ReqURL "/file4"
expect * * ReqURL "/another/file2"
expect * * ReqURL "/file3"
expect * * ReqURL "/file4"
fail clear
} -start
client c1 {
txreq -body {
/REQ/first/file
http://thishost/path/file1
in valid
https://invalid
// https://
https://thishost/another/file2
//thishost/file3
}
......@@ -58,5 +80,7 @@ client c1 {
expect resp.status == 500
} -run
logexpect l1 -wait
# all default
shell "curl --data-raw \"/REQ/CURL/first http://thishost/path/file1 https://thishost/another/file2 //thishost/xxx/file3\" -so t.zip -H 'Host: ${v1_addr}' http://${v1_addr}:${v1_port}/ && unzip -Z t.zip"
......@@ -56,6 +56,23 @@ pincr(const char **p, size_t l)
pincr(&p, strlen(s)) \
)
/*
 * Token tracing for the standalone test driver.
 *
 * With TEST_DRIVER defined, the parser's decision for each token is
 * printed to stdout; otherwise all three macros expand to a no-op
 * expression, so the call sites need no #ifdef of their own.
 *
 * NOTE(review): pdiff() is defined outside this excerpt; it is assumed
 * to return the distance between its two pointer arguments - confirm.
 */
#ifdef TEST_DRIVER
/* start of the token currently being examined, set by DBG_BEGIN() */
static const char *DBG_begin;
/* remember p as the start of the token for the later DBG_* reports */
#define DBG_BEGIN(p) DBG_begin = (p)
/*
 * Report the bytes from DBG_begin up to p as rejected. "why" is pasted
 * into the format string by literal concatenation, so it has to be a
 * string literal.
 */
#define DBG_INVALID(p, why) do { \
printf("invalid " why " >%.*s<\n", \
(int)pdiff(DBG_begin, p), DBG_begin); \
} while (0)
/* report the bytes from DBG_begin up to p as an accepted token */
#define DBG_VALID(p) \
printf("valid >%.*s<\n", (int)pdiff(DBG_begin, p), DBG_begin)
#else
/* non-test builds: tracing compiles away */
#define DBG_BEGIN(p) (void)0
#define DBG_INVALID(p, why) (void)0
#define DBG_VALID(p) (void)0
#endif
int
zfr_iter(void *priv, unsigned flush, const void *ptr, ssize_t alen)
{
......@@ -106,44 +123,55 @@ zfr_iter(void *priv, unsigned flush, const void *ptr, ssize_t alen)
p++;
continue;
}
if (tok(p, e, "https://") ||
tok(p, e, "http://")) {
p -= 2;
// no need to keep this prefix in the buffer
pp = p;
}
DBG_BEGIN(p);
h = NULL;
if (tok(p, e, "//")) {
h = u = p;
p -= 2;
while (u < e && *u != '/')
u++;
} else
u = p;
if (tok(p, e, "//") ||
tok(p, e, "http://") ||
tok(p, e, "https://")) {
// sufficient to keep "//" in buffer
pp = p - 2;
h = p;
while (p < e && !vct_islws(*p) && *p != '/')
p++;
}
u = p;
// skip over whatever non-whitespace
while (p < e && !vct_islws(*p))
p++;
if (*u != '/')
// continue in next chunk
if (p == e && (! (flush & OBJ_ITER_END)))
break;
if (p < e || flush & OBJ_ITER_END) {
/* match! */
hh = NULL;
if (h) {
assert(u > h);
hh = strndup(h, pdiff(h, u));
AN(hh);
}
assert(p > u);
uu = strndup(u, pdiff(u, p));
AN(uu);
AN(zis->func);
zis->func(zis->priv, uu, hh);
free(hh);
free(uu);
pp = p;
// token complete, checkpoint
pp = p;
if (*u != '/') {
DBG_INVALID(p, "non-url");
continue;
}
if (h != NULL && h == u) {
DBG_INVALID(p, "no-host");
continue;
}
/* match! */
DBG_VALID(p);
hh = NULL;
if (h) {
assert(u > h);
hh = strndup(h, pdiff(h, u));
AN(hh);
}
assert(p > u);
uu = strndup(u, pdiff(u, p));
AN(uu);
AN(zis->func);
zis->func(zis->priv, uu, hh);
free(hh);
free(uu);
}
assert(e >= pp);
......@@ -182,7 +210,7 @@ struct expect {
const char *u, *h, **pfx;
};
const char *pfx_host[7] = {
static const char *pfx_host[7] = {
"https://",
"http://",
"//",
......@@ -192,15 +220,33 @@ const char *pfx_host[7] = {
NULL
};
const char *pfx_nohost[2] = {
static const char *pfx_nohost[2] = {
"",
NULL
};
static struct expect testcase[7] = {
/*
 * Tokens the parser has to skip without invoking the callback.
 *
 * main() mixes these in between the valid test tokens, walking the
 * array entry by entry and starting over once it reaches the NULL
 * terminator.
 */
// incomplete tokens to skip
static const char *invalid[10] = {
// lone prefixes
// (a host prefix with the host and/or the url part missing)
"https://",
"http:///",
"//",
"///",
"///a",
"//a",
// partials
// (no host prefix and not starting with '/', so not a url either)
"http",
"invalid",
"a",
// end marker, main() wraps around to the first entry here
NULL
};
static struct expect testcase[9] = {
{ .u = "/url", .h = "host", .pfx = pfx_host },
{ .u = "/", .h = "host", .pfx = pfx_host },
{ .u = "///", .h = "ho-t", .pfx = pfx_host },
{ .u = "/", .h = "h", .pfx = pfx_host },
{ .u = "///", .h = "h", .pfx = pfx_host },
{ .u = "/url", .h = NULL, .pfx = pfx_nohost },
{ .u = "/", .h = NULL, .pfx = pfx_nohost },
{ .u = "/a", .h = NULL, .pfx = pfx_nohost },
......@@ -231,13 +277,17 @@ cb_want(void *priv, const char *u, const char *h)
if (e->u == NULL)
AZ(u);
else
else {
AN(u);
AZ(strcmp(e->u, u));
}
if (e->h == NULL)
AZ(h);
else
else {
AN(h);
AZ(strcmp(e->h, h));
}
p->count++;
}
......@@ -274,7 +324,7 @@ int
main(void) {
const struct expect *want;
struct zfr_iter_s zis[1];
const char **p, *t;
const char **p, *t, **inv = invalid;
char s[256], u[64];
unsigned n;
size_t l;
......@@ -284,7 +334,6 @@ main(void) {
t = s;
assert(tok(t, strchr(s, '\0'), "https://"));
for (want = testcase; want->u != NULL; want++) {
INIT_OBJ(zis, ZFR_ITER_MAGIC);
zis->priv = NULL;
......@@ -293,16 +342,22 @@ main(void) {
n = 0;
*s = '\0';
for (p = want->pfx; *p != NULL; p++) {
for (p = want->pfx; *p != NULL; p++, inv++) {
AN(inv);
if (*inv == NULL)
inv = invalid;
AN(p);
AN(*p);
if (**p == '\0')
l = snprintf(u, sizeof u, "\t%s", want->u);
if (**p == '\0') {
l = snprintf(u, sizeof u, " %s\t%s",
*inv, want->u);
}
else {
l = snprintf(u, sizeof u, "\t%s%s%s",
*p, want->h, want->u);
l = snprintf(u, sizeof u, " %s\t%s%s%s",
*inv, *p, want->h, want->u);
}
assert(l < sizeof u);
inv++;
if (sizeof s - strlen(s) - 1 < l)
break;
......@@ -310,6 +365,16 @@ main(void) {
n++;
t_steps(want, n, s);
if (*inv == NULL)
continue;
if (sizeof s - strlen(s) - 2 < strlen(*inv))
break;
(void) strcat(s, "\r");
(void) strcat(s, *inv);
t_steps(want, n, s);
inv++;
}
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment