Fix url parsing for zipflow.subreqs_from_body()

parent 3d299460
...@@ -70,13 +70,22 @@ separated by any whitespace (``\\r\\n\\t\\s``) ...@@ -70,13 +70,22 @@ separated by any whitespace (``\\r\\n\\t\\s``)
* ``http://``\ *host*\ *url* * ``http://``\ *host*\ *url*
* ``https://``\ *host*\ *url* * ``https://``\ *host*\ *url*
* ``//``\ *host*\ *url* * ``//``\ *host*\ *url*
* ``url`` * *url*
with *host* containing any non-whitespace character except for ``/`` with *host* containing any non-whitespace character except for ``/``
and *url* starting with ``/`` and run a sub request for each token as and *url* starting with ``/`` and run a sub request for each token as
if ``subreq(``\ *url*\ ``, ``\ *host*\ ``)`` was invoked, but not if ``subreq(``\ *url*\ ``, ``\ *host*\ ``)`` was invoked, but not
using any workspace memory. using any workspace memory.
.. _RFC 3986: https://www.ietf.org/rfc/rfc3986.txt
*host* may not be empty. The syntactic ambiguity prevents *urls*
starting with ``///``.
(Note: *host* and *url* are used in varnish-cache terminology; in `RFC
3986`_ parlance, *host* is called authority and *url* is a path. The
``//`` scheme is called a network-path reference.)
This function can only be called from the top level, that is, not from This function can only be called from the top level, that is, not from
a sub request. a sub request.
......
...@@ -43,10 +43,32 @@ varnish v1 -vcl { ...@@ -43,10 +43,32 @@ varnish v1 -vcl {
} }
} -start } -start
logexpect l1 -v v1 -g request -q "ReqURL ~ \"/REQ/first/file\"" {
fail add * ReqURL "valid"
expect * * ReqURL "/fromvcl"
expect * * ReqURL "/FIRST/FROM/RESP/file"
expect * * ReqURL "/file1"
expect * * ReqURL "/file3"
expect * * ReqURL "/file4"
expect * * ReqURL "/REQ/first/file"
expect * * ReqURL "/path/file1"
expect * * ReqURL "/file3"
expect * * ReqURL "/file4"
expect * * ReqURL "/another/file2"
expect * * ReqURL "/file3"
expect * * ReqURL "/file4"
fail clear
} -start
client c1 { client c1 {
txreq -body { txreq -body {
/REQ/first/file /REQ/first/file
http://thishost/path/file1 http://thishost/path/file1
in valid
https://invalid
// https://
https://thishost/another/file2 https://thishost/another/file2
//thishost/file3 //thishost/file3
} }
...@@ -58,5 +80,7 @@ client c1 { ...@@ -58,5 +80,7 @@ client c1 {
expect resp.status == 500 expect resp.status == 500
} -run } -run
logexpect l1 -wait
# all default # all default
shell "curl --data-raw \"/REQ/CURL/first http://thishost/path/file1 https://thishost/another/file2 //thishost/xxx/file3\" -so t.zip -H 'Host: ${v1_addr}' http://${v1_addr}:${v1_port}/ && unzip -Z t.zip" shell "curl --data-raw \"/REQ/CURL/first http://thishost/path/file1 https://thishost/another/file2 //thishost/xxx/file3\" -so t.zip -H 'Host: ${v1_addr}' http://${v1_addr}:${v1_port}/ && unzip -Z t.zip"
...@@ -56,6 +56,23 @@ pincr(const char **p, size_t l) ...@@ -56,6 +56,23 @@ pincr(const char **p, size_t l)
pincr(&p, strlen(s)) \ pincr(&p, strlen(s)) \
) )
/*
 * Token tracing hooks.
 *
 * In a regular build all three macros expand to a no-op expression.
 * When TEST_DRIVER is defined they print a trace line to stdout:
 *
 *   DBG_BEGIN(p)         remember p as the start of the current token
 *   DBG_INVALID(p, why)  print the text from the remembered start up to p,
 *                        tagged "invalid" plus the reason; why must be a
 *                        string literal because it is pasted into the
 *                        format string
 *   DBG_VALID(p)         print the same span tagged "valid"
 *
 * The printed length comes from pdiff(), which is defined elsewhere in
 * this file (presumably the distance between the two pointers).
 */
#ifndef TEST_DRIVER

#define DBG_BEGIN(p)		(void)0
#define DBG_INVALID(p, why)	(void)0
#define DBG_VALID(p)		(void)0

#else /* TEST_DRIVER */

/* start of the token currently being examined */
static const char *DBG_begin;

#define DBG_BEGIN(p)		DBG_begin = (p)

#define DBG_INVALID(p, why)						\
	do {								\
		printf("invalid " why " >%.*s<\n",			\
		    (int)pdiff(DBG_begin, p), DBG_begin);		\
	} while (0)

#define DBG_VALID(p)							\
	printf("valid >%.*s<\n", (int)pdiff(DBG_begin, p), DBG_begin)

#endif /* TEST_DRIVER */
int int
zfr_iter(void *priv, unsigned flush, const void *ptr, ssize_t alen) zfr_iter(void *priv, unsigned flush, const void *ptr, ssize_t alen)
{ {
...@@ -106,44 +123,55 @@ zfr_iter(void *priv, unsigned flush, const void *ptr, ssize_t alen) ...@@ -106,44 +123,55 @@ zfr_iter(void *priv, unsigned flush, const void *ptr, ssize_t alen)
p++; p++;
continue; continue;
} }
if (tok(p, e, "https://") || DBG_BEGIN(p);
tok(p, e, "http://")) {
p -= 2;
// no need to keep this prefix in the buffer
pp = p;
}
h = NULL; h = NULL;
if (tok(p, e, "//")) { if (tok(p, e, "//") ||
h = u = p; tok(p, e, "http://") ||
p -= 2; tok(p, e, "https://")) {
while (u < e && *u != '/') // sufficient to keep "//" in buffer
u++; pp = p - 2;
} else h = p;
u = p; while (p < e && !vct_islws(*p) && *p != '/')
p++;
}
u = p;
// skip over whatever non-whitespace
while (p < e && !vct_islws(*p)) while (p < e && !vct_islws(*p))
p++; p++;
if (*u != '/') // continue in next chunk
if (p == e && (! (flush & OBJ_ITER_END)))
break; break;
if (p < e || flush & OBJ_ITER_END) { // token complete, checkpoint
/* match! */ pp = p;
hh = NULL;
if (h) { if (*u != '/') {
assert(u > h); DBG_INVALID(p, "non-url");
hh = strndup(h, pdiff(h, u)); continue;
AN(hh); }
}
assert(p > u); if (h != NULL && h == u) {
uu = strndup(u, pdiff(u, p)); DBG_INVALID(p, "no-host");
AN(uu); continue;
AN(zis->func);
zis->func(zis->priv, uu, hh);
free(hh);
free(uu);
pp = p;
} }
/* match! */
DBG_VALID(p);
hh = NULL;
if (h) {
assert(u > h);
hh = strndup(h, pdiff(h, u));
AN(hh);
}
assert(p > u);
uu = strndup(u, pdiff(u, p));
AN(uu);
AN(zis->func);
zis->func(zis->priv, uu, hh);
free(hh);
free(uu);
} }
assert(e >= pp); assert(e >= pp);
...@@ -182,7 +210,7 @@ struct expect { ...@@ -182,7 +210,7 @@ struct expect {
const char *u, *h, **pfx; const char *u, *h, **pfx;
}; };
const char *pfx_host[7] = { static const char *pfx_host[7] = {
"https://", "https://",
"http://", "http://",
"//", "//",
...@@ -192,15 +220,33 @@ const char *pfx_host[7] = { ...@@ -192,15 +220,33 @@ const char *pfx_host[7] = {
NULL NULL
}; };
const char *pfx_nohost[2] = { static const char *pfx_nohost[2] = {
"", "",
NULL NULL
}; };
static struct expect testcase[7] = { // incomplete tokens to skip
/*
 * NULL-terminated list of tokens which are not complete
 * [scheme:]//host/url or /url references.
 */
static const char *invalid[10] = {
	/* scheme / "//" prefixes lacking a host, a url, or both */
	"https://", "http:///",
	"//", "///",
	"///a", "//a",

	/* fragments without a leading "/" */
	"http", "invalid", "a",

	NULL
};
static struct expect testcase[9] = {
{ .u = "/url", .h = "host", .pfx = pfx_host }, { .u = "/url", .h = "host", .pfx = pfx_host },
{ .u = "/", .h = "host", .pfx = pfx_host }, { .u = "/", .h = "host", .pfx = pfx_host },
{ .u = "///", .h = "ho-t", .pfx = pfx_host }, { .u = "///", .h = "ho-t", .pfx = pfx_host },
{ .u = "/", .h = "h", .pfx = pfx_host },
{ .u = "///", .h = "h", .pfx = pfx_host },
{ .u = "/url", .h = NULL, .pfx = pfx_nohost }, { .u = "/url", .h = NULL, .pfx = pfx_nohost },
{ .u = "/", .h = NULL, .pfx = pfx_nohost }, { .u = "/", .h = NULL, .pfx = pfx_nohost },
{ .u = "/a", .h = NULL, .pfx = pfx_nohost }, { .u = "/a", .h = NULL, .pfx = pfx_nohost },
...@@ -231,13 +277,17 @@ cb_want(void *priv, const char *u, const char *h) ...@@ -231,13 +277,17 @@ cb_want(void *priv, const char *u, const char *h)
if (e->u == NULL) if (e->u == NULL)
AZ(u); AZ(u);
else else {
AN(u);
AZ(strcmp(e->u, u)); AZ(strcmp(e->u, u));
}
if (e->h == NULL) if (e->h == NULL)
AZ(h); AZ(h);
else else {
AN(h);
AZ(strcmp(e->h, h)); AZ(strcmp(e->h, h));
}
p->count++; p->count++;
} }
...@@ -274,7 +324,7 @@ int ...@@ -274,7 +324,7 @@ int
main(void) { main(void) {
const struct expect *want; const struct expect *want;
struct zfr_iter_s zis[1]; struct zfr_iter_s zis[1];
const char **p, *t; const char **p, *t, **inv = invalid;
char s[256], u[64]; char s[256], u[64];
unsigned n; unsigned n;
size_t l; size_t l;
...@@ -284,7 +334,6 @@ main(void) { ...@@ -284,7 +334,6 @@ main(void) {
t = s; t = s;
assert(tok(t, strchr(s, '\0'), "https://")); assert(tok(t, strchr(s, '\0'), "https://"));
for (want = testcase; want->u != NULL; want++) { for (want = testcase; want->u != NULL; want++) {
INIT_OBJ(zis, ZFR_ITER_MAGIC); INIT_OBJ(zis, ZFR_ITER_MAGIC);
zis->priv = NULL; zis->priv = NULL;
...@@ -293,16 +342,22 @@ main(void) { ...@@ -293,16 +342,22 @@ main(void) {
n = 0; n = 0;
*s = '\0'; *s = '\0';
for (p = want->pfx; *p != NULL; p++) { for (p = want->pfx; *p != NULL; p++, inv++) {
AN(inv);
if (*inv == NULL)
inv = invalid;
AN(p); AN(p);
AN(*p); AN(*p);
if (**p == '\0') if (**p == '\0') {
l = snprintf(u, sizeof u, "\t%s", want->u); l = snprintf(u, sizeof u, " %s\t%s",
*inv, want->u);
}
else { else {
l = snprintf(u, sizeof u, "\t%s%s%s", l = snprintf(u, sizeof u, " %s\t%s%s%s",
*p, want->h, want->u); *inv, *p, want->h, want->u);
} }
assert(l < sizeof u); assert(l < sizeof u);
inv++;
if (sizeof s - strlen(s) - 1 < l) if (sizeof s - strlen(s) - 1 < l)
break; break;
...@@ -310,6 +365,16 @@ main(void) { ...@@ -310,6 +365,16 @@ main(void) {
n++; n++;
t_steps(want, n, s); t_steps(want, n, s);
if (*inv == NULL)
continue;
if (sizeof s - strlen(s) - 2 < strlen(*inv))
break;
(void) strcat(s, "\r");
(void) strcat(s, *inv);
t_steps(want, n, s);
inv++;
} }
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment