Add parsing of bodies (req.body, bereq.body and resp.body)

cf9c6959 · Nils Goroll · 076545a6 · cf9c6959 · cf9c6959 · cf9c6959
Unverified Commit cf9c6959 authored Aug 26, 2023 by Nils Goroll
5 changed files
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -31,7 +31,8 @@ AM_VTC_LOG_FLAGS = \
 	-p vmod_path="$(abs_builddir)/.libs:$(vmoddir)"

 TESTS = \
-	vtc/vmod_frozen.vtc
+	vtc/vmod_frozen.vtc \
+	vtc/readme_example.vtc

 # Documentation


--- a/src/vmod_frozen.c
+++ b/src/vmod_frozen.c
@@ -37,6 +37,20 @@
 #include "vmod_frozen.h"
 #include "frozen/frozen.h"

+/*
+ * Log a printf-style formatted message under the SLT_VCL_Error tag.
+ *
+ * The message is written with VSLbv() to ctx->vsl when that is set and
+ * with VSLv() under NO_VXID otherwise. Must not be called from a method
+ * in VCL_MET_TASK_H (asserted).
+ */
+static void
+errmsg(VRT_CTX, const char *fmt, ...)
+{
+	va_list ap;
+
+	AZ(ctx->method & VCL_MET_TASK_H);
+
+	va_start(ap, fmt);
+	if (ctx->vsl == NULL)
+		VSLv(SLT_VCL_Error, NO_VXID, fmt, ap);
+	else
+		VSLbv(ctx->vsl, SLT_VCL_Error, fmt, ap);
+	va_end(ap);
+}
+
 static enum type_e
 type_parse(VCL_ENUM e) {
 #define VMODENUM(n) if (e == VENUM(n)) return(type_ ## n);
@@ -298,6 +312,164 @@ vmod_parser_parse(VRT_CTX, struct vmod_frozen_parser *vfpaa,
 	return (parse(ctx, vfpaa, s, s != NULL ? strlen(s) : 0));
 }

+/*
+ * State shared between vmod_parser_parse_body() and collect_iter_f()
+ * while a body is gathered into one contiguous buffer.
+ */
+struct collect_iter_priv {
+	unsigned			magic;
+#define COLLECT_ITER_PRIV_MAGIC 0x6ddd62cd
+	char				*heap;	// only if realloc'ed
+	const char			*ptr;	// collected body: borrowed segment or heap
+	size_t				len;	// bytes collected; size needed if max exceeded
+	size_t				max;	// upper bound for the heap copy
+};
+
+/*
+ * Body iterator callback: gather the segments handed in into the
+ * struct collect_iter_priv passed as priv.
+ *
+ * Returns 0 to continue. Returns 1 when appending the segment would
+ * grow the heap copy beyond cip->max; in that case the heap copy is
+ * dropped and cip->len is set to the size which would have been needed.
+ */
+static int v_matchproto_(objiterate_f)
+collect_iter_f(void *priv, unsigned flush, const void *ptr, ssize_t len)
+{
+	struct collect_iter_priv *cip;
+	size_t total;
+	char *buf;
+
+	CAST_OBJ_NOTNULL(cip, priv, COLLECT_ITER_PRIV_MAGIC);
+
+	/* nothing to collect from this call */
+	if (ptr == NULL || len == 0)
+		return (0);
+
+	/*
+	 * First and, at the same time, last segment: remember the pointer
+	 * handed in and skip the copy.
+	 *
+	 * flush is compared for equality on purpose, such that a value
+	 * which also contains OBJ_ITER_FLUSH does not match.
+	 */
+	if (cip->ptr == NULL && flush == OBJ_ITER_END) {
+		AZ(cip->heap);
+		AZ(cip->len);
+		cip->len = len;
+		cip->ptr = ptr;
+		return (0);
+	}
+
+	total = cip->len + len;
+	if (total <= cip->max) {
+		/* grow the heap copy and append this segment */
+		buf = realloc(cip->heap, total);
+		AN(buf);
+		memcpy(buf + cip->len, ptr, len);
+		cip->ptr = cip->heap = buf;
+		cip->len = total;
+		return (0);
+	}
+
+	/* over the limit: drop the copy, report the size and stop */
+	free(cip->heap);
+	cip->heap = NULL;
+	cip->ptr = NULL;
+	cip->len = total;
+	return (1);
+}
+
+/* we need to keep the body copy around until the end of the task
+ * because .extract uses references to it after .parse_body().
+ *
+ * so, if we have a heap copy, its pointer is handed to a priv_task
+ */
+
+/*
+ * fini callback of collect_priv_methods: release the pointer passed in
+ * with free(). The VCL context is not used.
+ */
+static void
+collect_free(VRT_CTX, void *p)
+{
+
+	(void)ctx;
+	free(p);
+}
+
+/*
+ * Methods attached to the priv_task which takes over the heap copy of
+ * a body: collect_free() is registered as its fini callback.
+ *
+ * The table is never modified and only referenced from this file,
+ * hence static const.
+ */
+static const struct vmod_priv_methods collect_priv_methods[1] = {{
+	.magic = VMOD_PRIV_METHODS_MAGIC,
+	.type = "frozen collect delayed free",
+	.fini = collect_free
+}};
+
+/*
+ * VCL method .parse_body(): collect the body selected by *which* into
+ * one contiguous buffer and run parse() on it.
+ *
+ * which: req_body, bereq_body or resp_body
+ * max:   limit for the heap copy made by collect_iter_f()
+ *
+ * Returns the result of parse(), or 0 after logging an error with
+ * errmsg() when the body is not available from this context, when
+ * collecting it fails or exceeds max, or when no priv_task can be
+ * obtained for the heap copy.
+ */
+VCL_BOOL
+vmod_parser_parse_body(VRT_CTX,
+    struct VPFX(frozen_parser) *vfpaa, VCL_ENUM which, VCL_BYTES max)
+{
+	struct collect_iter_priv cip[1];
+	struct vmod_priv *priv_task;
+	int r;
+
+	CHECK_OBJ_NOTNULL(ctx, VRT_CTX_MAGIC);
+
+	INIT_OBJ(cip, COLLECT_ITER_PRIV_MAGIC);
+	cip->max = max;
+
+	if (which == VENUM(req_body)) {
+		if (ctx->req == NULL) {
+			errmsg(ctx, "xfrozen.parse_body(which = req_body) "
+			    "called but no request body found");
+			return (0);
+		}
+		r = VRB_Iterate(ctx->req->wrk, ctx->vsl, ctx->req,
+		    collect_iter_f, cip);
+	}
+	else if (which == VENUM(bereq_body)) {
+		if (ctx->bo != NULL && ctx->bo->bereq_body != NULL) {
+			/* the backend request has a body object of its own */
+			r = ObjIterate(ctx->bo->wrk, ctx->bo->bereq_body,
+			    cip, collect_iter_f, 0);
+		}
+		else if (ctx->bo == NULL || ctx->bo->req == NULL) {
+			errmsg(ctx, "xfrozen.parse_body(which = bereq_body) "
+			    "called but no backend request body found");
+			return (0);
+		}
+		else {
+			/* otherwise iterate the body of the bo's request */
+			r = VRB_Iterate(ctx->bo->wrk, ctx->vsl, ctx->bo->req,
+			    collect_iter_f, cip);
+		}
+	}
+	else if (which == VENUM(resp_body)) {
+		if (ctx->req == NULL || ctx->req->objcore == NULL) {
+			errmsg(ctx, "xfrozen.parse_body(which = resp_body) "
+			    "called but no response body found");
+			return (0);
+		}
+		r = ObjIterate(ctx->req->wrk, ctx->req->objcore,
+		    cip, collect_iter_f, 0);
+	}
+	else
+		WRONG("which in parse_body");
+
+	if (r != 0 && cip->len > cip->max) {
+		/*
+		 * print the size_t copy of the limit: max itself is a
+		 * VCL_BYTES, which does not match the %zu conversion
+		 */
+		errmsg(ctx, "xfrozen.parse_body(maxbytes = %zu) "
+		    "exceeded", cip->max);
+		AZ(cip->heap);
+		return (0);
+	}
+	if (r != 0) {
+		errmsg(ctx, "xfrozen.parse_body() collect failed %d", r);
+		free(cip->heap);
+		return (0);
+	}
+
+	/*
+	 * Hand a heap copy over to a priv_task *before* parsing: .extract
+	 * uses references into the buffer after parse(), so the buffer
+	 * must never be freed once parse() has seen it. If no priv_task
+	 * is available, we bail out while nothing references the copy
+	 * yet. If parse() fails, the copy is released with the task.
+	 */
+	if (cip->heap != NULL) {
+		priv_task = VRT_priv_task(ctx, cip->heap);
+		if (priv_task == NULL) {
+			errmsg(ctx, "xfrozen.parse_body() out of workspace");
+			free(cip->heap);
+			return (0);
+		}
+		priv_task->priv = cip->heap;
+		priv_task->methods = collect_priv_methods;
+	}
+
+	return (parse(ctx, vfpaa, cip->ptr, cip->len));
+}
+
 static const enum type_e json_type_2_type_e[JSON_TYPES_CNT] = {
 	[JSON_TYPE_INVALID] = _TYPE_E_INVALID,
 	[JSON_TYPE_STRING] = type_STRING,

--- a/src/vmod_frozen.vcc
+++ b/src/vmod_frozen.vcc
@@ -12,7 +12,9 @@ $Module frozen 3 "JSON parsing with frozen"
 DESCRIPTION
 ===========

-This vmod makes available to VCL the _frozen_ JSON parser with low
+.. _frozen: https://github.com/cesanta/frozen
+
+This vmod makes available to VCL the `frozen`_ JSON parser with low
 overhead: By specifying a set of expected JSON paths, a callback to
 the parser is used to track only paths of interest, which can then be
 extracted.
@@ -50,6 +52,30 @@ Example
 	    # ...
 	}

+Example for parsing a request body and returning it in a different
+format (as an array)::
+
+	import frozen;
+	import std;
+
+	sub vcl_init {
+	    new json = frozen.parser();
+	    json.expect(".a", NUMBER, required = true);
+	    json.expect(".b", STRING, required = true);
+	}
+	sub vcl_recv {
+	    if (! json.parse_body(req_body, 1K)) {
+		    return (synth(400, "parse error"));
+	    }
+	    return (synth(200));
+	}
+	sub vcl_synth {
+	    set resp.http.content-type = "application/json";
+	    set resp.body = "[" + json.extract(".a") + "," +
+			     {"""} + json.extract(".b") + {""]"};
+	    return (deliver);
+	}
+
 $Object parser(INT depth=10)

 Instiantiate a JSON parser object.
@@ -116,6 +142,27 @@ For expected paths, the first match is recorded.

 Details on parse errors are logged as ``VCL_Error``

+$Method BOOL .parse_body(ENUM {req_body, bereq_body, resp_body }
+    which, BYTES maxsize)
+
+Same as `xfrozen.parse()`_, but parse the body given as the *which*
+argument, of maximum size *maxsize*.
+
+Note that, unless the body is stored as a single, durable
+(non-transient) segment, a full copy of the body is made on the heap
+memory using :ref:`realloc(3)`. This is due to the fact that the
+*frozen* parser used by this project does not support parsing
+discontinuous segmented data. Thus, be mindful of memory requirements
+when parsing large JSON objects. The *maxsize* argument limits the
+maximum amount of additional heap memory used. If exceeded, the
+`xfrozen.parse_body()`_ returns ``false`` and an appropriate error is
+logged.
+
+When used with a ``req_body`` or ``bereq_body`` *which*
+argument, this method consumes the request body. If it is to
+be used again (for example, to send it to a backend), it
+should first be cached by calling ``std.cache_req_body(<size>)``.
+
 $Method STRING .extract(STRING path, STRING null = "", STRING undef = "")

 After a successful `.parse()`, extract the given path, which must have

--- a/src/vtc/readme_example.vtc
+++ b/src/vtc/readme_example.vtc
+varnishtest "example from readme"
+
+varnish v1 -vcl+backend {
+    import frozen;
+    import std;
+
+    backend proforma none;
+
+    sub vcl_init {
+	new json = frozen.parser();
+	json.expect(".a", NUMBER, required = true);
+	json.expect(".b", STRING, required = true);
+    }
+    sub vcl_recv {
+	if (! json.parse_body(req_body, 1K)) {
+		return (synth(400, "parse error"));
+	}
+	return (synth(200));
+    }
+    sub vcl_synth {
+	set resp.http.content-type = "application/json";
+	set resp.body = "[" + json.extract(".a") + "," +
+			 {"""} + json.extract(".b") + {""]"};
+	return (deliver);
+    }
+} -start
+
+# req.body uncached
+client c1 {
+	txreq -url "/t" -nolen -hdr "Transfer-Encoding: chunked"
+	chunked "{ a: 1, b: \"hi there\", c: true} "
+	chunkedlen 0
+
+	rxresp
+	expect resp.status == 200
+	expect resp.body == "[1,\"hi there\"]"
+} -run
--- a/src/vtc/vmod_frozen.vtc
+++ b/src/vtc/vmod_frozen.vtc
@@ -2,11 +2,16 @@ varnishtest "test vmod-frozen"

 server s1 {
 	rxreq
-	txresp
+	txresp -nolen -hdr "Transfer-Encoding: chunked"
+	chunked "{ a: 1, b: \"hi there\", c: true, d: false, "
+	chunked " e : null, f: [ 1, -2, 3], g: { \"1\": [], h"
+	chunked ": [ 7 ] } } "
+	chunkedlen 0
 } -start

 varnish v1 -vcl+backend {
    import frozen;
+    import std;

    sub vcl_init {
 	new p_test = frozen.parser();
@@ -26,6 +31,12 @@ varnish v1 -vcl+backend {
 	p_test.expect(".g");
    }
    sub vcl_recv {
+	if (req.url == "/resp.body") {
+		return (hash);
+	}
+	if (req.http.cache) {
+		std.cache_req_body(std.bytes(req.http.max));
+	}
 	return (synth(200));
    }
    sub extract {
@@ -73,10 +84,23 @@ varnish v1 -vcl+backend {
 			{" e : null, f: [ 1, -2, 3], g: { "1": [], h"} +
 			{": [ 7 ] } } "});
 	}
+	else if (req.url == "/req.body") {
+		set resp.http.parse = p_test.parse_body(req_body,
+		    std.bytes(req.http.max));
+	}
+	call extract;
+    }
+    sub vcl_deliver {
+	set resp.http.fail = p_test.parse(req.http.not);
+	if (req.url == "/resp.body") {
+		set resp.http.parse = p_test.parse_body(resp_body,
+		    std.bytes(req.http.max));
+	}
 	call extract;
    }
 } -start

+# vcl string
 client c1 {
 	txreq -url "/static"
 	rxresp
@@ -105,4 +129,154 @@ client c1 {
 	expect resp.http.gh0-type == NUMBER
 	expect resp.http.g == {{ "1": [], h: [ 7 ] }}
 	expect resp.http.g-type == OBJECT
-} -run
+} -start
+
+# req.body uncached
+client c2 {
+	txreq -url "/req.body" -hdr "max: 1K" \
+	    -nolen -hdr "Transfer-Encoding: chunked"
+	chunked "{ a: 1, b: \"hi there\", c: true, d: false, "
+	chunked " e : null, f: [ 1, -2, 3], g: { \"1\": [], h"
+	chunked ": [ 7 ] } } "
+	chunkedlen 0
+
+	rxresp
+	expect resp.status == 200
+	expect resp.http.fail == false
+	expect resp.http.parse == true
+	expect resp.http.a == 1
+	expect resp.http.a-type == NUMBER
+	expect resp.http.b == "hi there"
+	expect resp.http.b-type == STRING
+	expect resp.http.c == true
+	expect resp.http.c-type == BOOL
+	expect resp.http.d == false
+	expect resp.http.d-type == BOOL
+	expect resp.http.e == "<null>"
+	expect resp.http.e-type == ""
+	expect resp.http.f0 == 1
+	expect resp.http.f0-type == NUMBER
+	expect resp.http.f1 == -2
+	expect resp.http.f1-type == NUMBER
+	expect resp.http.f == "[ 1, -2, 3]"
+	expect resp.http.f-type == ARRAY
+	expect resp.http.g1 == "[]"
+	expect resp.http.g1-type == ARRAY
+	expect resp.http.gh0 == 7
+	expect resp.http.gh0-type == NUMBER
+	expect resp.http.g == {{ "1": [], h: [ 7 ] }}
+	expect resp.http.g-type == OBJECT
+} -start
+
+# req.body cached
+client c3 {
+	txreq -url "/req.body" -hdr "cache: true" -hdr "max: 1K" \
+	    -nolen -hdr "Transfer-Encoding: chunked"
+	chunked "{ a: 1, b: \"hi there\", c: true, d: false, "
+	chunked " e : null, f: [ 1, -2, 3], g: { \"1\": [], h"
+	chunked ": [ 7 ] } } "
+	chunkedlen 0
+
+	rxresp
+	expect resp.status == 200
+	expect resp.http.fail == false
+	expect resp.http.parse == true
+	expect resp.http.a == 1
+	expect resp.http.a-type == NUMBER
+	expect resp.http.b == "hi there"
+	expect resp.http.b-type == STRING
+	expect resp.http.c == true
+	expect resp.http.c-type == BOOL
+	expect resp.http.d == false
+	expect resp.http.d-type == BOOL
+	expect resp.http.e == "<null>"
+	expect resp.http.e-type == ""
+	expect resp.http.f0 == 1
+	expect resp.http.f0-type == NUMBER
+	expect resp.http.f1 == -2
+	expect resp.http.f1-type == NUMBER
+	expect resp.http.f == "[ 1, -2, 3]"
+	expect resp.http.f-type == ARRAY
+	expect resp.http.g1 == "[]"
+	expect resp.http.g1-type == ARRAY
+	expect resp.http.gh0 == 7
+	expect resp.http.gh0-type == NUMBER
+	expect resp.http.g == {{ "1": [], h: [ 7 ] }}
+	expect resp.http.g-type == OBJECT
+} -start
+
+# resp.body
+client c4 {
+	txreq -url "/resp.body" -hdr "max: 1K"
+
+	rxresp
+	expect resp.status == 200
+	expect resp.http.fail == false
+	expect resp.http.parse == true
+	expect resp.http.a == 1
+	expect resp.http.a-type == NUMBER
+	expect resp.http.b == "hi there"
+	expect resp.http.b-type == STRING
+	expect resp.http.c == true
+	expect resp.http.c-type == BOOL
+	expect resp.http.d == false
+	expect resp.http.d-type == BOOL
+	expect resp.http.e == "<null>"
+	expect resp.http.e-type == ""
+	expect resp.http.f0 == 1
+	expect resp.http.f0-type == NUMBER
+	expect resp.http.f1 == -2
+	expect resp.http.f1-type == NUMBER
+	expect resp.http.f == "[ 1, -2, 3]"
+	expect resp.http.f-type == ARRAY
+	expect resp.http.g1 == "[]"
+	expect resp.http.g1-type == ARRAY
+	expect resp.http.gh0 == 7
+	expect resp.http.gh0-type == NUMBER
+	expect resp.http.g == {{ "1": [], h: [ 7 ] }}
+	expect resp.http.g-type == OBJECT
+} -start
+
+########################################
+## failure modes
+
+# maxbytes exceeded
+logexpect l11 -v v1 -q "ReqURL ~ \"^/req.body\" and ReqHeader:max ~ \"^10B\"" {
+	expect * * ReqURL	{^/req.body}
+	expect * = VCL_Error	{^xfrozen.parse_body.maxbytes = 10. exceeded}
+} -start
+
+client c11 {
+	txreq -url "/req.body" -hdr "max: 10B" \
+	    -nolen -hdr "Transfer-Encoding: chunked"
+	chunked "{ a: 1, b: \"hi there\", c: true, d: false, "
+	chunked " e : null, f: [ 1, -2, 3], g: { \"1\": [], h"
+	chunked ": [ 7 ] } } "
+	chunkedlen 0
+
+	rxresp
+	expect resp.status == 500
+} -start
+
+# no req.body
+logexpect l12 -v v1 -q "ReqURL ~ \"^/req.body\" and ReqHeader:max ~ \"^1B\"" {
+	expect * * ReqURL	{^/req.body}
+	expect * = VCL_Error	{^no state from .parse()}
+} -start
+
+client c12 {
+	txreq -url "/req.body" -hdr "max: 1B"
+
+	rxresp
+	expect resp.status == 500
+} -start
+
+client c1 -wait
+client c2 -wait
+client c3 -wait
+client c4 -wait
+
+client c11 -wait
+client c12 -wait
+logexpect l11 -wait
+logexpect l12 -wait