Add hashing of req.body, bereq.body and resp.body to .update()

.update() can now hash all bodies except beresp.body, which is more difficult because it would prevent streaming. The msg argument is now optional in order to allow hashing of only a body.

Add hashing of req.body, bereq.body and resp.body to .update()
.update() can now hash all bodies except beresp.body, which is more difficult because it would prevent streaming. The msg argument is now optional in order to allow hashing of only a body.
98638e57 · Nils Goroll · 6daf80d1 · 98638e57 · 98638e57 · 98638e57
Unverified Commit 98638e57 authored May 24, 2023 by Nils Goroll
Showing with 373 additions and 24 deletions

README.rst README.rst +26 -9

update_from_body.vtc src/tests/update_from_body.vtc +230 -0

vmod_blobdigest.c src/vmod_blobdigest.c +100 -8

vmod_blobdigest.vcc src/vmod_blobdigest.vcc +17 -7

No files found.
--- a/README.rst
+++ b/README.rst
@@ -25,7 +25,7 @@ SYNOPSIS
  new xdigest = blobdigest.digest(ENUM hash, BLOB init, ENUM scope)
-      BOOL xdigest.update(BLOB msg, BOOL fail)
+      BOOL xdigest.update([BLOB msg], [ENUM from], BOOL fail)
      BLOB xdigest.final()
@@ -266,11 +266,27 @@ Example::
 .. _xdigest.update():
-BOOL xdigest.update(BLOB msg, BOOL fail=1)
+BOOL xdigest.update([BLOB msg], [ENUM from], BOOL fail)
------------------------------------------
+-------------------------------------------------------
+::
+      BOOL xdigest.update(
+            [BLOB msg],
+            [ENUM {req_body, bereq_body, resp_body} from],
+            BOOL fail=1
+      )
+If the BLOB ``msg`` is provided, incrementally add it to the digest
+context of this object.
+If a ``from`` argument is given, incrementally add the named body to
+the digest context of this object.
+If both arguments are given, the BLOB ``msg`` is added first, followed
+by the body named by ``from``.
-Incrementally add the BLOB ``msg`` to the digest context of this
+Returns ``true`` if and only if the operation was successful.
-object.  Returns ``true`` if and only if the operation was successful.
 As described above: if a digest object is updated in ``vcl_init``,
 then the updated context is valid for all subsequent uses of the
@@ -280,10 +296,11 @@ task (client or backend transaction).
 This method MAY NOT be called after ``.final()`` has been called
 for the same object, either in ``vcl_init`` or in the current task.
-The method returns ``false`` if ``msg`` is NULL, or if it is called
+The method returns ``false`` if ``msg`` is NULL, if the body as per
-after ``.final()``. If the ``fail`` argument is ``true`` (the
+the ``from`` argument is not available from the current context, or if
-default), this also triggers a VCL Error. A ``fail=false`` argument
+it is called after ``.final()``. If the ``fail`` argument is ``true``
-can be used for explicit error handling.
+(the default), this also triggers a VCL Error. A ``fail=false``
+argument can be used for explicit error handling.
 If it fails in ``vcl_init``, the VCL load will fail with an error
 message. If it fails in any other VCL subroutine, an error message is

--- a/src/tests/update_from_body.vtc
+++ b/src/tests/update_from_body.vtc
+# looks like -*- vcl -*-
+varnishtest "from = ...body with .update()"
+varnish v1 -vcl {
+	# VCL under test: exercises .update(from = ...) for req_body,
+	# bereq_body and resp_body and exports the results as headers.
+	import blobdigest;
+	import blob;
+	import std;
+	backend none none;
+	sub vcl_init {
+		new a = blob.blob(IDENTITY, "a");
+		new b = blob.blob(IDENTITY, "b");
+		# d1 has no initial blob, d2 is initialized with "a"
+		new d1 = blobdigest.digest(SHA256);
+		new d2 = blobdigest.digest(SHA256, a.get());
+	}
+	sub vcl_backend_error {
+		# presumably reached for every fetch because the only
+		# backend is "none" -- turn it into a 200 carrying the
+		# digests and the .update() results from vcl_backend_fetch
+		set beresp.status = 200;
+		set beresp.http.d1 = blob.encode(HEX, LOWER, d1.final());
+		set beresp.http.d2 = blob.encode(HEX, LOWER, d2.final());
+		set beresp.http.d1r = bereq.http.d1r;
+		set beresp.http.d2r = bereq.http.d2r;
+		# the response body is the URL; vcl_deliver hashes it
+		set beresp.body = bereq.url;
+		return (deliver);
+	}
+	sub vcl_backend_fetch {
+		# clients send "unset: b" to have the backend request body
+		# removed before it is hashed
+		if (bereq.http.unset ~ "b") {
+			unset bereq.body;
+		}
+		# d1: body only; d2: blob "b" followed by the body, with
+		# fail = false so a failure is returned instead of raised
+		set bereq.http.d1r = d1.update(from = bereq_body);
+		set bereq.http.d2r = d2.update(b.get(), from = bereq_body,
+		    fail = false);
+	}
+	sub vcl_recv {
+		# URLs starting with /cache get their request body cached
+		if (req.url ~ "^/cache") {
+			std.cache_req_body(1m);
+		}
+		# checked first: "req_body$" below would also match
+		# URLs ending in bereq_body
+		if (req.url ~ "bereq_body$") {
+			return(pass);
+		}
+		if (req.url ~ "req_body$") {
+			set req.http.d1r = d1.update(from = req_body);
+			set req.http.d2r = d2.update(b.get(),
+			    from = req_body, fail = false);
+			return(synth(200));
+		}
+		return(synth(400));
+	}
+	sub vcl_synth {
+		# only the synth(200) from the req_body branch of vcl_recv
+		# carries digests
+		if (resp.status != 200) {
+			return(deliver);
+		}
+		set resp.http.d1 = blob.encode(HEX, LOWER, d1.final());
+		set resp.http.d2 = blob.encode(HEX, LOWER, d2.final());
+		set resp.http.d1r = req.http.d1r;
+		set resp.http.d2r = req.http.d2r;
+	}
+	sub vcl_deliver {
+		# this is a different d1 than on the backend side
+		# because PRIV_TASK
+		set resp.http.d1dr = d1.update(from = resp_body);
+		set resp.http.d1d = blob.encode(HEX, LOWER, d1.final());
+	}
+} -start
+client c1 {
+	# /cache_req_body: vcl_recv caches the request body and hashes it
+	# into d1 (body only) and d2 ("a", then "b", then the body)
+	txreq -url "/cache_req_body"
+	rxresp
+	expect resp.status == 200
+	# empty
+	expect resp.http.d1 == "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
+	# "ab"
+	expect resp.http.d2 == "fb8e20fc2e4c3f248c60c39bd652f3c1347298bb977b8b4d5903b85055620603"
+	expect resp.http.d1r == "true"
+	expect resp.http.d2r == "true"
+	txreq -url "/cache_req_body" -body "foo"
+	rxresp
+	expect resp.status == 200
+	# "foo"
+	expect resp.http.d1 == "2c26b46b68ffc68ff99b453c1d30413413422d706483bfa0f98a5e886266e7ae"
+	# presumably "abfoo" -- TODO confirm
+	expect resp.http.d2 == "1530db9d21811ed52817f99bf207bbc0aeb433c6f0f1b37e571d28890d33078d"
+	expect resp.http.d1r == "true"
+	expect resp.http.d2r == "true"
+	# same body sent chunked: the same digests are expected
+	txreq -url "/cache_req_body" -nolen -hdr "Transfer-encoding: chunked"
+	chunked f
+	chunked o
+	chunked o
+	chunkedlen 0
+	rxresp
+	expect resp.status == 200
+	expect resp.http.d1 == "2c26b46b68ffc68ff99b453c1d30413413422d706483bfa0f98a5e886266e7ae"
+	expect resp.http.d2 == "1530db9d21811ed52817f99bf207bbc0aeb433c6f0f1b37e571d28890d33078d"
+	expect resp.http.d1r == "true"
+	expect resp.http.d2r == "true"
+} -start
+# without a cached req body, d2 is always just the hash of "ab"
+client c2 {
+	# /req_body: hashed in vcl_recv without std.cache_req_body()
+	txreq -url "/req_body"
+	rxresp
+	expect resp.status == 200
+	# empty
+	expect resp.http.d1 == "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
+	# "ab"
+	expect resp.http.d2 == "fb8e20fc2e4c3f248c60c39bd652f3c1347298bb977b8b4d5903b85055620603"
+	expect resp.http.d1r == "true"
+	expect resp.http.d2r == "true"
+	txreq -url "/req_body" -body "foo"
+	rxresp
+	expect resp.status == 200
+	# "foo"
+	expect resp.http.d1 == "2c26b46b68ffc68ff99b453c1d30413413422d706483bfa0f98a5e886266e7ae"
+	# still "ab": the body update of d2 fails (d2r is "false" below),
+	# presumably because d1 already consumed the uncached body
+	expect resp.http.d2 == "fb8e20fc2e4c3f248c60c39bd652f3c1347298bb977b8b4d5903b85055620603"
+	expect resp.http.d1r == "true"
+	expect resp.http.d2r == "false"
+	# same body sent chunked: the same results are expected
+	txreq -url "/req_body" -nolen -hdr "Transfer-encoding: chunked"
+	chunked f
+	chunked o
+	chunked o
+	chunkedlen 0
+	rxresp
+	expect resp.status == 200
+	expect resp.http.d1 == "2c26b46b68ffc68ff99b453c1d30413413422d706483bfa0f98a5e886266e7ae"
+	expect resp.http.d2 == "fb8e20fc2e4c3f248c60c39bd652f3c1347298bb977b8b4d5903b85055620603"
+	expect resp.http.d1r == "true"
+	expect resp.http.d2r == "false"
+} -start
+client c3 {
+	# /cache_bereq_body: the request body is cached in vcl_recv, the
+	# request is passed, and the body is hashed in vcl_backend_fetch
+	txreq -url "/cache_bereq_body"
+	rxresp
+	expect resp.status == 200
+	# empty
+	expect resp.http.d1 == "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
+	# "ab"
+	expect resp.http.d2 == "fb8e20fc2e4c3f248c60c39bd652f3c1347298bb977b8b4d5903b85055620603"
+	expect resp.http.d1r == "true"
+	expect resp.http.d2r == "true"
+	expect resp.http.d1dr == "true"
+	# digest of the response body from vcl_deliver; the body is set to
+	# bereq.url, so presumably the hash of "/cache_bereq_body"
+	expect resp.http.d1d == "307b3532e01bc16bd0ebdb42255d22b01b7fe244227a4dfe13ef5203a24b0c9c"
+	txreq -url "/cache_bereq_body" -body "foo"
+	rxresp
+	expect resp.status == 200
+	# "foo"
+	expect resp.http.d1 == "2c26b46b68ffc68ff99b453c1d30413413422d706483bfa0f98a5e886266e7ae"
+	# presumably "abfoo" -- TODO confirm
+	expect resp.http.d2 == "1530db9d21811ed52817f99bf207bbc0aeb433c6f0f1b37e571d28890d33078d"
+	expect resp.http.d1r == "true"
+	expect resp.http.d2r == "true"
+	expect resp.http.d1dr == "true"
+	expect resp.http.d1d == "307b3532e01bc16bd0ebdb42255d22b01b7fe244227a4dfe13ef5203a24b0c9c"
+	# same body sent chunked: the same digests are expected
+	txreq -url "/cache_bereq_body" -nolen -hdr "Transfer-encoding: chunked"
+	chunked f
+	chunked o
+	chunked o
+	chunkedlen 0
+	rxresp
+	expect resp.status == 200
+	expect resp.http.d1 == "2c26b46b68ffc68ff99b453c1d30413413422d706483bfa0f98a5e886266e7ae"
+	expect resp.http.d2 == "1530db9d21811ed52817f99bf207bbc0aeb433c6f0f1b37e571d28890d33078d"
+	expect resp.http.d1r == "true"
+	expect resp.http.d2r == "true"
+	expect resp.http.d1dr == "true"
+	expect resp.http.d1d == "307b3532e01bc16bd0ebdb42255d22b01b7fe244227a4dfe13ef5203a24b0c9c"
+	# was broken before #3914 merge / ace59ccfe8ad99aaa96dc5d9fdcda447086a1225
+	# "unset: b" makes vcl_backend_fetch unset bereq.body, so the
+	# digests expected here are those of an empty body
+	txreq -url "/cache_bereq_body" -hdr "unset: b" -body "foo"
+	rxresp
+	expect resp.status == 200
+	expect resp.http.d1 == "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
+	expect resp.http.d2 == "fb8e20fc2e4c3f248c60c39bd652f3c1347298bb977b8b4d5903b85055620603"
+	expect resp.http.d1r == "true"
+	expect resp.http.d2r == "true"
+	expect resp.http.d1dr == "true"
+	expect resp.http.d1d == "307b3532e01bc16bd0ebdb42255d22b01b7fe244227a4dfe13ef5203a24b0c9c"
+} -start
+# without a cached req body, d2 is always just the hash of "ab"
+client c4 {
+	# /bereq_body: the request is passed without caching the body and
+	# the body is hashed in vcl_backend_fetch
+	txreq -url "/bereq_body"
+	rxresp
+	expect resp.status == 200
+	# empty
+	expect resp.http.d1 == "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
+	# "ab"
+	expect resp.http.d2 == "fb8e20fc2e4c3f248c60c39bd652f3c1347298bb977b8b4d5903b85055620603"
+	expect resp.http.d1r == "true"
+	expect resp.http.d2r == "true"
+	expect resp.http.d1dr == "true"
+	# digest of the response body from vcl_deliver; the body is set to
+	# bereq.url, so presumably the hash of "/bereq_body"
+	expect resp.http.d1d == "966b82e2e767ff8fdfa80ef1e6e1ca2467d7a39e783b46ffe5dcec7f7d2b174a"
+	txreq -url "/bereq_body" -body "foo"
+	rxresp
+	expect resp.status == 200
+	# "foo"
+	expect resp.http.d1 == "2c26b46b68ffc68ff99b453c1d30413413422d706483bfa0f98a5e886266e7ae"
+	# still "ab": the body update of d2 fails (d2r is "false" below),
+	# presumably because d1 already consumed the uncached body
+	expect resp.http.d2 == "fb8e20fc2e4c3f248c60c39bd652f3c1347298bb977b8b4d5903b85055620603"
+	expect resp.http.d1r == "true"
+	expect resp.http.d2r == "false"
+	expect resp.http.d1dr == "true"
+	expect resp.http.d1d == "966b82e2e767ff8fdfa80ef1e6e1ca2467d7a39e783b46ffe5dcec7f7d2b174a"
+	# same body sent chunked: the same results are expected
+	txreq -url "/bereq_body" -nolen -hdr "Transfer-encoding: chunked"
+	chunked f
+	chunked o
+	chunked o
+	chunkedlen 0
+	rxresp
+	expect resp.status == 200
+	expect resp.http.d1 == "2c26b46b68ffc68ff99b453c1d30413413422d706483bfa0f98a5e886266e7ae"
+	expect resp.http.d2 == "fb8e20fc2e4c3f248c60c39bd652f3c1347298bb977b8b4d5903b85055620603"
+	expect resp.http.d1r == "true"
+	expect resp.http.d2r == "false"
+	expect resp.http.d1dr == "true"
+	expect resp.http.d1d == "966b82e2e767ff8fdfa80ef1e6e1ca2467d7a39e783b46ffe5dcec7f7d2b174a"
+} -start
+client c1 -wait
+client c2 -wait
+client c3 -wait
+client c4 -wait
--- a/src/vmod_blobdigest.c
+++ b/src/vmod_blobdigest.c
@@ -229,6 +229,87 @@ update(const enum algorithm hash, hash_ctx *restrict const hctx,
 	}
 }
+struct blobdigest_iter_priv {	/* state handed to blobdigest_iter_f() through its priv pointer */
+	unsigned			magic;	/* verified by CAST_OBJ_NOTNULL() in blobdigest_iter_f() */
+#define BLOBDIGEST_ITER_PRIV_MAGIC	0x7b7dbbaf
+	enum algorithm			hash;	/* algorithm argument passed on to update() */
+	hash_ctx *restrict const	hctx;	/* digest context passed on to update() */
+};
+/*
+ * objiterate_f callback: add one chunk of body data to the digest context
+ * carried in priv, which must point to a struct blobdigest_iter_priv.
+ *
+ * NULL or empty chunks are skipped, mirroring the guard that
+ * vmod_digest_update() applies to the msg BLOB before calling update();
+ * a zero-length update is not expected to change the digest.
+ *
+ * The flush argument is not used.  Always returns 0.
+ */
+static int v_matchproto_(objiterate_f)
+blobdigest_iter_f(void *priv, unsigned flush, const void *ptr, ssize_t len)
+{
+	struct blobdigest_iter_priv	*bi;
+	CAST_OBJ_NOTNULL(bi, priv, BLOBDIGEST_ITER_PRIV_MAGIC);
+	(void) flush;
+	/* never hand a NULL pointer or a non-positive length to update() */
+	if (ptr != NULL && len > 0)
+		update(bi->hash, bi->hctx, ptr, len);
+	return (0);
+}
+/*
+ * Hash the request body of req by running VRB_Iterate() with
+ * blobdigest_iter_f() as the callback and bi as its private state.
+ *
+ * Returns NULL on success, which includes the case that the request has no
+ * body (BS_NONE).  Otherwise returns a static string describing the error:
+ * the body status was already BS_ERROR or BS_TAKEN, or the iteration itself
+ * failed.
+ */
+static const char *
+blobdigest_vrb_iter(struct worker *wrk, struct vsl_log *vsl, struct req *req,
+		    struct blobdigest_iter_priv *bi)
+{
+	CHECK_OBJ_NOTNULL(req, REQ_MAGIC);
+	if (req->req_body_status == BS_NONE)
+		return (NULL);
+	if (req->req_body_status == BS_ERROR)
+		return ("previous error on body (BS_ERROR)");
+	if (req->req_body_status == BS_TAKEN)
+		return ("body already taken (BS_TAKEN)");
+	/*
+	 * A negative return means the body could not be read completely;
+	 * report it rather than presenting a partial hash as success.
+	 */
+	if (VRB_Iterate(wrk, vsl, req, blobdigest_iter_f, bi) < 0)
+		return ("error while reading body");
+	return (NULL);
+}
+/*
+ * Add the body selected by from (req_body, bereq_body or resp_body) to the
+ * digest context hctx using algorithm hash.
+ *
+ * req_body:   iterate over the body of ctx->req.
+ * bereq_body: iterate over ctx->bo->bereq_body if that object exists,
+ *             otherwise over the body of ctx->bo->req; no such request is
+ *             treated as "no body" and succeeds.
+ * resp_body:  iterate over ctx->req->objcore.
+ *
+ * Returns NULL on success, or a static string describing why the body is
+ * not available from the current context or could not be iterated.
+ */
+static const char *
+update_body(VRT_CTX, VCL_ENUM from,
+	    const enum algorithm hash, hash_ctx *restrict const hctx)
+{
+	struct blobdigest_iter_priv bi[1] = {{
+		.magic = BLOBDIGEST_ITER_PRIV_MAGIC,
+		.hash = hash,
+		.hctx = hctx
+		}};
+	struct req *req = ctx->req;
+	if (from == VENUM(req_body)) {
+		if (req == NULL)
+			return ("from = req_body, "
+				"but no request body found");
+		return (blobdigest_vrb_iter(req->wrk, ctx->vsl, req, bi));
+	}
+	if (from == VENUM(bereq_body)) {
+		if (ctx->bo == NULL)
+			return ("from = bereq_body, "
+				"but no backend request body found");
+		if (ctx->bo->bereq_body != NULL) {
+			/* do not report a failed iteration as success */
+			if (ObjIterate(ctx->bo->wrk, ctx->bo->bereq_body,
+				       bi, blobdigest_iter_f, 0) != 0)
+				return ("from = bereq_body, "
+					"error while iterating over body");
+			return (NULL);
+		}
+		req = ctx->bo->req;
+		/* no req == no body */
+		if (req == NULL)
+			return (NULL);
+		/* NOTE(review): uses req->wrk, not ctx->bo->wrk -- confirm */
+		return (blobdigest_vrb_iter(req->wrk, ctx->vsl, req, bi));
+	}
+	if (from == VENUM(resp_body)) {
+		if (req == NULL || req->objcore == NULL)
+			return ("from = resp_body, "
+				"but no response body found");
+		/* do not report a failed iteration as success */
+		if (ObjIterate(req->wrk, req->objcore,
+			       bi, blobdigest_iter_f, 0) != 0)
+			return ("from = resp_body, "
+				"error while iterating over body");
+		return (NULL);
+	}
+	WRONG("from VENUM");
+	return (NULL);
+}
 static void
 final(const enum algorithm hash, hash_ctx *restrict const hctx,
      uint8_t *restrict result)
@@ -444,17 +525,20 @@ vmod_digest__fini(struct vmod_blobdigest_digest **digestp)
 }
 VCL_BOOL
-vmod_digest_update(VRT_CTX, struct vmod_blobdigest_digest *h, VCL_BLOB b,
+vmod_digest_update(VRT_CTX, struct vmod_blobdigest_digest *h,
-		   VCL_BOOL fail)
+		   struct VARGS(digest_update) *a)
 {
 	struct digest_task *task;
 	hash_ctx *hctx;
+	const char *err = NULL;
 	CHECK_OBJ_NOTNULL(ctx, VRT_CTX_MAGIC);
+	AN(a);
 	CHECK_OBJ_NOTNULL(h, VMOD_BLOBDIGEST_DIGEST_MAGIC);
 	if (h->result != NULL) {
-		VERR(fail, ctx, "already finalized in %s.update()", h->vcl_name);
+		VERR(a->fail, ctx, "already finalized in %s.update()",
+		     h->vcl_name);
 		return (0);
 	}
@@ -462,19 +546,27 @@ vmod_digest_update(VRT_CTX, struct vmod_blobdigest_digest *h, VCL_BLOB b,
 	if (task == NULL)
 		return (0);
-	if (b == NULL) {
+	if (a->valid_msg && a->msg == NULL) {
-		VERR(fail, ctx, "null BLOB passed to %s.update()", h->vcl_name);
+		VERR(a->fail, ctx, "null BLOB passed to %s.update()",
+		     h->vcl_name);
 		return (0);
 	}
 	hctx = INIT_FINI(ctx) ? &h->ctx : &task->ctx;
 	if (task->result != NULL) {
-		VERR(fail, ctx, "already finalized in %s.update()", h->vcl_name);
+		VERR(a->fail, ctx, "already finalized in %s.update()",
+		     h->vcl_name);
+		return (0);
+	}
+	if (a->valid_msg && a->msg->len > 0 && a->msg->blob != NULL)
+		update(h->hash, hctx, a->msg->blob, a->msg->len);
+	if (a->valid_from)
+		err = update_body(ctx, a->from, h->hash, hctx);
+	if (err != NULL) {
+		VERR(a->fail, ctx, "%s in %s.update()", err, h->vcl_name);
 		return (0);
 	}
-	if (b->len > 0 && b->blob != NULL)
-		update(h->hash, hctx, b->blob, b->len);
 	return (1);
 }

--- a/src/vmod_blobdigest.vcc
+++ b/src/vmod_blobdigest.vcc
@@ -223,10 +223,19 @@ Example::
      new sha512 = blobdigest.digest(SHA512, foo.get());
  }
-$Method BOOL .update(BLOB msg, BOOL fail=1)
+$Method BOOL .update([BLOB msg], [ENUM {req_body, bereq_body, resp_body } from],
+    BOOL fail=1)
-Incrementally add the BLOB ``msg`` to the digest context of this
+If the BLOB ``msg`` is provided, incrementally add it to the digest
-object.  Returns ``true`` if and only if the operation was successful.
+context of this object.
+If a ``from`` argument is given, incrementally add the named body to
+the digest context of this object.
+If both arguments are given, the BLOB ``msg`` is added first, followed
+by the body named by ``from``.
+Returns ``true`` if and only if the operation was successful.
 As described above: if a digest object is updated in ``vcl_init``,
 then the updated context is valid for all subsequent uses of the
@@ -236,10 +245,11 @@ task (client or backend transaction).
 This method MAY NOT be called after ``.final()`` has been called
 for the same object, either in ``vcl_init`` or in the current task.
-The method returns ``false`` if ``msg`` is NULL, or if it is called
+The method returns ``false`` if ``msg`` is NULL, if the body as per
-after ``.final()``. If the ``fail`` argument is ``true`` (the
+the ``from`` argument is not available from the current context, or if
-default), this also triggers a VCL Error. A ``fail=false`` argument
+it is called after ``.final()``. If the ``fail`` argument is ``true``
-can be used for explicit error handling.
+(the default), this also triggers a VCL Error. A ``fail=false``
+argument can be used for explicit error handling.
 If it fails in ``vcl_init``, the VCL load will fail with an error
 message. If it fails in any other VCL subroutine, an error message is