Commit 5ea8940c authored by Dridi Boukelmoune's avatar Dridi Boukelmoune

hash: Revert recent hash changes

This reverts the following commits:

- e98e8e64.
  "Documentation updates for changed `vcl_hash{}` / `hash_data()`"
- 001279eb.
  "Document proper design pattern for using hash_data() in vcl_recv,"
- e36573e2.
  "Add a test-case for hash_data() in vcl_recv{}"
- 03fe0cee.
  "Allow hash_data() in vcl_recv{}"
- 4ebc3cfe.
  "Make it possible to override the initial digest, and explain in"
- d6ad52f5
  "Change the way we calculate the hash key for the cache."

Conflicts:
	doc/sphinx/reference/dp_vcl_recv_hash.rst
	doc/sphinx/reference/index.rst

Concerns were raised regarding a change of the way we compute the hash
key outside of the dot-zero release where we would expect such breaking
changes (among other things, vmod_shard relies on hash stability).

There is also no definite consensus of how to handle hashing from
vcl_recv.
parent 81aa74cc
......@@ -183,6 +183,19 @@ HSH_DeleteObjHead(const struct worker *wrk, struct objhead *oh)
FREE_OBJ(oh);
}
void
HSH_AddString(struct req *req, void *ctx, const char *str)
{
CHECK_OBJ_NOTNULL(req, REQ_MAGIC);
AN(ctx);
if (str != NULL) {
VSHA256_Update(ctx, str, strlen(str));
VSLb(req->vsl, SLT_Hash, "%s", str);
} else
VSHA256_Update(ctx, &str, 1);
}
/*---------------------------------------------------------------------
* This is a debugging hack to enable testing of boundary conditions
* in the hash algorithm.
......
......@@ -71,6 +71,7 @@ int HSH_DerefObjCore(struct worker *, struct objcore **, int rushmax);
enum lookup_e HSH_Lookup(struct req *, struct objcore **, struct objcore **);
void HSH_Ref(struct objcore *o);
void HSH_AddString(struct req *, void *ctx, const char *str);
unsigned HSH_Purge(struct worker *, struct objhead *, vtim_real ttl_now,
vtim_dur ttl, vtim_dur grace, vtim_dur keep);
struct objcore *HSH_Private(const struct worker *wrk);
......
......@@ -50,6 +50,7 @@
#include "storage/storage.h"
#include "common/heritage.h"
#include "vcl.h"
#include "vsha256.h"
#include "vtim.h"
#define REQ_STEPS \
......@@ -76,14 +77,6 @@
REQ_STEPS
#undef REQ_STEP
/*
* In this specific context we use SHA256 only as a very good
* hashing function. That renders most of the normal concerns
* about salting & seeding moot. However, if for some reason
* you want to salt your hashes, this is where you do it.
*/
static const uint8_t initial_digest[DIGEST_LEN];
/*--------------------------------------------------------------------
* Handle "Expect:" and "Connection:" on incoming request
*/
......@@ -898,6 +891,7 @@ static enum req_fsm_nxt v_matchproto_(req_state_f)
cnt_recv(struct worker *wrk, struct req *req)
{
unsigned recv_handling;
struct VSHA256Context sha256ctx;
const char *ci, *cp, *endpname;
CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC);
......@@ -927,8 +921,6 @@ cnt_recv(struct worker *wrk, struct req *req)
return (REQ_FSM_DONE);
}
memcpy(req->digest, initial_digest, sizeof req->digest);
VCL_recv_method(req->vcl, wrk, req, NULL, NULL);
if (wrk->handling == VCL_RET_FAIL) {
......@@ -970,13 +962,13 @@ cnt_recv(struct worker *wrk, struct req *req)
}
}
if (!memcmp(req->digest, initial_digest, sizeof req->digest)) {
VCL_hash_method(req->vcl, wrk, req, NULL, NULL);
if (wrk->handling == VCL_RET_FAIL)
recv_handling = wrk->handling;
else
assert(wrk->handling == VCL_RET_LOOKUP);
}
VSHA256_Init(&sha256ctx);
VCL_hash_method(req->vcl, wrk, req, NULL, &sha256ctx);
if (wrk->handling == VCL_RET_FAIL)
recv_handling = wrk->handling;
else
assert(wrk->handling == VCL_RET_LOOKUP);
VSHA256_Final(req->digest, &sha256ctx);
switch (recv_handling) {
case VCL_RET_VCL:
......
......@@ -689,29 +689,19 @@ VRT_fail(VRT_CTX, const char *fmt, ...)
VCL_VOID
VRT_hashdata(VRT_CTX, VCL_STRANDS s)
{
struct VSHA256Context sha256ctx;
int i;
CHECK_OBJ_NOTNULL(ctx, VRT_CTX_MAGIC);
CHECK_OBJ_NOTNULL(ctx->req, REQ_MAGIC);
AZ(ctx->specific);
VSHA256_Init(&sha256ctx);
VSHA256_Update(&sha256ctx, ctx->req->digest, sizeof ctx->req->digest);
AN(ctx->specific);
AN(s);
for (i = 0; i < s->n; i++) {
if (s->p[i] != NULL) {
VSHA256_Update(&sha256ctx, s->p[i], strlen(s->p[i]));
VSLb(ctx->req->vsl, SLT_Hash, "%s", s->p[i]);
} else {
VSHA256_Update(&sha256ctx, "", 1);
}
}
for (i = 0; i < s->n; i++)
HSH_AddString(ctx->req, ctx->specific, s->p[i]);
/*
* Add a 'field-separator' to make it more difficult to
* manipulate the hash.
*/
VSHA256_Update(&sha256ctx, "", 1);
VSHA256_Final(ctx->req->digest, &sha256ctx);
HSH_AddString(ctx->req, ctx->specific, NULL);
}
/*--------------------------------------------------------------------*/
......
......@@ -22,5 +22,5 @@ client c1 {
rxresp
expect resp.http.req_hash ~ "[[:xdigit:]]{64}"
expect resp.http.req_hash == resp.http.bereq_hash
expect resp.http.req_hash-sf == ":0jkH41nfmD0PRFsKpM1m7ucOApnxFc62B//mQWxOnmQ=:"
expect resp.http.req_hash-sf == ":3k0f0yRKtKt7akzkyNsTGSDOJAZOQowTwKWhu5+kIu0=:"
} -run
varnishtest "hash_data() in vcl_recv{}"
server s1 {
rxreq
txresp -hdr "Same: One"
} -start
varnish v1 -vcl+backend {
sub vcl_recv {
if (req.url == "/2") {
hash_data(req.http.foo);
}
}
sub vcl_hash {
hash_data(req.url);
return (lookup);
}
} -start
varnish v1 -cliok "param.set vsl_mask +Hash"
client c1 {
txreq -url /1
rxresp
expect resp.status == 200
expect resp.http.same == One
txreq -url /2 -hdr "foo: /1"
rxresp
expect resp.status == 200
expect resp.http.same == One
} -run
server s1 {
rxreq
txresp -hdr "Second: One"
} -start
varnish v1 -vcl+backend {
sub make_hash_key {
hash_data("Documented Design Pattern");
hash_data(req.url);
}
sub vcl_hash {
call make_hash_key;
return (lookup);
}
sub vcl_recv {
if (req.http.early) {
call make_hash_key;
}
}
}
client c1 {
txreq
rxresp
expect resp.status == 200
expect resp.http.second == One
txreq -hdr "early: yes"
rxresp
expect resp.status == 200
expect resp.http.second == One
} -run
......@@ -218,9 +218,9 @@ client c1 {
client c2 {
txreq -url /b/def
rxresp
expect resp.http.hash == "2e03d4fee2722154a84036faa3e4b6851e003830eadbe0d2874e3df09c8efe55"
expect resp.http.hash == "93d1c4ad76396c91dd97fa310f7f26445332662c89393dbeeb77fe49f9111ee4"
expect resp.http.by == "HASH"
expect resp.http.key -eq 0x2e03d4fe
expect resp.http.key -eq 0x93d1c4ad
expect resp.http.alt == 0
expect resp.http.warmup == "-1.000"
expect resp.http.rampup == "true"
......@@ -228,9 +228,9 @@ client c2 {
txreq -url /b/hash
rxresp
expect resp.http.hash == "49f33e9019891091d3dcf6edab6d9433b756678bdf5202dd41b8a667c89887b1"
expect resp.http.hash == "e47da20ea4db49d4f22acdadc69f02f445002be520a2865cd3351272add62540"
expect resp.http.by == "HASH"
expect resp.http.key -eq 0x49f33e90
expect resp.http.key -eq 0xe47da20e
expect resp.http.alt == 1
expect resp.http.warmup == "-1.000"
expect resp.http.rampup == "true"
......@@ -268,9 +268,9 @@ client c2 {
client c3 {
txreq -url /b/c/hash/def
rxresp
expect resp.http.hash == "dd70dcbbf385c398ee3b53a849f12d0d846fc21292349fb45d37b2d9d8eca25e"
expect resp.http.hash == "df9a465f8a0455c334b24c1638d3adda0f6e64fbe759029ab83602e3b9138884"
expect resp.http.by == "HASH"
expect resp.http.key -eq 0xdd70dcbb
expect resp.http.key -eq 0xdf9a465f
expect resp.http.alt == 7
expect resp.http.warmup == "-1.000"
expect resp.http.rampup == "true"
......@@ -278,9 +278,9 @@ client c3 {
txreq -url /b/c/hash/hash
rxresp
expect resp.http.hash == "41d09b9877cd0ac0eab888359b0ad54f0bf41da0ac03dc1b8ae12aff18465a8d"
expect resp.http.hash == "0eb35bc1fab5aad5902fd1bac86540bd13d43aa31c6c46f54e776b43392e66e6"
expect resp.http.by == "HASH"
expect resp.http.key -eq 0x41d09b98
expect resp.http.key -eq 0x0eb35bc1
expect resp.http.alt == 8
expect resp.http.warmup == "-1.000"
expect resp.http.rampup == "true"
......@@ -288,9 +288,9 @@ client c3 {
txreq -url /b/c/hash/url
rxresp
expect resp.http.hash == "dcac849e02b3322f5fd3dddf9b9f5fc26d295733e6f1c51b190dfb7239a56e28"
expect resp.http.hash == "1eb67b701ea07151cac5bea1f11b6267b9de15a3ff83cec995590480cbc2c750"
expect resp.http.by == "HASH"
expect resp.http.key -eq 0xdcac849e
expect resp.http.key -eq 0x1eb67b70
expect resp.http.alt == 9
expect resp.http.warmup == "0.500"
expect resp.http.rampup == "true"
......@@ -298,9 +298,9 @@ client c3 {
txreq -url /b/c/hash/key
rxresp
expect resp.http.hash == "112393761506e85f0c700a5d669a48b54001c870eb2e8d95f4d2f6fdccbe80a3"
expect resp.http.hash == "a11b617e21aa7db22b6205d7612002e595b1b00d8c11602017f65456a1be3a35"
expect resp.http.by == "HASH"
expect resp.http.key -eq 0x11239376
expect resp.http.key -eq 0xa11b617e
expect resp.http.alt == 10
expect resp.http.warmup == "-1.000"
expect resp.http.rampup == "false"
......@@ -308,9 +308,9 @@ client c3 {
txreq -url /b/c/hash/blob
rxresp
expect resp.http.hash == "5ef050c1185ac02a66d9f79703b8cd5f0636abf3b1f15b9f22e0fe64df985d28"
expect resp.http.hash == "d7eecc0ac83e1727332dcd8c7c8ae9f3114123abb2bf7e3fb15ecea8c84bb239"
expect resp.http.by == "HASH"
expect resp.http.key -eq 0x5ef050c1
expect resp.http.key -eq 0xd7eecc0a
expect resp.http.alt == 11
expect resp.http.warmup == "-1.000"
expect resp.http.rampup == "true"
......
......@@ -35,15 +35,6 @@ release process.
Varnish Cache Next (2021-03-15)
================================
* `hash_data()` can be called from `vcl_recv`, in which case
`vcl_hash` is not called. This allows `vcl_recv` and
backends to take the object identity into account, for
instance when choosing backend and grace periods.
* `hash_data()` calculates the hash-key differently than previously.
This means that persistent storage will be lost, and it may break
very specific `*.vtc` test-scripts.
* counters MAIN.s_req_bodybytes and VBE.*.tools.beresp_bodybytes
are now always the number of bodybytes moved on the wire.
......
..
Copyright (c) 2021 Varnish Software AS
SPDX-License-Identifier: BSD-2-Clause
See LICENSE file for full text of license
.. _db_vcl_recv_hash:
Hashing in `vcl_recv{}`
=======================
Calculating the `hash` used for cache lookup already in `vcl_recv{}`
makes it possible for certain directors to offer targeted health status.
To ensure consistent hashing, use this design pattern::
sub make_hash_key {
hash_data([…]);
}
sub vcl_hash {
call make_hash_key;
return (lookup);
}
sub vcl_recv {
[…]
call make_hash_key;
[…]
}
......@@ -27,7 +27,6 @@ VCL Design Patterns
.. toctree::
:maxdepth: 1
dp_vcl_recv_hash.rst
dp_vcl_resp_status.rst
Bundled VMODs
......
......@@ -355,9 +355,7 @@ hash_data(input)
~~~~~~~~~~~~~~~~
Adds an input to the hash input. In the built-in VCL ``hash_data()``
is called on the host and URL of the request.
Available in ``vcl_hash`` or ``vcl_recv``. If used in ``vcl_recv``
``vcl_hash`` will not be called.
is called on the host and URL of the request. Available in ``vcl_hash``.
synthetic(STRING)
~~~~~~~~~~~~~~~~~
......
......@@ -134,9 +134,8 @@ of the following keywords:
vcl_hash
~~~~~~~~
Called after `vcl_recv` to create a hash value for the request,
unless `vcl_recv` already did that.
This is used as the key to store and look up objects in the cache.
Called after `vcl_recv` to create a hash value for the request. This is
used as a key to look up the object in Varnish.
The `vcl_hash` subroutine may terminate with calling ``return()`` with one
of the following keywords:
......
......@@ -6,10 +6,12 @@
Hashing
-------
Internally, when Varnish stores content in the cache indexed by a hash
key used to find the object again. In the default setup
this key is calculated based on `URL`, the `Host:` header, or
if there is none, the IP address of the server::
Internally, when Varnish stores content in the cache it stores the object
together with a hash key to find the object again. In the default setup
this key is calculated based on the content of the *Host* header or the
IP address of the server and the URL.
Behold the `default vcl`::
sub vcl_hash {
hash_data(req.url);
......@@ -21,7 +23,7 @@ if there is none, the IP address of the server::
return (lookup);
}
As you can see it first hashes `req.url` and then `req.http.host` if
As you can see it first checks in `req.url` then `req.http.host` if
it exists. It is worth pointing out that Varnish doesn't lowercase the
hostname or the URL before hashing it so in theory having "Varnish.org/"
and "varnish.org/" would result in different cache entries. Browsers
......@@ -45,16 +47,7 @@ And then add a `vcl_hash`::
hash_data(req.http.X-Country-Code);
}
Because there is no `return(lookup)`, the builtin VCL will take care
of adding the URL, `Host:` or server IP# to the hash as usual.
If `vcl_hash` did return, ie::
sub vcl_hash {
hash_data(req.http.X-Country-Code);
return(lookup);
}
then *only* the country-code would matter, and Varnish would return
seemingly random objects, ignoring the URL, (but they would always
have the correct `X-Country-Code`).
As the default VCL will take care of adding the host and URL to the hash
we don't have to do anything else. Be careful calling ``return (lookup)``
as this will abort the execution of the default VCL and Varnish can end
up returning data based on more or less random inputs.
......@@ -54,7 +54,6 @@
* binary/load-time compatible, increment MAJOR version
*
* 13.0 (2021-03-15)
* VRT_hashdata() produces different hash-keys.
* Move VRT_synth_page() to deprecated status
* Add VRT_synth_strands() and VRT_synth_blob()
* struct vrt_type now produced by generate.py
......
......@@ -452,7 +452,7 @@ vcc_Action_Init(struct vcc *tl)
ACT(ban, vcc_act_ban, 0);
ACT(call, vcc_act_call, 0);
ACT(hash_data, vcc_act_hash_data,
VCL_MET_RECV | VCL_MET_HASH);
VCL_MET_HASH);
ACT(if, vcc_Act_If, 0);
ACT(new, vcc_Act_New,
VCL_MET_INIT);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment