When connecting to backends, respect the administrative health

When making a connection to a "real" backend (VBE), we checked the
probed health state but did not take into account the administrative
health state as set with `varnishadm backend.set_health ... {healthy,sick}`.
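
For illustration, using the CLI syntax documented further below (the
backend name here is made up):

    varnishadm backend.set_health boot.default sick     # force unusable
    varnishadm backend.set_health boot.default healthy  # force usable
    varnishadm backend.set_health boot.default auto     # follow the probe again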

Our documentation was not particularly explicit on this aspect either,
yet the administrative states `sick` and `healthy` would make no sense
if `auto` semantics were always implied. Also, their semantics were
only documented implicitly, under `backend.list`.

Implementation note:

The relevant change is to call `VRT_Healthy()`, which does check the
administrative health, in place of checking `(struct backend *)->sick`
in `vbe_dir_getfd()`.
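
To make the intended semantics concrete, here is a minimal sketch of
the decision now delegated to `VRT_Healthy()` (illustration only, not
the actual implementation; the enum and function names are made up):

    enum admin_health { AH_AUTO, AH_HEALTHY, AH_SICK };

    /* Effective health: the administrative state, unless it is
     * "auto", overrides whatever the probe has determined. */
    static int
    effective_healthy(enum admin_health admin, int probe_ok)
    {
        switch (admin) {
        case AH_HEALTHY:
            return (1);        /* forced usable, probe ignored */
        case AH_SICK:
            return (0);        /* forced unusable, probe ignored */
        default:
            return (probe_ok); /* auto: follow the probe */
        }
    }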

As a `VRT_CTX` is required by `VRT_Healthy()`, we change the arguments of
`vbe_dir_getfd()` accordingly: The busyobj can now be taken from the ctx,
but the worker argument differs for pipe mode vs. fetch, so we preserve
it as an explicit argument.

A test for overriding a probed backend as healthy has been added to
c00048.vtc; it requires a second probe to hit server s1 and fail.
This is timing sensitive, so I hope that the backend probe interval
of 5 seconds is long enough for all our test environments. If not,
we probably need to make it longer or add another vtc.
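
For the record, the probe arithmetic behind that (assuming the usual
window/threshold semantics): with .window = 8 and .initial = 7, the
first successful probe brings the good-probe count in the window to 8,
meeting .threshold = 8, so the backend starts out healthy; any single
failed probe then drops the count to 7 < 8 and the probed state goes
sick, which is exactly what the forced `healthy` setting must override.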

Fixes #3299
parent 448e2edc
@@ -110,12 +110,15 @@ VBE_Connect_Error(struct VSC_vbe *vsc, int err)
 /*--------------------------------------------------------------------
  * Get a connection to the backend
+ *
+ * note: wrk is a separate argument because it differs for pipe vs. fetch
  */
 static struct pfd *
-vbe_dir_getfd(struct worker *wrk, struct backend *bp, struct busyobj *bo,
+vbe_dir_getfd(VRT_CTX, struct worker *wrk, struct backend *bp,
     unsigned force_fresh)
 {
+	struct busyobj *bo;
 	struct pfd *pfd;
 	int *fdp, err;
 	vtim_dur tmod;
@@ -123,11 +126,12 @@ vbe_dir_getfd(struct worker *wrk, struct backend *bp, struct busyobj *bo,
 	char pbuf1[VTCP_PORTBUFSIZE], pbuf2[VTCP_PORTBUFSIZE];

 	CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC);
-	CHECK_OBJ_NOTNULL(bo, BUSYOBJ_MAGIC);
+	CHECK_OBJ_NOTNULL(ctx->bo, BUSYOBJ_MAGIC);
+	bo = ctx->bo;
 	CHECK_OBJ_NOTNULL(bp, BACKEND_MAGIC);
 	AN(bp->vsc);

-	if (bp->sick) {
+	if (! VRT_Healthy(ctx, bp->director, NULL)) {
 		VSLb(bo->vsl, SLT_FetchError,
 		    "backend %s: unhealthy", VRT_BACKEND_string(bp->director));
 		bp->vsc->unhealthy++;
@@ -280,7 +284,7 @@ vbe_dir_gethdrs(VRT_CTX, VCL_BACKEND d)
 	http_PrintfHeader(bo->bereq, "Host: %s", bp->hosthdr);

 	do {
-		pfd = vbe_dir_getfd(wrk, bp, bo, extrachance == 0 ? 1 : 0);
+		pfd = vbe_dir_getfd(ctx, wrk, bp, extrachance == 0 ? 1 : 0);
 		if (pfd == NULL)
 			return (-1);
 		AN(bo->htc);
@@ -365,7 +369,7 @@ vbe_dir_http1pipe(VRT_CTX, VCL_BACKEND d)
 	ctx->req->res_mode = RES_PIPE;

-	pfd = vbe_dir_getfd(ctx->req->wrk, bp, ctx->bo, 0);
+	pfd = vbe_dir_getfd(ctx, ctx->req->wrk, bp, 0);
 	if (pfd == NULL) {
 		retval = SC_TX_ERROR;

c00048.vtc:

varnishtest "Forcing health of backends"
server s1 -repeat 3 {
barrier b1 cond 2
server s1 {
# probe
rxreq
txresp
# req
accept
rxreq
txresp
rxreq
txresp -hdr "Connection: close"
# probe sick
accept
rxreq
txresp -status 500
barrier b1 sync
accept
# req
rxreq
txresp
} -start
@@ -13,7 +34,7 @@ varnish v1 -vcl {
 		.window = 8;
 		.initial = 7;
 		.threshold = 8;
-		.interval = 10s;
+		.interval = 5s;
 	}
 }
@@ -60,6 +81,18 @@ client c1 {
 	expect resp.status == 200
 } -run

+# wait for sick probe
+barrier b1 sync
+
+# healthy overrides probe
+varnish v1 -cliok "backend.list"
+
+client c1 {
+	txreq
+	rxresp
+	expect resp.status == 200
+} -run
+
 varnish v1 -vsl_catchup
 varnish v1 -clierr 106 "backend.set_health s1 foo"
@@ -312,8 +312,11 @@ CLI_CMD(BACKEND_LIST,
 CLI_CMD(BACKEND_SET_HEALTH,
 	"backend.set_health",
 	"backend.set_health <backend_pattern> [auto|healthy|sick]",
-	"Set health status on the backends.",
-	"",
+	"Set health status of backend(s) matching <backend_pattern>.",
+	" * With ``auto``, the health status is determined by a probe\n"
+	"   or some other dynamic mechanism, if any\n"
+	" * ``healthy`` sets the backend as usable\n"
+	" * ``sick`` sets the backend as unusable\n",
 	2, 2
 )