Fix a race between vdp_pesi_fini() and vped_task() on done flag

To terminate a subrequest, we need to do two things:

- fini the task (signal that the thread handling the subrequest in
  parallel is about to terminate)

- signal that the node representing the subrequest can be picked up by
  esi_level 0 delivery

From the perspective of delivery (unpending), the former needs to
happen before the latter: delivery needs to be sure that no other
thread is still working on the node. So we call task_fini() before
signalling that the node's done status has changed.
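
Concretely, the tail of vped_task() performs both steps under the
tree lock. A sketch, following the patch context quoted in the test
below (not a verbatim copy of the source):

    Lck_Lock(&pesi_tree->tree->tree_lock);
    task_fini(pesi_tree, pesi);             /* 1: task is finished */
    node->subreq.done = 1;                  /* 2: node may be delivered */
    AZ(pthread_cond_signal(&node->subreq.cond));
    Lck_Unlock(&pesi_tree->tree->tree_lock);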

The top-level delivery thread, however, might need to tear down the
tree, which assumes that all subrequests have finished. To ensure
this, it checks the task list and waits for it to become empty.
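
A sketch of that wait in vdp_pesi_fini() (the lock and condition
variable identifiers are taken from the diff below; the emptiness
check is a hypothetical helper, since the task list structure is not
shown here):

    Lck_Lock(&pesi_tree->task_lock);
    while (!task_list_empty(pesi_tree))     /* hypothetical helper */
        AZ(Lck_CondWait(&pesi_tree->task_cond,
            &pesi_tree->task_lock));
    Lck_Unlock(&pesi_tree->task_lock);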

The assumption was that once the task list is empty, the tree can no
longer be in use. However, a subrequest thread could still be inside
vped_task() between task_fini() and releasing the tree lock.

We solve this race by taking the tree lock before finally destroying
the tree: acquiring the lock waits out any thread still inside
vped_task().
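
The fixed teardown in vdp_pesi_fini() thus brackets the final tree
checks and pruning with the tree lock (abridged from the diff below):

    Lck_Lock(&pesi_tree->tree->tree_lock);  /* waits out vped_task() */
    assert_node(bytes_tree->root, CHK_ANY);
    tree_prune(req->vdc, bytes_tree->root);
    /* ... */
    Lck_Unlock(&pesi_tree->tree->tree_lock);
    Lck_Delete(&bytes_tree->tree_lock);
    AZ(pthread_cond_destroy(&bytes_tree->cond));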

Fixes https://gitlab.com/uplex/varnish/libvdp-pesi/-/issues/2
varnishtest "subreq.done race #2"
# https://gitlab.com/uplex/varnish/libvdp-pesi/-/issues/2
# needs source patch to reproduce
# diff --git a/src/vdp_pesi.c b/src/vdp_pesi.c
# index 12a5014..5322e0c 100644
# --- a/src/vdp_pesi.c
# +++ b/src/vdp_pesi.c
# @@ -232,6 +232,7 @@ vped_task(struct worker *wrk, void *priv)
# 	assert(pesi == req->transport_priv);
# 	Lck_Lock(&pesi_tree->tree->tree_lock);
# 	task_fini(pesi_tree, pesi);
# +	sleep(1);
# 	node->subreq.done = 1;
# 	AZ(pthread_cond_signal(&node->subreq.cond));
# 	Lck_Unlock(&pesi_tree->tree->tree_lock);
server s1 {
	rxreq
	expect req.url == "/"
	txresp -body {
	<html>
	Before include
	<esi:include src="/esi"/>
	After include
	</html>
	}

	rxreq
	expect req.url == "/esi"
	txresp -body {
	<html>
	Before include
	<esi:include src="/synth"/>
	<esi:include src="/body"/>
	After include
	</html>
	}
} -start
server s2 {
	rxreq
	expect req.url == "/body"
	delay 2
	txresp -body {
	Included file
	}
} -start
varnish v1 -arg "-p debug=+syncvsl,+waitinglist" -vcl+backend {
	import vtc;
	import ${vmod_pesi};
	import ${vmod_pesi_debug};

	include "debug.inc.vcl";

	sub vcl_recv {
		if (req.url == "/synth") {
			return (synth(200));
		}
	}

	sub vcl_backend_fetch {
		if (bereq.url == "/" || bereq.url == "/esi") {
			set bereq.backend = s1;
		} else {
			set bereq.backend = s2;
		}
	}

	sub vcl_backend_response {
		if (bereq.url == "/" || bereq.url == "/esi") {
			set beresp.do_esi = true;
		}
	}

	sub vcl_deliver {
		pesi.activate();
	}
} -start
client c1 {
	txreq -url "/body" -hdr "Host: foo"
	rxresp
} -start
delay .2
client c2 {
	txreq -hdr "Host: foo"
	rxresphdrs
	rxchunk
} -start
client c1 -wait
client c2 -wait
server s1 -wait
server s2 -wait

--- a/src/vdp_pesi.c
+++ b/src/vdp_pesi.c
@@ -838,14 +838,14 @@ vdp_pesi_fini(struct vdp_ctx *vdc, void **priv)
 		    &pesi_tree->task_lock));
 		Lck_Unlock(&pesi_tree->task_lock);
 	}
-	/* no locking required because no other tasks are running */
+	/*
+	 * prevent race with vped_task()
+	 */
+	Lck_Lock(&pesi_tree->tree->tree_lock);
 	assert_node(bytes_tree->root, CHK_ANY);
 	/*
 	 * for an early client close, our subrequests had no chance to run
 	 *
 	 * we likely need more error handling
 	 */
 	tree_prune(req->vdc, bytes_tree->root);
@@ -854,6 +854,7 @@ vdp_pesi_fini(struct vdp_ctx *vdc, void **priv)
 	Lck_Delete(&pesi_tree->task_lock);
 	AZ(pthread_cond_destroy(&pesi_tree->task_cond));
+	Lck_Unlock(&pesi_tree->tree->tree_lock);
 	Lck_Delete(&bytes_tree->tree_lock);
 	AZ(pthread_cond_destroy(&bytes_tree->cond));