Commit b3344f26 authored by Martin Blix Grydeland, committed by Reza Naghibi

Govern thread creation by queue length instead of dry signals

When getidleworker signals the pool herder to spawn a thread, it
increments the dry counter, and the herder resets dry when spawning a
single thread. In many cases this creates only a single thread even
though the herder was signaled dry multiple times, and it may cause a
situation where the cache acceptor is queued and no new thread is
created. Together with long-lasting tasks (i.e. endless pipelines) and
all other tasks having higher priority, this will prevent the cache
acceptor from being rescheduled.
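
The undercounting comes from how condition variables behave:
pthread_cond_signal wakes at most one waiter, and signals sent while the
herder is not waiting do not accumulate into extra wakeups. A minimal
standalone sketch (illustration only, not Varnish code; the herder() and
dry names merely mimic the pool herder) of how several dry signals
collapse into a single breeding cycle:

#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

static pthread_mutex_t mtx = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t herder_cond = PTHREAD_COND_INITIALIZER;
static unsigned dry = 0;

/* Herder stand-in: wake up, "spawn" one thread, reset dry to zero,
 * mirroring the old pool_breed()/pool_herder() interplay. */
static void *
herder(void *priv)
{
	(void)priv;
	pthread_mutex_lock(&mtx);
	while (dry == 0)
		pthread_cond_wait(&herder_cond, &mtx);
	printf("herder woke once with dry=%u, bred 1 thread\n", dry);
	dry = 0;	/* all pending signals collapse into one spawn */
	pthread_mutex_unlock(&mtx);
	return (NULL);
}

int
main(void)
{
	pthread_t thr;
	int i;

	pthread_create(&thr, NULL, herder, NULL);
	sleep(1);	/* let the herder reach pthread_cond_wait() */
	pthread_mutex_lock(&mtx);
	for (i = 0; i < 5; i++) {	/* five tasks found no idle worker */
		dry++;
		pthread_cond_signal(&herder_cond);
	}
	pthread_mutex_unlock(&mtx);
	pthread_join(thr, NULL);	/* reports dry=5, but only 1 spawn */
	return (0);
}

Built with -pthread, this prints a single "dry=5" line: five requests
asked for a thread, yet only one thread was bred.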

c00096.vtc demonstrates how this can lock up.

To fix this, spawn threads whenever there are queued tasks and we are
below the maximum number of threads.
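
As a toy model of the new policy (again not Varnish code: lqueue is
decremented inline to stand in for a newly bred worker dequeuing its
task), the herder now re-checks the queue length after every spawn, so
five queued tasks yield five threads, up to the maximum:

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t mtx = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t herder_cond = PTHREAD_COND_INITIALIZER;
static unsigned lqueue = 0, nthr = 0;
static const unsigned wthread_max = 100;

static void *
herder(void *priv)
{
	(void)priv;
	pthread_mutex_lock(&mtx);
	for (;;) {
		if (lqueue > 0 && nthr < wthread_max) {
			nthr++;		/* pool_breed(): one new thread */
			lqueue--;	/* that thread picks up a task */
			printf("bred thread #%u\n", nthr);
			continue;	/* re-evaluate the queue length */
		}
		if (nthr >= 5)
			break;		/* toy: stop once the queue drained */
		pthread_cond_wait(&herder_cond, &mtx);
	}
	pthread_mutex_unlock(&mtx);
	return (NULL);
}

int
main(void)
{
	pthread_t thr;
	int i;

	pthread_create(&thr, NULL, herder, NULL);
	pthread_mutex_lock(&mtx);
	for (i = 0; i < 5; i++) {	/* Pool_Task(): queue + signal */
		lqueue++;
		pthread_cond_signal(&herder_cond);
	}
	pthread_mutex_unlock(&mtx);
	pthread_join(thr, NULL);	/* prints five "bred thread" lines */
	return (0);
}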

Conflicts:
    bin/varnishd/cache/cache_wrk.c
parent 8f8c85c4
@@ -48,7 +48,6 @@ struct pool {
 	struct taskhead		idle_queue;
 	struct taskhead		queues[TASK_QUEUE_END];
 	unsigned		nthr;
-	unsigned		dry;
 	unsigned		lqueue;
 	uintmax_t		sdropped;
 	uintmax_t		rdropped;
@@ -194,13 +194,8 @@ pool_getidleworker(struct pool *pp, enum task_prio prio)
 		AZ(pp->nidle);
 	}
-	if (pt == NULL) {
-		if (pp->nthr < cache_param->wthread_max) {
-			pp->dry++;
-			AZ(pthread_cond_signal(&pp->herder_cond));
-		}
+	if (pt == NULL)
 		return (NULL);
-	}
 	AZ(pt->func);
 	CAST_OBJ_NOTNULL(wrk, pt->priv, WORKER_MAGIC);
 	return (wrk);
@@ -303,6 +298,7 @@ Pool_Task(struct pool *pp, struct pool_task *task, enum task_prio prio)
 		pp->nqueued++;
 		pp->lqueue++;
 		VTAILQ_INSERT_TAIL(&pp->queues[prio], task, list);
+		AZ(pthread_cond_signal(&pp->herder_cond));
 	} else {
 		if (prio == TASK_QUEUE_REQ)
 			pp->sdropped++;
@@ -473,7 +469,6 @@ pool_breed(struct pool *qp)
 		Lck_Unlock(&pool_mtx);
 		VTIM_sleep(cache_param->wthread_fail_delay);
 	} else {
-		qp->dry = 0;
 		qp->nthr++;
 		Lck_Lock(&pool_mtx);
 		VSC_C_main->threads++;
@@ -549,7 +544,7 @@ pool_herder(void *priv)
 		/* Make more threads if needed and allowed */
 		if (pp->nthr < wthread_min ||
-		    (pp->dry && pp->nthr < cache_param->wthread_max)) {
+		    (pp->lqueue > 0 && pp->nthr < cache_param->wthread_max)) {
 			pool_breed(pp);
 			continue;
 		}
@@ -610,16 +605,14 @@ pool_herder(void *priv)
 			continue;
 		}
 		Lck_Lock(&pp->mtx);
-		if (!pp->dry) {
+		if (pp->lqueue == 0) {
 			if (DO_DEBUG(DBG_VTC_MODE))
 				delay = 0.5;
 			(void)Lck_CondWait(&pp->herder_cond, &pp->mtx,
 			    VTIM_real() + delay);
-		} else {
+		} else
 			/* XXX: unsafe counters */
 			VSC_C_main->threads_limited++;
-			pp->dry = 0;
-		}
 		Lck_Unlock(&pp->mtx);
 	}
 	return (NULL);
bin/varnishtest/tests/c00096.vtc
varnishtest "Test thread creation on acceptor thread queuing"
# This tests that we are able to spawn new threads in the event that the
# cache acceptor has been queued. It does this by starting 6 long lasting
# fetches, which will consume 12 threads. That exceeds the initial
# allotment of 10 threads, giving some probability that the acceptor
# thread is queued. Then a single quick fetch is done, which should be
# served since we are well below the maximum number of threads allowed.
# Barrier b1 blocks the slow servers from finishing until the quick fetch
# is done.
barrier b1 cond 7
# Barrier b2 blocks the start of the quick fetch until all slow fetches
# are known to hold captive two threads each.
barrier b2 cond 7
server s0 {
rxreq
txresp -nolen -hdr "Content-Length: 10" -hdr "Connection: close"
send "123"
barrier b1 sync
send "4567890"
expect_close
} -dispatch
server stest {
rxreq
txresp -body "All good"
} -start
varnish v1 -arg "-p debug=+syncvsl -p debug=+flush_head"
varnish v1 -arg "-p thread_pools=1 -p thread_pool_min=10"
varnish v1 -vcl+backend {
sub vcl_backend_fetch {
if (bereq.url == "/test") {
set bereq.backend = stest;
} else {
set bereq.backend = s0;
}
}
} -start
# NB: we might go above 10 threads when early tasks are submitted to
# the pool since at least one idle thread must be kept in the pool
# reserve.
varnish v1 -expect MAIN.threads >= 10
client c1 {
txreq -url /1
rxresphdrs
barrier b2 sync
rxrespbody
} -start
client c2 {
txreq -url /2
rxresphdrs
barrier b2 sync
rxrespbody
} -start
client c3 {
txreq -url /3
rxresphdrs
barrier b2 sync
rxrespbody
} -start
client c4 {
txreq -url /4
rxresphdrs
barrier b2 sync
rxrespbody
} -start
client c5 {
txreq -url /5
rxresphdrs
barrier b2 sync
rxrespbody
} -start
client c6 {
txreq -url /6
rxresphdrs
barrier b2 sync
rxrespbody
} -start
client ctest {
barrier b2 sync
txreq -url "/test"
rxresp
expect resp.status == 200
expect resp.body == "All good"
} -run
barrier b1 sync
client c1 -wait
client c2 -wait
client c3 -wait
client c4 -wait
client c5 -wait
client c6 -wait
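
For reference, an individual test like this one can be run from a built
varnish-cache source tree (the path assumes the repository's standard
test layout):

	varnishtest bin/varnishtest/tests/c00096.vtc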