Commit d44838bf authored by Poul-Henning Kamp's avatar Poul-Henning Kamp

The acceptor already has a back-off feature for when we run out of
file descriptors.  Generalize that concept and use it for all cases
where we cannot accept and/or serve the connection (lack of sessions,
lack of worker threads).

This is controlled by three parameters:

Every time we run into trouble, we increase the sleep-time by:
	acceptor_sleep_incr (0.001 s)

But we never let it get above
	acceptor_sleep_max  (0.050 s)

Once we manage to accept and schedule a connection, we multiply
the sleep-time by:
	acceptor_sleep_decay (0.9)

The default numbers are more or less picked out of thin air.

Two new stats counters (accept_fail and client_drop_late), together
with the existing client_drop counter, help us keep track of this:
	accept_fail
		Incremented when accept(2) returns an error.  This can be out of
		file-descriptors, but also clients which closed while
		they were stuck in the accept-queue.  Under normal
		operation, a minor trickle is probably to be expected.

	client_drop
		New connection dropped, because we could not get a
		session for it, or because the workerthreads were
		too busy.

	client_drop_late
		A previously served connection was dropped for the
		same reasons.

As always, feedback welcome.




git-svn-id: http://www.varnish-cache.org/svn/trunk/varnish-cache@4497 d4fa192b-c00b-0410-8231-f00ffab90ce4
parent 953bf626
......@@ -187,7 +187,7 @@ vca_acct(void *arg)
struct pollfd *pfd;
struct listen_sock *ls;
unsigned u;
double now;
double now, pace;
THR_SetName("cache-acceptor");
(void)arg;
......@@ -207,6 +207,7 @@ vca_acct(void *arg)
}
need_test = 1;
pace = 0;
while (1) {
#ifdef SO_SNDTIMEO_WORKS
if (params->send_timeout != tv_sndtimeo.tv_sec) {
......@@ -234,6 +235,13 @@ vca_acct(void *arg)
}
}
#endif
/* Bound the pacing delay by parameter */
if (pace > params->acceptor_sleep_max)
pace = params->acceptor_sleep_max;
if (pace < params->acceptor_sleep_incr)
pace = 0.0;
if (pace > 0.0)
TIM_sleep(pace);
i = poll(pfd, heritage.nsocks, 1000);
now = TIM_real();
u = 0;
......@@ -247,6 +255,7 @@ vca_acct(void *arg)
addr = (void*)&addr_s;
i = accept(ls->sock, addr, &l);
if (i < 0) {
VSL_stats->accept_fail++;
switch (errno) {
case EAGAIN:
case ECONNABORTED:
......@@ -255,14 +264,13 @@ vca_acct(void *arg)
VSL(SLT_Debug, ls->sock,
"Too many open files "
"when accept(2)ing. Sleeping.");
TIM_sleep(
params->accept_fd_holdoff * 0.001);
pace += params->acceptor_sleep_incr;
break;
default:
VSL(SLT_Debug, ls->sock,
"Accept failed: %s",
strerror(errno));
/* XXX: stats ? */
pace += params->acceptor_sleep_incr;
break;
}
continue;
......@@ -271,6 +279,7 @@ vca_acct(void *arg)
if (sp == NULL) {
AZ(close(i));
VSL_stats->client_drop++;
pace += params->acceptor_sleep_incr;
continue;
}
sp->fd = i;
......@@ -283,7 +292,12 @@ vca_acct(void *arg)
sp->sockaddrlen = l;
sp->step = STP_FIRST;
WRK_QueueSession(sp);
if (WRK_QueueSession(sp)) {
VSL_stats->client_drop++;
pace += params->acceptor_sleep_incr;
} else {
pace *= params->acceptor_sleep_decay;
}
}
}
NEEDLESS_RETURN(NULL);
......@@ -306,7 +320,8 @@ vca_handover(struct sess *sp, int status)
break;
case 1:
sp->step = STP_START;
WRK_QueueSession(sp);
if (WRK_QueueSession(sp))
VSL_stats->client_drop_late++;
break;
default:
INCOMPL();
......
......@@ -463,7 +463,14 @@ hsh_rush(struct objhead *oh)
AZ(sp->wrk);
VTAILQ_REMOVE(&oh->waitinglist, sp, list);
DSL(0x20, SLT_Debug, sp->id, "off waiting list");
WRK_QueueSession(sp);
if (WRK_QueueSession(sp)) {
/*
* We could not schedule the session, leave the
* rest on the busy list.
*/
VSL_stats->client_drop_late++;
break;
}
}
}
......
......@@ -186,9 +186,10 @@ struct params {
/* Acceptable clockskew with backends */
unsigned clock_skew;
/* Amount of time to sleep when running out of file
descriptors. In msecs */
unsigned accept_fd_holdoff;
/* Acceptor pacer parameters */
double acceptor_sleep_max;
double acceptor_sleep_incr;
double acceptor_sleep_decay;
/* Get rid of duplicate purges */
unsigned purge_dups;
......
......@@ -132,7 +132,6 @@ tweak_timeout_double(struct cli *cli, const struct parspec *par,
cli_out(cli, "%.6f", *dest);
}
#if 0
/*--------------------------------------------------------------------*/
static void
......@@ -163,7 +162,6 @@ tweak_generic_double(struct cli *cli, const struct parspec *par,
} else
cli_out(cli, "%f", *dest);
}
#endif
/*--------------------------------------------------------------------*/
......@@ -703,12 +701,30 @@ static const struct parspec input_parspec[] = {
"and backend request. This parameter does not apply to pipe.",
0,
"60", "s" },
{ "accept_fd_holdoff", tweak_timeout,
&master.accept_fd_holdoff, 0, 3600*1000,
"If we run out of file descriptors, the accept thread will "
"sleep. This parameter control for how long it will sleep.",
{ "acceptor_sleep_max", tweak_timeout_double,
&master.acceptor_sleep_max, 0, 10,
"If we run out of resources, such as file descriptors or "
"worker threads, the acceptor will sleep between accepts.\n"
"This parameter limits how long it can sleep between "
"attempts to accept new connections.",
EXPERIMENTAL,
"50", "ms" },
"0.050", "s" },
{ "acceptor_sleep_incr", tweak_timeout_double,
&master.acceptor_sleep_incr, 0, 1,
"If we run out of resources, such as file descriptors or "
"worker threads, the acceptor will sleep between accepts.\n"
"This parameter control how much longer we sleep, each time "
"we fail to accept a new connection.",
EXPERIMENTAL,
"0.001", "s" },
{ "acceptor_sleep_decay", tweak_generic_double,
&master.acceptor_sleep_decay, 0, 1,
"If we run out of resources, such as file descriptors or "
"worker threads, the acceptor will sleep between accepts.\n"
"This parameter (multiplicatively) reduce the sleep duration "
"for each succesfull accept. (ie: 0.9 = reduce by 10%)",
EXPERIMENTAL,
"0.900", "" },
{ "clock_skew", tweak_uint, &master.clock_skew, 0, UINT_MAX,
"How much clockskew we are willing to accept between the "
"backend and our own clock.",
......
......@@ -33,7 +33,7 @@
*/
MAC_STAT(client_conn, uint64_t, 0, 'a', "Client connections accepted")
MAC_STAT(client_drop, uint64_t, 0, 'a', "Connection dropped, no sess")
MAC_STAT(client_drop, uint64_t, 0, 'a', "Connection dropped, no sess/wrk")
MAC_STAT(client_req, uint64_t, 1, 'a', "Client requests received")
MAC_STAT(cache_hit, uint64_t, 1, 'a', "Cache hits")
......@@ -149,3 +149,5 @@ MAC_STAT(hcb_insert, uint64_t, 0, 'a', "HCB Inserts")
MAC_STAT(esi_parse, uint64_t, 0, 'a', "Objects ESI parsed (unlock)")
MAC_STAT(esi_errors, uint64_t, 0, 'a', "ESI parse errors (unlock)")
MAC_STAT(accept_fail, uint64_t, 0, 'a', "Accept failures")
MAC_STAT(client_drop_late, uint64_t, 0, 'a', "Connection dropped late")
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment