Commit f91fc4d9 authored by Martin Blix Grydeland's avatar Martin Blix Grydeland

Turn on SO_KEEPALIVE on all TCP connections.

This will help in determining remote hang up of the connection for
situations where we still are not able to send any reply, but freeing
the session will reduce resource overhead (e.g. when staying on
waitinglists for extended periods).

On platforms that support it also add runtime parameters to control
the keep-alive packet settings through socket options. On platforms
that don't support these socket options, the values must be set system
wide.

The Varnish runtime parameters will only be applied when they are less
than the system default.
parent 4fa4a80c
......@@ -70,8 +70,26 @@ static const struct linger linger = {
.l_onoff = 0,
};
/*
 * We turn on keepalives by default to assist in detecting clients that have
 * hung up on connections returning from waiting lists.
 */
static const int keepalive = 1;
/*
 * Flags saying which socket options have to be set explicitly on accepted
 * sessions. NOTE(review): need_test presumably requests a fresh sock_test()
 * run after a parameter change -- confirm against the accept path.
 */
static unsigned char need_sndtimeo, need_rcvtimeo, need_linger, need_test,
need_tcpnodelay;
/* Set by sock_test() when an accepted socket's SO_KEEPALIVE differs from ours. */
static unsigned char need_keepalive = 0;
#ifdef HAVE_TCP_KEEP
/*
 * Set by sock_test() when an accepted socket does not carry the matching
 * ka_*_cur value; VCA_SetupSess() then applies that value itself.
 */
static unsigned char need_ka_time = 0;
static unsigned char need_ka_probes = 0;
static unsigned char need_ka_intvl = 0;
/* Values currently applied to the listen sockets; 0 means not yet determined. */
static int ka_time_cur = 0;
static int ka_probes_cur = 0;
static int ka_intvl_cur = 0;
/*
 * ka_*_sys: system defaults read back from a listen socket with getsockopt().
 * ka_*:     wanted values, the smaller of the system default and the
 *           corresponding tcp_keepalive_* run-time parameter.
 */
static int ka_time, ka_time_sys;
static int ka_probes, ka_probes_sys;
static int ka_intvl, ka_intvl_sys;
#endif
/*--------------------------------------------------------------------
* Some kernels have bugs/limitations with respect to which options are
......@@ -83,6 +101,10 @@ static void
sock_test(int fd)
{
struct linger lin;
int tka;
#ifdef HAVE_TCP_KEEP
int tka_time, tka_probes, tka_intvl;
#endif
struct timeval tv;
socklen_t l;
int i, tcp_nodelay;
......@@ -97,6 +119,48 @@ sock_test(int fd)
if (memcmp(&lin, &linger, l))
need_linger = 1;
l = sizeof tka;
i = getsockopt(fd, SOL_SOCKET, SO_KEEPALIVE, &tka, &l);
if (i) {
VTCP_Assert(i);
return;
}
assert(l == sizeof tka);
if (tka != keepalive)
need_keepalive = 1;
#ifdef HAVE_TCP_KEEP
l = sizeof tka_time;
i = getsockopt(fd, IPPROTO_TCP, TCP_KEEPIDLE, &tka_time, &l);
if (i) {
VTCP_Assert(i);
return;
}
assert(l == sizeof tka_time);
if (tka_time != ka_time_cur)
need_ka_time = 1;
l = sizeof tka_probes;
i = getsockopt(fd, IPPROTO_TCP, TCP_KEEPCNT, &tka_probes, &l);
if (i) {
VTCP_Assert(i);
return;
}
assert(l == sizeof tka_probes);
if (tka_probes != ka_probes_cur)
need_ka_probes = 1;
l = sizeof tka_intvl;
i = getsockopt(fd, IPPROTO_TCP, TCP_KEEPINTVL, &tka_intvl, &l);
if (i) {
VTCP_Assert(i);
return;
}
assert(l == sizeof tka_intvl);
if (tka_intvl != ka_intvl_cur)
need_ka_intvl = 1;
#endif
#ifdef SO_SNDTIMEO_WORKS
l = sizeof tv;
i = getsockopt(fd, SOL_SOCKET, SO_SNDTIMEO, &tv, &l);
......@@ -281,6 +345,22 @@ VCA_SetupSess(struct worker *wrk, struct sess *sp)
if (need_linger)
VTCP_Assert(setsockopt(sp->fd, SOL_SOCKET, SO_LINGER,
&linger, sizeof linger));
if (need_keepalive)
VTCP_Assert(setsockopt(sp->fd, SOL_SOCKET, SO_KEEPALIVE,
&keepalive, sizeof keepalive));
#ifdef HAVE_TCP_KEEP
AN(ka_time);
if (need_ka_time)
VTCP_Assert(setsockopt(sp->fd, IPPROTO_TCP, TCP_KEEPIDLE,
&ka_time_cur, sizeof ka_time_cur));
if (need_ka_probes)
VTCP_Assert(setsockopt(sp->fd, IPPROTO_TCP, TCP_KEEPCNT,
&ka_probes_cur, sizeof ka_probes_cur));
if (need_ka_intvl)
VTCP_Assert(setsockopt(sp->fd, IPPROTO_TCP, TCP_KEEPINTVL,
&ka_intvl_cur, sizeof ka_intvl_cur));
#endif
#ifdef SO_SNDTIMEO_WORKS
if (need_sndtimeo)
VTCP_Assert(setsockopt(sp->fd, SOL_SOCKET, SO_SNDTIMEO,
......@@ -312,10 +392,19 @@ vca_acct(void *arg)
struct listen_sock *ls;
double t0, now;
int i;
#ifdef HAVE_TCP_KEEP
socklen_t len;
#endif
THR_SetName("cache-acceptor");
(void)arg;
#ifdef HAVE_TCP_KEEP
ka_time = cache_param->tcp_keepalive_time;
ka_probes = cache_param->tcp_keepalive_probes;
ka_intvl = cache_param->tcp_keepalive_intvl;
#endif
VTAILQ_FOREACH(ls, &heritage.socks, list) {
if (ls->sock < 0)
continue;
......@@ -324,6 +413,50 @@ vca_acct(void *arg)
&linger, sizeof linger));
AZ(setsockopt(ls->sock, IPPROTO_TCP, TCP_NODELAY,
&tcp_nodelay, sizeof tcp_nodelay));
AZ(setsockopt(ls->sock, SOL_SOCKET, SO_KEEPALIVE,
&keepalive, sizeof keepalive));
#ifdef HAVE_TCP_KEEP
if (!ka_time_cur) {
len = sizeof ka_time_sys;
AZ(getsockopt(ls->sock, IPPROTO_TCP, TCP_KEEPIDLE,
&ka_time_sys, &len));
assert(len == sizeof ka_time_sys);
AN(ka_time_sys);
ka_time_cur = ka_time =
(ka_time_sys < cache_param->tcp_keepalive_time ?
ka_time_sys : cache_param->tcp_keepalive_time);
}
AZ(setsockopt(ls->sock, IPPROTO_TCP, TCP_KEEPIDLE,
&ka_time_cur, sizeof ka_time_cur));
if (!ka_probes_cur) {
len = sizeof ka_probes_sys;
AZ(getsockopt(ls->sock, IPPROTO_TCP, TCP_KEEPCNT,
&ka_probes_sys, &len));
assert(len == sizeof ka_probes_sys);
AN(ka_probes_sys);
ka_probes_cur = ka_probes =
(ka_probes_sys < cache_param->tcp_keepalive_probes ?
ka_probes_sys :
cache_param->tcp_keepalive_probes);
}
AZ(setsockopt(ls->sock, IPPROTO_TCP, TCP_KEEPCNT,
&ka_probes_cur, sizeof ka_probes_cur));
if (!ka_intvl_cur) {
len = sizeof ka_intvl_sys;
AZ(getsockopt(ls->sock, IPPROTO_TCP, TCP_KEEPINTVL,
&ka_intvl_sys, &len));
assert(len == sizeof ka_intvl_sys);
AN(ka_intvl_sys);
ka_intvl_cur = ka_intvl =
(ka_intvl_sys < cache_param->tcp_keepalive_intvl ?
ka_intvl_sys :
cache_param->tcp_keepalive_intvl);
}
AZ(setsockopt(ls->sock, IPPROTO_TCP, TCP_KEEPINTVL,
&ka_intvl_cur, sizeof ka_intvl_cur));
#endif
if (cache_param->accept_filter) {
i = VTCP_filter_http(ls->sock);
if (i)
......@@ -339,6 +472,35 @@ vca_acct(void *arg)
t0 = VTIM_real();
while (1) {
(void)sleep(1);
#ifdef HAVE_TCP_KEEP
ka_time = (ka_time_sys < cache_param->tcp_keepalive_time ?
ka_time_sys : cache_param->tcp_keepalive_time);
ka_probes = (ka_probes_sys < cache_param->tcp_keepalive_probes ?
ka_probes_sys : cache_param->tcp_keepalive_probes);
ka_intvl = (ka_intvl_sys < cache_param->tcp_keepalive_intvl ?
ka_intvl_sys : cache_param->tcp_keepalive_intvl);
if (ka_time_cur != ka_time ||
ka_probes_cur != ka_probes ||
ka_intvl_cur != ka_intvl) {
need_test = 1;
ka_time_cur = ka_time;
ka_probes_cur = ka_probes;
ka_intvl_cur = ka_intvl;
VTAILQ_FOREACH(ls, &heritage.socks, list) {
if (ls->sock < 0)
continue;
AZ(setsockopt(ls->sock, IPPROTO_TCP,
TCP_KEEPIDLE,
&ka_time_cur, sizeof ka_time_cur));
AZ(setsockopt(ls->sock, IPPROTO_TCP,
TCP_KEEPCNT,
&ka_probes_cur, sizeof ka_probes_cur));
AZ(setsockopt(ls->sock, IPPROTO_TCP,
TCP_KEEPINTVL,
&ka_intvl_cur, sizeof ka_intvl_cur));
}
}
#endif
#ifdef SO_SNDTIMEO_WORKS
if (cache_param->idle_send_timeout != send_timeout) {
need_test = 1;
......
......@@ -110,6 +110,11 @@ struct params {
unsigned pipe_timeout;
unsigned send_timeout;
unsigned idle_send_timeout;
#ifdef HAVE_TCP_KEEP
unsigned tcp_keepalive_time;
unsigned tcp_keepalive_probes;
unsigned tcp_keepalive_intvl;
#endif
/* Management hints */
unsigned auto_restart;
......
......@@ -205,6 +205,31 @@ const struct parspec mgt_parspec[] = {
"See setsockopt(2) under SO_SNDTIMEO for more information.",
DELAYED_EFFECT,
"60", "seconds" },
#ifdef HAVE_TCP_KEEP
{ "tcp_keepalive_time", tweak_timeout, &mgt_param.tcp_keepalive_time,
1, 7200,
"The number of seconds a connection needs to be idle before "
"TCP begins sending out keep-alive probes. Note that this "
"setting will only take effect when it is less than the "
"system default.",
EXPERIMENTAL,
"600", "seconds" },
{ "tcp_keepalive_probes", tweak_uint, &mgt_param.tcp_keepalive_probes,
1, 100,
"The maximum number of TCP keep-alive probes to send before "
"giving up and killing the connection if no response is "
"obtained from the other end. Note that this setting will "
"only take effect when it is less than the system default.",
EXPERIMENTAL,
"5", "probes" },
/*
 * Seconds between successive keep-alive probes. The acceptor only uses
 * this value when it is lower than the system default.
 */
{ "tcp_keepalive_intvl", tweak_timeout, &mgt_param.tcp_keepalive_intvl,
	1, 100,
	"The number of seconds between TCP keep-alive probes. Note "
	"that this setting will only take effect when it is less than "
	"the system default.",
	EXPERIMENTAL,
	"5", "seconds" },
#endif
{ "auto_restart", tweak_bool, &mgt_param.auto_restart, 0, 0,
"Restart child process automatically if it dies.\n",
0,
......
......@@ -423,6 +423,38 @@ if test "$ac_cv_so_rcvtimeo_works" = no ||
fi
LIBS="${save_LIBS}"
# Check if the OS supports TCP_KEEP(CNT|IDLE|INTVL) socket options.
# This is a run test: it succeeds only if all three setsockopt calls
# succeed on a freshly created TCP socket. The result is cached in
# ac_cv_have_tcp_keep and HAVE_TCP_KEEP is defined on success.
# When cross-compiling the probe cannot be executed, so we conservatively
# assume the options are unavailable instead of aborting configure.
save_LIBS="${LIBS}"
LIBS="${LIBS} ${NET_LIBS}"
AC_CACHE_CHECK([for TCP_KEEP(CNT|IDLE|INTVL) socket options],
  [ac_cv_have_tcp_keep],
  [AC_RUN_IFELSE(
    [AC_LANG_PROGRAM([[
#include <stdio.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/tcp.h>
    ]],[[
int s = socket(AF_INET, SOCK_STREAM, 0);
int i;
if (s < 0)
  return (1);
i = 5;
if (setsockopt(s, IPPROTO_TCP, TCP_KEEPCNT, &i, sizeof i))
  return (1);
if (setsockopt(s, IPPROTO_TCP, TCP_KEEPIDLE, &i, sizeof i))
  return (1);
if (setsockopt(s, IPPROTO_TCP, TCP_KEEPINTVL, &i, sizeof i))
  return (1);
return (0);
    ]])],
    [ac_cv_have_tcp_keep=yes],
    [ac_cv_have_tcp_keep=no],
    [ac_cv_have_tcp_keep=no])
  ])
if test "$ac_cv_have_tcp_keep" = yes; then
  AC_DEFINE([HAVE_TCP_KEEP], [1], [Define if OS supports TCP_KEEP* socket options])
fi
LIBS="${save_LIBS}"
# Run-time directory
VARNISH_STATE_DIR='${localstatedir}/varnish'
AC_SUBST(VARNISH_STATE_DIR)
......
......@@ -35,3 +35,28 @@ Reduce the maximum stack size by running::
in the Varnish startup script.
TCP keep-alive configuration
~~~~~~~~~~~~~~~~~~~~~~~~~~~~
On some systems, Varnish is not able to set the TCP keep-alive values
per socket, and therefore the tcp_keepalive_* Varnish runtime
parameters are not available. On these platforms it can be beneficial
to tune the system-wide values for these in order to more reliably
detect remote close for sessions spending a long time on
waiting lists. This will help free up resources faster.
Systems that do not support TCP keep-alive values per socket include:
- Solaris releases prior to version 11
- FreeBSD releases prior to version 9.1
- OS X releases prior to Mountain Lion
On platforms with the necessary socket options the defaults are set
to:
- tcp_keepalive_time = 600 seconds
- tcp_keepalive_probes = 5
- tcp_keepalive_intvl = 5 seconds
Note that Varnish will only apply these run-time parameters so long as
they are less than the system default value.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment