Commit 970270a5 authored by Poul-Henning Kamp

Drop pseudo-automatic support for multihomed backends and require

clear expression of intent in VCL.

We now fail backend hostnames that resolve to multiple IPv4 or multiple
IPv6 addresses, in other words, you cannot use "cnn.com" as a backend
hostname specification without the compiler yelling at you:

    % ./varnishd -d -d -b cnn.com -a :8080
    Backend host "cnn.com": resolves to multiple IPv4 addresses.
    Only one address is allowed.
    Please specify which exact address you want to use, we found these:
	    64.236.16.20
	    64.236.16.52
	    64.236.24.12
	    64.236.29.120
    [...]
    VCL compilation failed

However, you _can_ use a hostname that resolves to both an IPv4 and
an IPv6 address, and the new parameter "prefer_ipv6" will determine
which one we try first in such cases.

The other part of this change is that we now do the DNS lookup at
VCL compile time, and only then.

If your backend's DNS record (or /etc/hosts entry) changes IP#, you
must reload your VCL code to notify varnish.

Finer technical points:

We build a bytestring representation of the sockaddr's in VCC and
include them in the concept of backend identity, for an existing
backend (+ connections) to be reused for a new VCL the backend must
now be defined exactly the same way AND have the same resolved
IPv4/IPv6 addresses.

Since we never muck about with the address in the backend struct
anymore, it's static for the life of the struct backend instance,
we can simplify and eliminate the locking dance around our connection
attempts.

Also eliminate the struct vrt_backend inclusion in struct backend,
and instead make the relevant fields full-blown members of struct
backend.  This eliminates a number of TRUST_ME() calls.

This is the companion commit to #2934 which prepared the VCL compiler.


git-svn-id: http://www.varnish-cache.org/svn/trunk@2936 d4fa192b-c00b-0410-8231-f00ffab90ce4
parent ae1bc87a
......@@ -41,11 +41,9 @@
#include <poll.h>
#include <sys/socket.h>
#include <netdb.h>
#include "shmlog.h"
#include "cache.h"
#include "vrt.h"
#include "cache_backend.h"
/*
......@@ -70,7 +68,7 @@ VBE_AddHostHeader(const struct sess *sp)
CHECK_OBJ_NOTNULL(sp->bereq->http, HTTP_MAGIC);
CHECK_OBJ_NOTNULL(sp->backend, BACKEND_MAGIC);
http_PrintfHeader(sp->wrk, sp->fd, sp->bereq->http,
"Host: %s", sp->backend->vrt->hostname);
"Host: %s", sp->backend->hosthdr);
}
/*--------------------------------------------------------------------
......@@ -83,11 +81,8 @@ VBE_AddHostHeader(const struct sess *sp)
*/
static int
VBE_TryConnect(const struct sess *sp, const struct addrinfo *ai)
VBE_TryConnect(const struct sess *sp, int pf, const struct sockaddr *sa, socklen_t salen)
{
struct sockaddr_storage ss;
int fam, sockt, proto;
socklen_t alen;
int s, i, tmo;
char abuf1[TCP_ADDRBUFSIZE], abuf2[TCP_ADDRBUFSIZE];
char pbuf1[TCP_PORTBUFSIZE], pbuf2[TCP_PORTBUFSIZE];
......@@ -95,47 +90,31 @@ VBE_TryConnect(const struct sess *sp, const struct addrinfo *ai)
CHECK_OBJ_NOTNULL(sp, SESS_MAGIC);
CHECK_OBJ_NOTNULL(sp->backend, BACKEND_MAGIC);
/*
* ai is only valid with the lock held, so copy out the bits
* we need to make the connection
*/
fam = ai->ai_family;
sockt = ai->ai_socktype;
proto = ai->ai_protocol;
alen = ai->ai_addrlen;
assert(alen <= sizeof ss);
memcpy(&ss, ai->ai_addr, alen);
/* release lock during stuff that can take a long time */
UNLOCK(&sp->backend->mtx);
s = socket(fam, sockt, proto);
s = socket(pf, SOCK_STREAM, 0);
if (s < 0) {
LOCK(&sp->backend->mtx);
return (s);
}
tmo = params->connect_timeout;
if (sp->backend->vrt->connect_timeout > 10e-3)
tmo = sp->backend->vrt->connect_timeout * 1000;
if (sp->backend->connect_timeout > 10e-3)
tmo = sp->backend->connect_timeout * 1000;
if (tmo > 0)
i = TCP_connect(s, (void *)&ss, alen, tmo);
i = TCP_connect(s, sa, salen, tmo);
else
i = connect(s, (void *)&ss, alen);
i = connect(s, sa, salen);
if (i != 0) {
AZ(close(s));
LOCK(&sp->backend->mtx);
return (-1);
}
TCP_myname(s, abuf1, sizeof abuf1, pbuf1, sizeof pbuf1);
TCP_name((void*)&ss, alen, abuf2, sizeof abuf2, pbuf2, sizeof pbuf2);
TCP_name(sa, salen, abuf2, sizeof abuf2, pbuf2, sizeof pbuf2);
WSL(sp->wrk, SLT_BackendOpen, s, "%s %s %s %s %s",
sp->backend->vrt->vcl_name, abuf1, pbuf1, abuf2, pbuf2);
sp->backend->vcl_name, abuf1, pbuf1, abuf2, pbuf2);
LOCK(&sp->backend->mtx);
return (s);
}
......@@ -254,54 +233,6 @@ VBE_ReleaseConn(struct vbe_conn *vc)
}
}
/*--------------------------------------------------------------------
* Try to get a socket connected to one of the addresses on the list.
* We start from the cached "last good" address and try all items on
* the list exactly once.
*
* Called with backend mutex held, but will release/acquire it.
*
* XXX: Not ready for DNS re-lookups
*/
static int
bes_conn_try_list(const struct sess *sp, struct backend *bp)
{
struct addrinfo *ai, *from;
int s, loops;
CHECK_OBJ_NOTNULL(bp, BACKEND_MAGIC);
/* No addrinfo, no connection */
if (bp->ai == NULL)
return (-1);
AN(bp->last_ai);
/* Called with lock held */
loops = 0;
ai = from = bp->last_ai;
while (loops == 0 || ai != from) {
/* NB: releases/acquires backend lock */
s = VBE_TryConnect(sp, ai);
if (s >= 0) {
bp->last_ai = ai;
return (s);
}
/* Try next one */
ai = ai->ai_next;
if (ai == NULL) {
loops++;
ai = bp->ai;
}
}
/* We have tried them all, fail */
return (-1);
}
/*--------------------------------------------------------------------*/
static int
......@@ -311,10 +242,25 @@ bes_conn_try(const struct sess *sp, struct backend *bp)
LOCK(&bp->mtx);
bp->refcount++;
s = bes_conn_try_list(sp, bp); /* releases/acquires backend lock */
if (s < 0)
UNLOCK(&sp->backend->mtx);
s = -1;
assert(bp->ipv6 != NULL || bp->ipv4 != NULL);
/* release lock during stuff that can take a long time */
if (params->prefer_ipv6 && bp->ipv6 != NULL)
s = VBE_TryConnect(sp, PF_INET6, bp->ipv6, bp->ipv6len);
if (s == -1 && bp->ipv4 != NULL)
s = VBE_TryConnect(sp, PF_INET, bp->ipv4, bp->ipv4len);
if (s == -1 && !params->prefer_ipv6 && bp->ipv6 != NULL)
s = VBE_TryConnect(sp, PF_INET6, bp->ipv6, bp->ipv6len);
if (s < 0) {
LOCK(&sp->backend->mtx);
bp->refcount--; /* Only keep ref on success */
UNLOCK(&bp->mtx);
UNLOCK(&bp->mtx);
}
return (s);
}
......@@ -377,7 +323,7 @@ VBE_ClosedFd(struct worker *w, struct vbe_conn *vc)
CHECK_OBJ_NOTNULL(vc->backend, BACKEND_MAGIC);
b = vc->backend;
assert(vc->fd >= 0);
WSL(w, SLT_BackendClose, vc->fd, "%s", vc->backend->vrt->vcl_name);
WSL(w, SLT_BackendClose, vc->fd, "%s", vc->backend->vcl_name);
i = close(vc->fd);
assert(i == 0 || errno == ECONNRESET || errno == ENOTCONN);
vc->fd = -1;
......@@ -398,7 +344,7 @@ VBE_RecycleFd(struct worker *w, struct vbe_conn *vc)
CHECK_OBJ_NOTNULL(vc->backend, BACKEND_MAGIC);
assert(vc->fd >= 0);
bp = vc->backend;
WSL(w, SLT_BackendReuse, vc->fd, "%s", vc->backend->vrt->vcl_name);
WSL(w, SLT_BackendReuse, vc->fd, "%s", vc->backend->vcl_name);
LOCK(&vc->backend->mtx);
VSL_stats->backend_recycle++;
VTAILQ_INSERT_HEAD(&bp->connlist, vc, list);
......
......@@ -73,15 +73,21 @@ struct backend {
unsigned magic;
#define BACKEND_MAGIC 0x64c4c7c6
struct vrt_backend vrt[1];
char *hosthdr;
char *ident;
char *vcl_name;
double connect_timeout;
uint32_t hash;
VTAILQ_ENTRY(backend) list;
int refcount;
pthread_mutex_t mtx;
struct addrinfo *ai;
struct addrinfo *last_ai;
struct sockaddr *ipv4;
socklen_t ipv4len;
struct sockaddr *ipv6;
socklen_t ipv6len;
VTAILQ_HEAD(, vbe_conn) connlist;
......
......@@ -41,7 +41,6 @@
#include <poll.h>
#include <sys/socket.h>
#include <netdb.h>
#include "shmlog.h"
#include "cache.h"
......@@ -99,9 +98,10 @@ VBE_DropRefLocked(struct backend *b)
AZ(close(vbe->fd));
VBE_ReleaseConn(vbe);
}
free(TRUST_ME(b->vrt->ident));
free(TRUST_ME(b->vrt->hostname));
free(TRUST_ME(b->vrt->portname));
free(b->ident);
free(b->hosthdr);
free(b->ipv4);
free(b->ipv6);
b->magic = 0;
free(b);
VSL_stats->n_backend--;
......@@ -117,44 +117,17 @@ VBE_DropRef(struct backend *b)
VBE_DropRefLocked(b);
}
/*--------------------------------------------------------------------
* DNS lookup of backend host/port
*/
/*--------------------------------------------------------------------*/
static void
vbe_dns_lookup(const struct cli *cli, struct backend *bp)
copy_sockaddr(struct sockaddr **sa, socklen_t *len, const unsigned char *src)
{
int error;
struct addrinfo *res, hint, *old;
CHECK_OBJ_NOTNULL(bp, BACKEND_MAGIC);
memset(&hint, 0, sizeof hint);
hint.ai_family = PF_UNSPEC;
hint.ai_socktype = SOCK_STREAM;
res = NULL;
error = getaddrinfo(bp->vrt->hostname, bp->vrt->portname,
&hint, &res);
if (error) {
if (res != NULL)
freeaddrinfo(res);
/*
* We cannot point to the source code any more, it may
* be long gone from memory. We already checked over in
* the VCL compiler, so this is only relevant for refreshes.
* XXX: which we do when exactly ?
*/
cli_out(cli, "DNS(/hosts) lookup failed for (%s/%s): %s",
bp->vrt->hostname, bp->vrt->portname, gai_strerror(error));
return;
}
LOCK(&bp->mtx);
old = bp->ai;
bp->ai = res;
bp->last_ai = res;
UNLOCK(&bp->mtx);
if (old != NULL)
freeaddrinfo(old);
assert(*src > 0);
*sa = malloc(*src);
AN(*sa);
memcpy(*sa, src + 1, *src);
*len = *src;
}
/*--------------------------------------------------------------------
......@@ -169,25 +142,46 @@ VBE_AddBackend(struct cli *cli, const struct vrt_backend *vb)
struct backend *b;
uint32_t u;
AN(vb->hostname);
AN(vb->portname);
AN(vb->ident);
assert(vb->ipv4_sockaddr != NULL || vb->ipv6_sockaddr != NULL);
(void)cli;
ASSERT_CLI();
u = crc32_l(vb->ident, strlen(vb->ident));
/* calculate a hash of (ident + ipv4_sockaddr + ipv6_sockaddr) */
u = crc32(~0U, vb->ident, strlen(vb->ident));
if (vb->ipv4_sockaddr != NULL)
u = crc32(u, vb->ipv4_sockaddr + 1, vb->ipv4_sockaddr[0]);
if (vb->ipv6_sockaddr != NULL)
u = crc32(u, vb->ipv6_sockaddr + 1, vb->ipv6_sockaddr[0]);
/* Run through the list and see if we already have this backend */
VTAILQ_FOREACH(b, &backends, list) {
CHECK_OBJ_NOTNULL(b, BACKEND_MAGIC);
if (u != b->hash)
continue;
if (strcmp(b->vrt->ident, vb->ident))
if (strcmp(b->ident, vb->ident))
continue;
if (vb->ipv4_sockaddr != NULL &&
b->ipv4len != vb->ipv4_sockaddr[0])
continue;
if (vb->ipv6_sockaddr != NULL &&
b->ipv6len != vb->ipv6_sockaddr[0])
continue;
if (b->ipv4len != 0 &&
memcmp(b->ipv4, vb->ipv4_sockaddr + 1, b->ipv4len))
continue;
if (b->ipv6len != 0 &&
memcmp(b->ipv6, vb->ipv6_sockaddr + 1, b->ipv6len))
continue;
b->refcount++;
return (b);
}
/* Create new backend */
ALLOC_OBJ(b, BACKEND_MAGIC);
XXXAN(b);
b->magic = BACKEND_MAGIC;
MTX_INIT(&b->mtx);
b->refcount = 1;
VTAILQ_INIT(&b->connlist);
b->hash = u;
......@@ -196,17 +190,21 @@ VBE_AddBackend(struct cli *cli, const struct vrt_backend *vb)
* This backend may live longer than the VCL that instantiated it
* so we cannot simply reference the VCL's copy of things.
*/
REPLACE(b->vrt->ident, vb->ident);
REPLACE(b->vrt->hostname, vb->hostname);
REPLACE(b->vrt->portname, vb->portname);
REPLACE(b->vrt->vcl_name, vb->vcl_name);
REPLACE(b->ident, vb->ident);
REPLACE(b->vcl_name, vb->vcl_name);
REPLACE(b->hosthdr, vb->hostname);
b->vrt->connect_timeout = vb->connect_timeout;
b->connect_timeout = vb->connect_timeout;
MTX_INIT(&b->mtx);
b->refcount = 1;
/*
* Copy over the sockaddrs
*/
if (vb->ipv4_sockaddr != NULL)
copy_sockaddr(&b->ipv4, &b->ipv4len, vb->ipv4_sockaddr);
if (vb->ipv6_sockaddr != NULL)
copy_sockaddr(&b->ipv6, &b->ipv6len, vb->ipv6_sockaddr);
vbe_dns_lookup(cli, b);
assert(b->ipv4 != NULL || b->ipv6 != NULL);
VTAILQ_INSERT_TAIL(&backends, b, list);
VSL_stats->n_backend++;
......@@ -238,11 +236,9 @@ cli_debug_backend(struct cli *cli, const char * const *av, void *priv)
ASSERT_CLI();
VTAILQ_FOREACH(b, &backends, list) {
CHECK_OBJ_NOTNULL(b, BACKEND_MAGIC);
cli_out(cli, "%p %s/%s/%s %d\n",
cli_out(cli, "%p %s %d\n",
b,
b->vrt->vcl_name,
b->vrt->hostname,
b->vrt->portname,
b->vcl_name,
b->refcount);
}
}
......
......@@ -166,6 +166,9 @@ struct params {
/* Log local socket address to shm */
unsigned log_local_addr;
/* Prefer IPv6 connections to backend*/
unsigned prefer_ipv6;
};
extern volatile struct params *params;
......
......@@ -735,6 +735,11 @@ static const struct parspec parspec[] = {
"VCL can override this default value for each backend.",
0,
"400", "ms" },
{ "prefer_ipv6", tweak_bool, &master.prefer_ipv6, 0, 0,
"Prefer IPv6 address when connecting to backends which "
"have both IPv4 and IPv6 addresses.",
0,
"off", "bool" },
{ "session_linger", tweak_uint,
&master.session_linger,0, UINT_MAX,
"How long time the workerthread lingers on the session "
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment