Commit c1f19f98 authored by Poul-Henning Kamp's avatar Poul-Henning Kamp

Add an offline, default-off lock-order "Witness" facility, to make sure

I have not added any other lock-order reversals.
parent 573016ec
......@@ -849,6 +849,7 @@ unsigned HTTP1_Write(const struct worker *w, const struct http *hp, const int*);
#define VXID(u) ((u) & VSL_IDENTMASK)
uint32_t VXID_Get(struct worker *, uint32_t marker);
extern volatile struct params * cache_param;
extern pthread_key_t witness_key;
void THR_SetName(const char *name);
const char* THR_GetName(void);
void THR_SetBusyobj(const struct busyobj *);
......@@ -859,9 +860,9 @@ struct req * THR_GetRequest(void);
/* cache_lck.c */
/* Internal functions, call only through macros below */
void Lck__Lock(struct lock *lck, const char *p, const char *f, int l);
void Lck__Unlock(struct lock *lck, const char *p, const char *f, int l);
int Lck__Trylock(struct lock *lck, const char *p, const char *f, int l);
void Lck__Lock(struct lock *lck, const char *p, int l);
void Lck__Unlock(struct lock *lck, const char *p, int l);
int Lck__Trylock(struct lock *lck, const char *p, int l);
void Lck__New(struct lock *lck, struct VSC_C_lck *, const char *);
void Lck__Assert(const struct lock *lck, int held);
......@@ -871,9 +872,9 @@ void Lck_Delete(struct lock *lck);
int Lck_CondWait(pthread_cond_t *cond, struct lock *lck, double);
#define Lck_New(a, b) Lck__New(a, b, #b)
#define Lck_Lock(a) Lck__Lock(a, __func__, __FILE__, __LINE__)
#define Lck_Unlock(a) Lck__Unlock(a, __func__, __FILE__, __LINE__)
#define Lck_Trylock(a) Lck__Trylock(a, __func__, __FILE__, __LINE__)
#define Lck_Lock(a) Lck__Lock(a, __func__, __LINE__)
#define Lck_Unlock(a) Lck__Unlock(a, __func__, __LINE__)
#define Lck_Trylock(a) Lck__Trylock(a, __func__, __LINE__)
#define Lck_AssertHeld(a) Lck__Assert(a, 1)
#define LOCK(nam) extern struct VSC_C_lck *lck_##nam;
......
......@@ -37,6 +37,7 @@
#include <errno.h>
#include <stdlib.h>
#include <stdio.h>
#include "cache.h"
......@@ -59,16 +60,63 @@ static VTAILQ_HEAD(, ilck) ilck_head = VTAILQ_HEAD_INITIALIZER(ilck_head);
static pthread_mutex_t lck_mtx;
/*--------------------------------------------------------------------*/
static void
Lck_Witness_Lock(const struct ilck *il, const char *p, int l, const char *try)
{
char *q, t[10];
int emit;
AN(p);
q = pthread_getspecific(witness_key);
if (q == NULL) {
q = calloc(1, 1024);
AN(q);
AZ(pthread_setspecific(witness_key, q));
}
emit = *q != '\0';
strcat(q, " ");
strcat(q, il->w);
strcat(q, try);
strcat(q, ",");
strcat(q, p);
strcat(q, ",");
bprintf(t, "%d", l);
strcat(q, t);
if (emit)
VSL(SLT_Witness, 0, "%s", q);
}
static void
Lck_Witness_Unlock(const struct ilck *il)
{
char *q, *r;
q = pthread_getspecific(witness_key);
if (q == NULL)
return;
r = strrchr(q, ' ');
if (r == NULL)
r = q;
else
*r++ = '\0';
if (memcmp(r, il->w, strlen(il->w)))
VSL(SLT_Witness, 0, "Unlock %s @ %s <%s>", il->w, r, q);
else
*r = '\0';
}
/*--------------------------------------------------------------------*/
void __match_proto__()
Lck__Lock(struct lock *lck, const char *p, const char *f, int l)
Lck__Lock(struct lock *lck, const char *p, int l)
{
struct ilck *ilck;
(void)p;
(void)f;
(void)l;
CAST_OBJ_NOTNULL(ilck, lck->priv, ILCK_MAGIC);
if (DO_DEBUG(DBG_WITNESS))
Lck_Witness_Lock(ilck, p, l, "");
AZ(pthread_mutex_lock(&ilck->mtx));
AZ(ilck->held);
ilck->stat->locks++;
......@@ -77,12 +125,11 @@ Lck__Lock(struct lock *lck, const char *p, const char *f, int l)
}
void __match_proto__()
Lck__Unlock(struct lock *lck, const char *p, const char *f, int l)
Lck__Unlock(struct lock *lck, const char *p, int l)
{
struct ilck *ilck;
(void)p;
(void)f;
(void)l;
CAST_OBJ_NOTNULL(ilck, lck->priv, ILCK_MAGIC);
......@@ -101,19 +148,19 @@ Lck__Unlock(struct lock *lck, const char *p, const char *f, int l)
*/
memset(&ilck->owner, 0, sizeof ilck->owner);
AZ(pthread_mutex_unlock(&ilck->mtx));
if (DO_DEBUG(DBG_WITNESS))
Lck_Witness_Unlock(ilck);
}
int __match_proto__()
Lck__Trylock(struct lock *lck, const char *p, const char *f, int l)
Lck__Trylock(struct lock *lck, const char *p, int l)
{
struct ilck *ilck;
int r;
(void)p;
(void)f;
(void)l;
CAST_OBJ_NOTNULL(ilck, lck->priv, ILCK_MAGIC);
if (DO_DEBUG(DBG_WITNESS))
Lck_Witness_Lock(ilck, p, l, "?");
r = pthread_mutex_trylock(&ilck->mtx);
assert(r == 0 || r == EBUSY);
if (r == 0) {
......
......@@ -51,6 +51,7 @@ volatile struct params *cache_param;
static pthread_key_t req_key;
static pthread_key_t bo_key;
pthread_key_t witness_key;
void
THR_SetBusyobj(const struct busyobj *bo)
......@@ -199,6 +200,7 @@ child_main(void)
AZ(pthread_key_create(&req_key, NULL));
AZ(pthread_key_create(&bo_key, NULL));
AZ(pthread_key_create(&witness_key, NULL));
AZ(pthread_key_create(&name_key, NULL));
THR_SetName("cache-main");
......
......@@ -70,6 +70,7 @@ extern int vtc_stop; /* Abandon current test, no error */
extern pthread_t vtc_thread;
extern int iflg;
extern unsigned vtc_maxdur;
extern int vtc_witness;
void init_sema(void);
void init_server(void);
......
......@@ -88,6 +88,7 @@ static int vtc_fail;
static int leave_temp;
static char *tmppath;
static char *cwd = NULL;
int vtc_witness = 0;
/**********************************************************************
* Parse a -D option argument into a name/val pair, and insert
......@@ -414,7 +415,7 @@ main(int argc, char * const *argv)
setbuf(stdout, NULL);
setbuf(stderr, NULL);
while ((ch = getopt(argc, argv, "D:ij:klLn:qt:v")) != -1) {
while ((ch = getopt(argc, argv, "D:ij:klLn:qt:vW")) != -1) {
switch (ch) {
case 'D':
if (!parse_D_opt(optarg)) {
......@@ -452,6 +453,9 @@ main(int argc, char * const *argv)
if (vtc_verbosity < 2)
vtc_verbosity++;
break;
case 'W':
vtc_witness++;
break;
default:
usage();
}
......
......@@ -394,6 +394,8 @@ varnish_launch(struct varnish *v)
VSB_printf(vsb, "cd ${pwd} &&");
VSB_printf(vsb, " exec ${varnishd} %s -d -n %s",
v->jail, v->workdir);
if (vtc_witness)
VSB_cat(vsb, " -p debug=+witness");
VSB_printf(vsb, " -l 2m,1m,-");
VSB_printf(vsb, " -p auto_restart=off");
VSB_printf(vsb, " -p syslog_cli_traffic=off");
......
#!/usr/bin/env python
#
# This script is in the public domain
#
# Run instructions:
# varnishtest -W -iv -j <pick_a_number> > _.w
# python witness.py
# dot -Tpng /tmp/_.dot > /tmp/_.png
from __future__ import print_function
d = dict()
a = dict()
fi = open("_w")
fo = open("/tmp/_.dot", "w")
fo.write('''digraph {
#rotate="90"
#page="8.2,11.7"
size="8.2,11.7"
rankdir="LR"
node [fontname="Inconsolata", fontsize="10"]
edge [fontname="Inconsolata", fontsize="10"]
''')
for i in fi:
l = "ROOT"
j = i.split()
if len(j) < 8:
continue
if j[1][0] != 'v':
continue
if j[3] != "vsl|":
continue
if j[5] != "Witness":
continue
t = j[7:]
tt = str(t)
if tt in d:
continue
d[tt] = True
for e in t:
f = e.split(",")
x = '"%s" -> "%s" [label="%s(%s)"]\n' % (l, f[0], f[1], f[2])
if not x in a:
a[x] = True
fo.write(x)
l = f[0]
fo.write("}\n")
.. _phk_brinch_hansens_arrows:
=====================
Brinch-Hansens Arrows
=====================
As you may have noticed, I am a bit of a IT-history nerd, and we do have
some rather important early IT history in Denmark.
If you have a solid CS background, you have undoubtedly heard about
both dining philosophers and Brinch-Hansens work in multiprogramming.
Multiprogramming is fundamentally hard, because you cannot have one
thread holding lock A trying to get lock B, while another holds
lock B trying to get lock A.
Brinch-Hansen did a lot of both theoretical and practical work in the
area of multiprogramming and being both good at it and one of the
pioneers, he was awardede the ACM Turing Prize for it.
You can read more about him here:
`Brinch-Hansen Archive <http://brinch-hansen.net/>`_
Along the way he came up with a trivial and practical way to guarantee
that a given multiprogramming system was free of deadlocks: Draw
the locking order and make sure all the arrows point to the right.
When we started working with multi-core systems in FreeBSD, we were
sure to have deadlocks in our future, and we adobted and expanded
a facility called "WITNESS" originally written for BSDI, which
keeps an eye on Brinch-Hansens arrows in real time.
Historically I have been pretty good at avoiding deadlocks, it seems
to come natural to me to think about locking order, but not everybody
feels that way about them, and WITNESS have caught a lot of "Ohh,
didn't think about *that*" situations over the years.
It is no accident that Varnish has a very simple locking structure,
but as we add more and more flexibility and extensibility to Varnish
it grows a little here and there, and I managed to introduce a
lock-order reversal the other day - my first in about five years I
think.
Since I'm obviously getting old and slipping up here, I though it
was about time I carried out the Brinch-Hansen check on Varnish.
I briefly pondered porting WITNESS into Varnish, but it's 3k lines
and we have extremely good code coverage in our regression tests
so I decided to KISS and do it as post-processing.
I have added default-off debug code to emit VSL "Witness" records,
taught varnishtest how to enable that code, and added a small python
script to process the records into a nice plot:
.. image:: brinch_hansens_arrows_1.svg
And yo and behold: All the arrows point to the right.
*phk*
This diff is collapsed.
......@@ -8,6 +8,7 @@ You may or may not want to know what Poul-Henning thinks.
.. toctree::
:maxdepth: 1
brinch-hansens-arrows.rst
ssl_again.rst
persistent.rst
dough.rst
......
......@@ -41,4 +41,5 @@ DEBUG_BIT(LURKER, lurker, "VSL Ban lurker")
DEBUG_BIT(ESI_CHOP, esi_chop, "Chop ESI fetch to bits")
DEBUG_BIT(FLUSH_HEAD, flush_head, "Flush after http1 head")
DEBUG_BIT(VTC_MODE, vtc_mode, "Varnishtest Mode")
DEBUG_BIT(WITNESS, witness, "Emit WITNESS lock records")
/*lint -restore */
......@@ -518,5 +518,9 @@ SLTM(VfpAcct, 0, "Fetch filter accounting",
NODEF_NOTICE
)
SLTM(Witness, 0, "Lock order witness records",
"Diagnostic recording of locking order.\n"
)
#undef NODEF_NOTICE
/*lint -restore */
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment