Fix and radically simplify logbuffer_wait_flush_fini()

The dance of taking a reference when waiting caused a lot of trouble
already before, and with a fresh look at it does not seem to make much
sense. But most importantly, it was wrong:

lbuf->ff was set to NULL in logbuffer_flush_finish_work_one() before
the mutex was returned with pthread_cond_wait(), so

	if (ff == NULL)
		goto unlock;

in logbuffer_wait_flush_fini() could lead to the function returning
before logbuffer_flush_finish_work_one() _was_ actually done.

But with bceec122 this could lead
to the stack memory being repurposed (logbuffer_flush_finish
returning) before it was actually safe to.

This issue could surface in fellow_log_test hanging.

We also now return all allocations under the lock to prevent a race
with fellow_log_close() where flush finish threads could outlive the
ffd, resulting in buddy leak detection to fire, because the ff
allocation was not returned.

Fixes #49
parent 95ec7bce
......@@ -316,7 +316,6 @@ struct fellow_logbuffer_ff {
unsigned magic;
#define FELLOW_LOGBUFFER_FF_MAGIC 0xcb1341d3
unsigned can;
unsigned ref; // for wait_flush_fini
enum ff_state_e state;
struct buddy_ptr_extent alloc;
VLIST_ENTRY(fellow_logbuffer_ff) list;
......@@ -1979,7 +1978,6 @@ logbuffer_assert_empty(const struct fellow_logbuffer *lbuf)
static void
logbuffer_wait_flush_fini(const struct fellow_logbuffer *lbuf)
{
struct fellow_logbuffer_ff *ff;
pthread_mutex_t *phase_mtx;
pthread_cond_t *phase_cond;
......@@ -1989,19 +1987,8 @@ logbuffer_wait_flush_fini(const struct fellow_logbuffer *lbuf)
AN(phase_cond);
AZ(pthread_mutex_lock(phase_mtx));
ff = lbuf->ff;
if (ff == NULL)
goto unlock;
CHECK_OBJ(ff, FELLOW_LOGBUFFER_FF_MAGIC);
ff->ref++;
while (ff->state < FF_DONE)
while (lbuf->ff != NULL)
AZ(pthread_cond_wait(phase_cond, phase_mtx));
AN(ff->ref);
assert(lbuf->ff != ff);
if (--ff->ref == 0)
AZ(pthread_cond_broadcast(phase_cond));
unlock:
AZ(pthread_mutex_unlock(phase_mtx));
}
......@@ -3155,34 +3142,29 @@ logbuffer_flush_finish_work_one(struct fellow_logbuffer_ff *ff)
AZ(pthread_mutex_lock(phase_mtx));
FF_TRANSITION(ff, FF_FREE, FF_DONE);
AN(ff->lbuf_ff);
/* there might be a later flush */
if (*ff->lbuf_ff == ff)
*ff->lbuf_ff = NULL;
ff->lbuf_ff = NULL;
AZ(VLIST_NEXT(ff, list));
next = VLIST_PREV(ff, &ffd->ffhead, fellow_logbuffer_ff, list);
VLIST_REMOVE(ff, list);
while (ff->ref > 0) {
// sync with logbuffer_wait_flush_fini()
AN(ff->lbuf_ff);
/* are we the last flush of this logbuffer ?
* note: next can still be non-null for a flush from
* a different logbuffer
*/
if (*ff->lbuf_ff == ff) {
*ff->lbuf_ff = NULL;
AZ(pthread_cond_broadcast(phase_cond));
AZ(pthread_cond_wait(phase_cond, phase_mtx));
}
AZ(ff->ref);
ff->lbuf_ff = NULL;
AN(ffd->nff--);
// return mem under mtx to avoid race with buddy destroy
if (UNLIKELY(ffd->phase == FP_FINI)) {
if (LIKELY(alloc.ptr != NULL))
buddy_return1_ptr_extent(ffd->membuddy, &alloc);
if (ffd->nff == 0)
AZ(pthread_cond_broadcast(phase_cond));
}
AZ(pthread_mutex_unlock(phase_mtx));
if (LIKELY(alloc.ptr != NULL))
buddy_return1_ptr_extent(ffd->membuddy, &alloc);
// for felow_log_close()
if (ffd->nff == 0)
AZ(pthread_cond_broadcast(phase_cond));
AZ(pthread_mutex_unlock(phase_mtx));
return (next);
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment