Commit 3d20d799 authored by Nils Goroll's avatar Nils Goroll

For ESI T_NEXUS nodes, we can put all nodes directly below them onto the ws

Because the ESI request needs to stay alive until all nodes below have
been delivered, and now that we have tree pruning, we can take those
nodes from the ESI request's workspace.

See long comment in src/node.c for more detail and evidence collection
for the tuneables.
parent b99fd36e
......@@ -52,15 +52,132 @@
#define GZIP_TAILBUF_SZ 13
// node finalizers
/* ============================================================
* node finalizers
*/
static void fini_final(struct req *, struct node *);
static void fini_subreq(struct req *, struct node *);
static void fini_data(struct req *, struct node *);
/* ============================================================
* tuneables which we may or may not want to expose
*/
/*
* nodes on the workspace:
*
* Because we ensure that we do not fini any ESI T_NEXUS request before all
* children have been fini't (we need to do this anyway because all children
* need the nexus' VDP chain), we can allocate any ESI T_NEXUS' children from
* its workspace
*
* This is great, because it avoids using a central allocator, reduces memory
* requirements and, in particular, allows for totally lock free allocations,
* because we create all children of a nexus from its request context anyway (if
* not, a node can still be requested from a different allocator like the
* mempool).
*
* Other vmods could also require a foreign request's workspace (for all
* practical purposes this will probably be the topreq's workspace for
* PRIV_TOP).
*
* If we just accessed the workspace for every node allocation, this would
* create more conflict points with such allocations (which need to be
* synchronised to work with pesi anyway), so we better stay out of the way and
* pre-allocate a sensible number of nodes. This also increases memory locality
*
* ws_min_room is the minimum number of bytes we keep available,
* ws_max_nodes is the maximum number of nodes to allocate from the workspace
*
* data collection from our vtcs with DEBUG_PESI_WS enabled:
* $ find . -name \*.log | xargs grep -h "ws used" | sort| uniq -c
* 3 **** v1 vsl| 0 Debug - ws used 104 free 56320
* 3 **** v1 vsl| 0 Debug - ws used 104 free 56464
* 1 **** v1 vsl| 0 Debug - ws used 104 free 56648
* 1 **** v1 vsl| 0 Debug - ws used 104 free 56672
* 19 **** v1 vsl| 0 Debug - ws used 104 free 56736
* 16 **** v1 vsl| 0 Debug - ws used 104 free 56752
* 7 **** v1 vsl| 0 Debug - ws used 104 free 56760
* 4 **** v1 vsl| 0 Debug - ws used 128 free 56688
* 2 **** v1 vsl| 0 Debug - ws used 128 free 56704
* 2 **** v1 vsl| 0 Debug - ws used 128 free 56712
* 1 **** v1 vsl| 0 Debug - ws used 64 free 56280
* 1 **** v1 vsl| 0 Debug - ws used 64 free 56296
* 1 **** v1 vsl| 0 Debug - ws used 64 free 56304
* 7 **** v1 vsl| 0 Debug - ws used 64 free 56344
* 5 **** v1 vsl| 0 Debug - ws used 64 free 56352
* 3 **** v1 vsl| 0 Debug - ws used 64 free 56360
* 14 **** v1 vsl| 0 Debug - ws used 64 free 56368
* 2 **** v1 vsl| 0 Debug - ws used 64 free 56376
* 1 **** v1 vsl| 0 Debug - ws used 64 free 56432
* 15 **** v1 vsl| 0 Debug - ws used 64 free 56496
* 4 **** v1 vsl| 0 Debug - ws used 64 free 56504
* 50 **** v1 vsl| 0 Debug - ws used 64 free 56512
* 19 **** v1 vsl| 0 Debug - ws used 64 free 56520
*
* $ find . -name \*.log | xargs grep -h "nodes" | sort | uniq -c
* 125 **** v1 vsl| 0 Debug - nodes used 0
* 8 **** v1 vsl| 0 Debug - nodes used 1
* 8 **** v1 vsl| 0 Debug - nodes used 10
* 4 **** v1 vsl| 0 Debug - nodes used 3
* 36 **** v1 vsl| 0 Debug - nodes used 5
*
*/
// known minimum workspace left over for other vmods running during delivery
// (in bytes; derived from the DEBUG_PESI_WS data collection documented above
// -- NOTE(review): presumably generous enough for third-party vmods that also
// allocate from the req workspace during delivery; confirm if such vmods are
// in use)
static size_t ws_min_room = 4 * 1024;
// sensible maximum number of nodes to pre-allocate on the workspace
// (upper bound on the up-front workspace reservation made by
// node_fill_nodestock())
static unsigned ws_max_nodes = 32;
/* ============================================================
* node/tree alloc / fini / free
*/
/*
 * Initialize a per-request node stock to the empty list.
 */
void
node_init_nodestock(struct node_head *stock)
{

	VSTAILQ_INIT(stock);
}
/*
 * Pre-allocate nodes from a request workspace onto the stock list.
 *
 * Reserves the free workspace, carves out up to ws_max_nodes zeroed
 * nodes while keeping at least ws_min_room bytes available for other
 * workspace users, marks each node as workspace-allocated (NA_WS) and
 * appends it to the stock.  May add zero nodes if the workspace is
 * too tight.
 */
void
node_fill_nodestock(struct ws *ws, struct node_head *stock)
{
	struct node *n;
	unsigned avail, cnt, take;
	void *base;

	WS_Assert(ws);
	AN(stock);

	avail = WS_ReserveAll(ws);
	base = ws->f;

	/* leave ws_min_room bytes untouched, cap at ws_max_nodes */
	cnt = (avail < ws_min_room) ? 0 : (avail - ws_min_room) / sizeof *n;
	if (cnt > ws_max_nodes)
		cnt = ws_max_nodes;

	take = cnt * sizeof *n;
	WS_Release(ws, take);
	if (take == 0)
		return;

	memset(base, 0, take);
	for (n = base; cnt > 0; cnt--, n++) {
		n->magic = NODE_MAGIC;
		n->allocator = NA_WS;
		VSTAILQ_INSERT_TAIL(stock, n, sibling);
	}
	assert((char *)n <= ws->f);
}
size_t
node_size()
{
......@@ -68,11 +185,20 @@ node_size()
}
struct node *
node_alloc(void)
node_alloc(struct pesi *pesi)
{
struct node *node;
unsigned sz;
if (pesi != NULL &&
(node = VSTAILQ_FIRST(&pesi->nodestock)) != NULL) {
VSTAILQ_REMOVE_HEAD(&pesi->nodestock, sibling);
VSTAILQ_NEXT(node, sibling) = NULL;
CHECK_OBJ(node, NODE_MAGIC);
assert(node->allocator == NA_WS);
return (node);
}
AN(mempool);
node = MPL_Get(mempool, &sz);
MPL_AssertSane(node);
......@@ -130,15 +256,22 @@ tree_prune(struct req *req, struct node *node)
/*
 * Release a node (sub)tree.
 *
 * For a T_NEXUS node, prune all children first (they must be gone
 * before the nexus, see the workspace-allocation comment at the top of
 * this file), then run the node finalizer and return the node to its
 * allocator.  Workspace-allocated nodes (NA_WS) are not freed
 * individually: they vanish together with the owning request's
 * workspace.
 *
 * NOTE(review): the previous text interleaved the pre-change
 * unconditional MPL free path (AN(mempool); MPL_AssertSane();
 * assert(allocator == NA_MPL); MPL_Free()) with its replacement switch,
 * which would have freed an MPL node twice and tripped the assert for
 * NA_WS nodes; only the switch form is kept.
 */
void
tree_free(struct req *req, struct node *node)
{

	if (node->type == T_NEXUS)
		tree_prune(req, node);

	node_fini(req, node);

	switch (node->allocator) {
	case NA_WS:
		/* lives on a request workspace, freed wholesale */
		return;
	case NA_MPL:
		AN(mempool);
		MPL_AssertSane(node);
		MPL_Free(mempool, node);
		return;
	default:
		INCOMPL();
	}
}
/* ============================================================
......
......@@ -29,7 +29,7 @@
* node interface
*/
struct node;
#include "node_head.h"
struct bytes_tree {
unsigned magic;
......@@ -107,8 +107,6 @@ enum n_state {
ST_PRUNED
} __attribute__ ((__packed__));
VSTAILQ_HEAD(node_head, node);
struct node_nexus {
struct node_head children;
struct objcore *oc;
......@@ -259,7 +257,12 @@ node_mutate_unlock(struct bytes_tree *tree)
//--------------
struct node *node_alloc(void);
void node_fill_nodestock(struct ws *, struct node_head *);
//--------------
struct pesi;
struct node *node_alloc(struct pesi *);
void node_insert(struct bytes_tree *, struct node *, struct node *);
void set_open(struct bytes_tree *, struct node *, const struct worker *);
void set_closed(struct bytes_tree *, struct node *, const struct worker *);
......
/*
* shared between pesi.h and node.h
* because of (struct pesi).nodestock
*/
#ifndef _PESI_NODE_HEAD_H
#define _PESI_NODE_HEAD_H
struct node;
VSTAILQ_HEAD(node_head, node);
#endif
void node_init_nodestock(struct node_head *);
......@@ -79,6 +79,8 @@ pesi_new(struct ws *ws, struct pesi_tree *pesi_tree)
pesi->pesi_tree = pesi_tree;
pesi->flags = PF_HAS_TASK | PF_CFG_DEFAULT;
node_init_nodestock(&pesi->nodestock);
Lck_Lock(&pesi_tree->task_lock);
VTAILQ_INSERT_TAIL(&pesi_tree->task_head, pesi, list);
assert(pesi_tree->task_running >= 0);
......
......@@ -29,6 +29,8 @@
* PESI per request state
*/
#include "node_head.h"
struct pesi_tree;
struct pesi * pesi_new(struct ws *ws, struct pesi_tree *pesi_tree);
......@@ -64,6 +66,11 @@ struct pesi {
struct pecx pecx[1];
VTAILQ_ENTRY(pesi) list;
struct node_head nodestock;
unsigned no_thread;
#ifdef DEBUG_PESI_WS
uintptr_t ws_snap;
#endif
};
......@@ -514,7 +514,7 @@ pesi_buf_bytes(struct req *req, enum vdp_action act, void **priv,
assert(req == parent->nexus.req);
VSLdbg(req, "bytes_add: adding data to node");
node = node_alloc();
node = node_alloc(pesi);
CHECK_OBJ_NOTNULL(node, NODE_MAGIC);
node->type = T_DATA;
......@@ -637,7 +637,11 @@ vdp_pesi_init(struct req *req, void **priv)
*priv = pesi;
WS_Assert_Allocated(req->ws, pesi, sizeof *pesi);
node_fill_nodestock(req->ws, &pesi->nodestock);
get_task_cfg(req, &pesi->flags);
#ifdef DEBUG_PESI_WS
pesi->ws_snap = WS_Snapshot(req->ws);
#endif
return (0);
}
......@@ -678,7 +682,8 @@ vdp_pesi_init(struct req *req, void **priv)
if (req->resp_len != 0)
req->resp_len = -1;
pesi_tree->tree->root = node_alloc();
node_fill_nodestock(req->ws, &pesi->nodestock);
pesi_tree->tree->root = node_alloc(pesi);
CHECK_OBJ_NOTNULL(pesi_tree->tree->root, NODE_MAGIC);
pesi_tree->tree->front = pesi_tree->tree->root;
pesi_tree->tree->root->state = ST_PRIVATE;
......@@ -696,6 +701,9 @@ vdp_pesi_init(struct req *req, void **priv)
AZ(pesi->woken);
VDP_Push(req, &VDP_pesi_buf, pesi);
#ifdef DEBUG_PESI_WS
pesi->ws_snap = WS_Snapshot(req->ws);
#endif
return (0);
}
......@@ -722,6 +730,20 @@ vdp_pesi_fini(struct req *req, void **priv)
node = pesi->node;
CHECK_OBJ_NOTNULL(node, NODE_MAGIC);
#ifdef DEBUG_PESI_WS
assert(node->type == T_NEXUS);
VSL(SLT_Debug, 0, "ws used %u free %u",
pdiff((void *)pesi->ws_snap, (void *)WS_Snapshot(req->ws)),
pdiff(req->ws->f, req->ws->e));
{
unsigned i = 0;
struct node *c;
VSTAILQ_FOREACH(c, &node->nexus.children, sibling)
i++;
VSL(SLT_Debug, 0, "nodes used %u", i);
}
#endif
if (req->esi_level > 0) {
assert(req->transport_priv == pesi);
*priv = NULL;
......@@ -880,7 +902,7 @@ vdp_pesi_bytes(struct req *req, enum vdp_action act, void **priv,
if (*pecx->p == VEC_GZ) {
if (parent_gzip == NULL) {
AZ(child);
child = node_alloc();
child = node_alloc(pesi);
CHECK_OBJ_NOTNULL(child, NODE_MAGIC);
child->type = T_CRC;
child->state = ST_DATA;
......@@ -919,7 +941,7 @@ vdp_pesi_bytes(struct req *req, enum vdp_action act, void **priv,
return (-1);
AZ(child);
child = node_alloc();
child = node_alloc(pesi);
CHECK_OBJ_NOTNULL(child, NODE_MAGIC);
child->type = T_CRC;
child->state = ST_DATA;
......@@ -952,7 +974,7 @@ vdp_pesi_bytes(struct req *req, enum vdp_action act, void **priv,
Debug("INCL [%s][%s] BEGIN\n", q, pecx->p);
AZ(child);
child = node_alloc();
child = node_alloc(pesi);
CHECK_OBJ_NOTNULL(child, NODE_MAGIC);
child->type = T_NEXUS;
child->state = ST_PRIVATE;
......@@ -982,7 +1004,7 @@ vdp_pesi_bytes(struct req *req, enum vdp_action act, void **priv,
case 2:
if (node->nexus.gzip.is) {
AZ(child);
child = node_alloc();
child = node_alloc(pesi);
CHECK_OBJ_NOTNULL(child, NODE_MAGIC);
child->type = T_CRC;
child->state = ST_DATA;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment