fellow_disk_obj: copy to smaller allocation instead of trimming

parent e7677311
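An in-place trim can give back at most the tail of the allocation past the used size; copying the object into a right-sized extent returns the whole original extent to membuddy, at the cost of a memcpy. The copy is only attempted when it actually changes the allocation's size class, which is what the log2up() comparison in fellow_disk_obj_trim() checks. A minimal standalone sketch of that test, assuming power-of-two buddy size classes and using a hand-rolled log2up() as a stand-in for fellow's helper:

/* Sketch only, not fellow code: decide whether copying into a smaller
 * allocation can free memory at all, assuming the allocator hands out
 * power-of-two size classes. */
#include <assert.h>
#include <stddef.h>

static unsigned
log2up(size_t sz)
{
        unsigned l = 0;

        assert(sz > 0);
        while (((size_t)1 << l) < sz)
                l++;
        return (l);
}

/* relocation can only help if the size class shrinks */
static int
relocation_worthwhile(size_t alloc_sz, size_t trim_sz)
{
        assert(trim_sz <= alloc_sz);
        return (log2up(trim_sz) < log2up(alloc_sz));
}

For example, an object using 7 KB of a 16 KB extent drops from the 16 KB class to the 8 KB class, so the copy is attempted; at 9 KB it would still round up to 16 KB and is trimmed in place instead.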
@@ -1109,12 +1109,14 @@ fellow_disk_obj_compat(struct fellow_disk_obj *fdo)
fdo->fdo_flags |= FDO_F_VXID64;
}
static void
static struct fellow_disk_obj *
fellow_disk_obj_trim(const struct fellow_cache *fc,
struct fellow_cache_seg *fcs)
{
struct fellow_disk_seglist *fdsl;
struct fellow_disk_obj *fdo;
struct buddy_ptr_extent mem;
size_t trim_sz;
CHECK_OBJ_NOTNULL(fc, FELLOW_CACHE_MAGIC);
CHECK_OBJ_NOTNULL(fcs, FELLOW_CACHE_SEG_MAGIC);
@@ -1128,8 +1130,38 @@ fellow_disk_obj_trim(const struct fellow_cache *fc,
assert(fdsl->lsegs >= fdsl->nsegs);
fdsl->lsegs = fdsl->nsegs;
buddy_trim1_ptr_extent(fc->membuddy, &fcs->alloc,
fellow_disk_obj_size(fdo, fdsl));
trim_sz = fellow_disk_obj_size(fdo, fdsl);
/*
* Objects which are already potentially accessed asynchronously need to
* keep their address (fellow_cache_async_write_complete() call).
*
* But objects which we are reading and which thus do not yet have
* concurrent access can be relocated. We pay for a copy, but in return
* the larger original page is freed as a whole.
*/
switch (fcs->state) {
case FCO_READING:
if (log2up(trim_sz) < log2up(fcs->alloc.size)) {
mem = buddy_alloc1_ptr_extent_wait(fc->membuddy,
FEP_SPCPRI, trim_sz, 0);
if (mem.ptr) {
memcpy(mem.ptr, fcs->alloc.ptr, trim_sz);
buddy_return1_ptr_extent(fc->membuddy,
&fcs->alloc);
fcs->alloc = mem;
break;
}
}
/* FALLTHROUGH */
case FCO_WRITING:
buddy_trim1_ptr_extent(fc->membuddy, &fcs->alloc, trim_sz);
break;
default:
WRONG("fcs->state to call fellow_disk_obj_trim()");
}
return (fellow_disk_obj(fcs));
}
/* ============================================================
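Two details of the new switch are worth spelling out: relocation is best-effort, falling through to the old in-place trim when no smaller extent is available, and it is only tried in FCO_READING, because an object in FCO_WRITING may already be referenced by its current address from fellow_cache_async_write_complete(). The changed return type exposes this to callers; on the writing side the return value is only good for asserting that nothing moved, roughly:

/* sketch of the FCO_WRITING caller pattern: trim must not move the
 * object, so the return value only serves to assert stability */
fdo = fellow_disk_obj(fcs);
fdo2 = fellow_disk_obj_trim(fc, fcs);
assert(fdo == fdo2);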
@@ -1908,10 +1940,6 @@ fellow_cache_seglists_load(const struct fellow_cache *fc,
CHECK_OBJ_NOTNULL(fc, FELLOW_CACHE_MAGIC);
err = fellow_disk_seglist_check(fdsl);
if (err != NULL)
return (err);
while (1) {
CHECK_OBJ_NOTNULL(fcsl, FELLOW_CACHE_SEGLIST_MAGIC);
CHECK_OBJ_NOTNULL(fdsl, FELLOW_DISK_SEGLIST_MAGIC);
@@ -3001,6 +3029,7 @@ fellow_cache_async_write_complete(struct fellow_cache *fc,
struct fellow_busy *fbo;
struct fellow_cache_seg *fcs = NULL;
struct fellow_cache_obj *fco;
struct fellow_disk_obj *fdo, *fdo2;
enum fcos_state fcos_next = FCOS_INVAL;
enum fellow_busy_io_e type;
uint8_t io_outstanding;
@@ -3054,8 +3083,11 @@ fellow_cache_async_write_complete(struct fellow_cache *fc,
(FCOS_HIGH(fcs->state) | FCOS_INCORE);
assert_fcos_transition(fcs->state, fcos_next);
if (fcs->state == FCO_WRITING)
fellow_disk_obj_trim(fc, fcs);
if (fcs->state == FCO_WRITING) {
fdo = fellow_disk_obj(fcs);
fdo2 = fellow_disk_obj_trim(fc, fcs);
assert(fdo == fdo2);
}
} else {
assert(type == FBIO_SEGLIST);
if (result < (int32_t)fbio->u.seglist.reg.size) {
@@ -5368,11 +5400,22 @@ struct objcore **ocp, uintptr_t priv2, unsigned crit)
if (fco->oc)
fco->oc->oa_present |= fdoa2oa_present(fdo->fdoa_present);
fdsl = fellow_disk_obj_fdsl(fdo);
/* the check of the fdo-embedded fdsl needs to happen before trim,
* because both trim and relocation change data after the used part of
* the fdsl
*/
err = fellow_disk_seglist_check(fdsl);
if (err != NULL)
goto err;
/* trim may relocate, so get the new fdo and fdsl pointers */
fdo = fellow_disk_obj_trim(fc, fcs);
fdsl = fellow_disk_obj_fdsl(fdo);
assert(PAOK(fdsl));
CHECK_OBJ_NOTNULL(fdsl, FELLOW_DISK_SEGLIST_MAGIC);
// the embedded fcsl may or may not fit
// the fco-embedded fcsl may or may not fit
if (fco->fcsl_embed.lsegs >= fdsl->nsegs)
fco->fcsl = &fco->fcsl_embed;
else {
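The hunk above also fixes an ordering hazard: the fdo-embedded fdsl has to be validated before the trim, because an in-place trim hands the tail of the extent back to the allocator and a relocating trim copies only fellow_disk_obj_size() bytes, so either way the bytes past the used part of the fdsl are gone afterwards. Condensed, the load path now reads roughly as follows (a sketch; the error label and surrounding checks are in the hunk):

/* 1: check structures inside the allocation while they are intact */
err = fellow_disk_seglist_check(fellow_disk_obj_fdsl(fdo));
if (err != NULL)
        goto err;
/* 2: trim; for FCO_READING this may relocate the object */
fdo = fellow_disk_obj_trim(fc, fcs);
/* 3: re-derive every pointer into the object from the new fdo */
fdsl = fellow_disk_obj_fdsl(fdo);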
@@ -5397,8 +5440,6 @@ struct objcore **ocp, uintptr_t priv2, unsigned crit)
if (err != NULL)
goto err;
fellow_disk_obj_trim(fc, fcs);
#define FDO_AUXATTR(U, l) \
fellow_cache_seg_associate(&fco->aa_##l##_seg, \
&fdo->aa_##l##_seg, fdo->aa_##l##_seg.seg.size == 0 ? \
......