uplex-varnish / unique-xids · Commits · 0cca0bf8

Commit 0cca0bf8 authored Feb 08, 2011 by Poul-Henning Kamp

    clone from storage_persistent.c

Parent: 20b88b19
Showing 1 changed file with 1580 additions and 0 deletions.

bin/varnishd/storage_persistent_mgt.c   0 → 100644   (+1580, -0)
/*-
* Copyright (c) 2008-2010 Linpro AS
* All rights reserved.
*
* Author: Poul-Henning Kamp <phk@phk.freebsd.dk>
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* Persistent storage method
*
* XXX: Before we start the client or maybe after it stops, we should give the
* XXX: stevedores a chance to examine their storage for consistency.
*
* XXX: Do we ever free the LRU-lists ?
*/
#include "config.h"
#include "svnid.h"
SVNID("$Id$")

#include <errno.h>
#include <math.h>
#include <stdio.h>
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <sys/param.h>
#include <sys/mman.h>
#include "cache.h"
#include "stevedore.h"
#include "hash_slinger.h"
#include "vsha256.h"
#include "cli.h"
#include "cli_priv.h"
#include "persistent.h"
#include "storage_persistent.h"

/*--------------------------------------------------------------------*/

/*
 * silos is unlocked, it only changes during startup when we are
 * single-threaded
 */
static VTAILQ_HEAD(,smp_sc)	silos = VTAILQ_HEAD_INITIALIZER(silos);

/*--------------------------------------------------------------------
 * SIGNATURE functions
 * The signature is SHA256 over:
 *    1. The smp_sign struct up to but not including the length field.
 *    2. smp_sign->length bytes, starting after the smp_sign structure
 *    3. The smp_sign->length field.
 * The signature is stored after the byte-range from step 2.
 */

/*--------------------------------------------------------------------
 * Define a signature by location and identifier.
 */

static void
smp_def_sign(const struct smp_sc *sc, struct smp_signctx *ctx,
    uint64_t off, const char *id)
{

	AZ(off & 7);			/* Alignment */
	assert(strlen(id) < sizeof ctx->ss->ident);

	memset(ctx, 0, sizeof *ctx);
	ctx->ss = (void*)(sc->base + off);
	ctx->unique = sc->unique;
	ctx->id = id;
}

/*--------------------------------------------------------------------
 * Check that a signature is good, leave state ready for append
 */

static int
smp_chk_sign(struct smp_signctx *ctx)
{
	struct SHA256Context cx;
	unsigned char sign[SHA256_LEN];
	int r = 0;

	if (strncmp(ctx->id, ctx->ss->ident, sizeof ctx->ss->ident))
		r = 1;
	else if (ctx->unique != ctx->ss->unique)
		r = 2;
	else if ((uintptr_t)ctx->ss != ctx->ss->mapped)
		r = 3;
	else {
		SHA256_Init(&ctx->ctx);
		SHA256_Update(&ctx->ctx, ctx->ss,
		    offsetof(struct smp_sign, length));
		SHA256_Update(&ctx->ctx, SIGN_DATA(ctx), ctx->ss->length);
		cx = ctx->ctx;
		SHA256_Update(&cx, &ctx->ss->length,
		    sizeof(ctx->ss->length));
		SHA256_Final(sign, &cx);
		if (memcmp(sign, SIGN_END(ctx), sizeof sign))
			r = 4;
	}
	if (r) {
		fprintf(stderr, "CHK(%p %s %p %s) = %d\n",
		    ctx, ctx->id, ctx->ss,
		    r > 1 ? ctx->ss->ident : "<invalid>", r);
	}
	return (r);
}
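
/*
 * The return values above identify which check failed: 1 = ident string
 * mismatch, 2 = unique number mismatch, 3 = silo mapped at a different
 * address than when it was signed, 4 = SHA256 digest mismatch; 0 = good.
 */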

/*--------------------------------------------------------------------
 * Append data to a signature
 */

static void
smp_append_sign(struct smp_signctx *ctx, const void *ptr, uint32_t len)
{
	struct SHA256Context cx;
	unsigned char sign[SHA256_LEN];

	if (len != 0) {
		SHA256_Update(&ctx->ctx, ptr, len);
		ctx->ss->length += len;
	}
	cx = ctx->ctx;
	SHA256_Update(&cx, &ctx->ss->length, sizeof(ctx->ss->length));
	SHA256_Final(sign, &cx);
	memcpy(SIGN_END(ctx), sign, sizeof sign);
	XXXAZ(smp_chk_sign(ctx));
}

/*--------------------------------------------------------------------
 * Reset a signature to empty, prepare for appending.
 */

static void
smp_reset_sign(struct smp_signctx *ctx)
{

	memset(ctx->ss, 0, sizeof *ctx->ss);
	strcpy(ctx->ss->ident, ctx->id);
	ctx->ss->unique = ctx->unique;
	ctx->ss->mapped = (uintptr_t)ctx->ss;
	SHA256_Init(&ctx->ctx);
	SHA256_Update(&ctx->ctx, ctx->ss,
	    offsetof(struct smp_sign, length));
	smp_append_sign(ctx, NULL, 0);
}

/*--------------------------------------------------------------------
 * Force a write of a signature block to the backing store.
 */

static void
smp_sync_sign(const struct smp_signctx *ctx)
{
	int i;

	/* XXX: round to pages */
	i = msync((void*)ctx->ss, ctx->ss->length + SHA256_LEN, MS_SYNC);
	if (i && 0)
		fprintf(stderr, "SyncSign(%p %s) = %d %s\n",
		    ctx->ss, ctx->id, i, strerror(errno));
}

/*--------------------------------------------------------------------
 * Create and force a new signature to backing store
 */

static void
smp_new_sign(const struct smp_sc *sc, struct smp_signctx *ctx,
    uint64_t off, const char *id)
{
	smp_def_sign(sc, ctx, off, id);
	smp_reset_sign(ctx);
	smp_sync_sign(ctx);
}

/*--------------------------------------------------------------------
 * Calculate payload of some stuff
 */

static uint64_t
smp_stuff_len(const struct smp_sc *sc, unsigned stuff)
{
	uint64_t l;

	assert(stuff < SMP_END_STUFF);
	l = sc->ident->stuff[stuff + 1] - sc->ident->stuff[stuff];
	l -= SMP_SIGN_SPACE;
	return (l);
}
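
/*
 * Example: with the default layout written by smp_newsilo() below, the
 * BAN and SEG regions are each laid out 1MB apart, so smp_stuff_len()
 * returns 1MB minus SMP_SIGN_SPACE for any of them.
 */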

/*--------------------------------------------------------------------
 * Initialize a Silo with a valid but empty structure.
 *
 * XXX: more intelligent sizing of things.
 */

static void
smp_newsilo(struct smp_sc *sc)
{
	struct smp_ident	*si;

	ASSERT_MGT();
	assert(strlen(SMP_IDENT_STRING) < sizeof si->ident);

	/* Choose a new random number */
	sc->unique = random();

	smp_reset_sign(&sc->idn);
	si = sc->ident;

	memset(si, 0, sizeof *si);
	strcpy(si->ident, SMP_IDENT_STRING);
	si->byte_order = 0x12345678;
	si->size = sizeof *si;
	si->major_version = 2;
	si->unique = sc->unique;
	si->mediasize = sc->mediasize;
	si->granularity = sc->granularity;

	/*
	 * Aim for cache-line-width
	 */
	si->align = sizeof(void*) * 2;
	sc->align = si->align;

	si->stuff[SMP_BAN1_STUFF] = sc->granularity;
	si->stuff[SMP_BAN2_STUFF] = si->stuff[SMP_BAN1_STUFF] + 1024*1024;
	si->stuff[SMP_SEG1_STUFF] = si->stuff[SMP_BAN2_STUFF] + 1024*1024;
	si->stuff[SMP_SEG2_STUFF] = si->stuff[SMP_SEG1_STUFF] + 1024*1024;
	si->stuff[SMP_SPC_STUFF] = si->stuff[SMP_SEG2_STUFF] + 1024*1024;
	si->stuff[SMP_END_STUFF] = si->mediasize;
	assert(si->stuff[SMP_SPC_STUFF] < si->stuff[SMP_END_STUFF]);

	smp_new_sign(sc, &sc->ban1, si->stuff[SMP_BAN1_STUFF], "BAN 1");
	smp_new_sign(sc, &sc->ban2, si->stuff[SMP_BAN2_STUFF], "BAN 2");
	smp_new_sign(sc, &sc->seg1, si->stuff[SMP_SEG1_STUFF], "SEG 1");
	smp_new_sign(sc, &sc->seg2, si->stuff[SMP_SEG2_STUFF], "SEG 2");

	smp_append_sign(&sc->idn, si, sizeof *si);
	smp_sync_sign(&sc->idn);
}
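
/*
 * The resulting layout of a fresh silo (offsets per the stuff[] table
 * filled in above):
 *
 *    0              SILO signature + struct smp_ident
 *    granularity    BAN 1   (1MB)
 *     + 1MB         BAN 2   (1MB)
 *     + 2MB         SEG 1   (1MB)
 *     + 3MB         SEG 2   (1MB)
 *     + 4MB         object space, up to mediasize
 */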

/*--------------------------------------------------------------------
 * Check if a silo is valid.
 */

static int
smp_valid_silo(struct smp_sc *sc)
{
	struct smp_ident	*si;
	int i, j;

	assert(strlen(SMP_IDENT_STRING) < sizeof si->ident);

	if (smp_chk_sign(&sc->idn))
		return (1);

	si = sc->ident;
	if (strcmp(si->ident, SMP_IDENT_STRING))
		return (2);
	if (si->byte_order != 0x12345678)
		return (3);
	if (si->size != sizeof *si)
		return (4);
	if (si->major_version != 2)
		return (5);
	if (si->mediasize != sc->mediasize)
		return (7);
	if (si->granularity != sc->granularity)
		return (8);
	if (si->align < sizeof(void*))
		return (9);
	if (!PWR2(si->align))
		return (10);
	sc->align = si->align;
	sc->unique = si->unique;

	/* XXX: Sanity check stuff[6] */
	assert(si->stuff[SMP_BAN1_STUFF] > sizeof *si + SHA256_LEN);
	assert(si->stuff[SMP_BAN2_STUFF] > si->stuff[SMP_BAN1_STUFF]);
	assert(si->stuff[SMP_SEG1_STUFF] > si->stuff[SMP_BAN2_STUFF]);
	assert(si->stuff[SMP_SEG2_STUFF] > si->stuff[SMP_SEG1_STUFF]);
	assert(si->stuff[SMP_SPC_STUFF] > si->stuff[SMP_SEG2_STUFF]);
	assert(si->stuff[SMP_END_STUFF] == sc->mediasize);

	assert(smp_stuff_len(sc, SMP_SEG1_STUFF) > 65536);
	assert(smp_stuff_len(sc, SMP_SEG1_STUFF) ==
	    smp_stuff_len(sc, SMP_SEG2_STUFF));
	assert(smp_stuff_len(sc, SMP_BAN1_STUFF) > 65536);
	assert(smp_stuff_len(sc, SMP_BAN1_STUFF) ==
	    smp_stuff_len(sc, SMP_BAN2_STUFF));

	smp_def_sign(sc, &sc->ban1, si->stuff[SMP_BAN1_STUFF], "BAN 1");
	smp_def_sign(sc, &sc->ban2, si->stuff[SMP_BAN2_STUFF], "BAN 2");
	smp_def_sign(sc, &sc->seg1, si->stuff[SMP_SEG1_STUFF], "SEG 1");
	smp_def_sign(sc, &sc->seg2, si->stuff[SMP_SEG2_STUFF], "SEG 2");

	/* We must have one valid BAN table */
	i = smp_chk_sign(&sc->ban1);
	j = smp_chk_sign(&sc->ban2);
	if (i && j)
		return (100 + i * 10 + j);

	/* We must have one valid SEG table */
	i = smp_chk_sign(&sc->seg1);
	j = smp_chk_sign(&sc->seg2);
	if (i && j)
		return (200 + i * 10 + j);
	return (0);
}
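
/*
 * Return values 1...10 above identify the specific check which failed;
 * values of the form 100 + 10*i + j (BAN) and 200 + 10*i + j (SEG)
 * encode the smp_chk_sign() results for both copies of the table.
 */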

/*--------------------------------------------------------------------
 * Calculate cleaner metrics from silo dimensions
 */

static void
smp_metrics(struct smp_sc *sc)
{

	/*
	 * We do not want to lose too big chunks of the silo's
	 * content when we are forced to clean a segment.
	 *
	 * For now insist that a segment covers no more than 1% of the silo.
	 *
	 * XXX: This should possibly depend on the size of the silo so
	 * XXX: trivially small silos do not run into trouble along
	 * XXX: the lines of "one object per segment".
	 */
	sc->min_nseg = 10;
	sc->max_segl = smp_stuff_len(sc, SMP_SPC_STUFF) / sc->min_nseg;

	fprintf(stderr, "min_nseg = %u, max_segl = %ju\n",
	    sc->min_nseg, (uintmax_t)sc->max_segl);

	/*
	 * The number of segments is limited by the size of the segment
	 * table(s) and from that follows the minimum size of a segment.
	 */
	sc->max_nseg = smp_stuff_len(sc, SMP_SEG1_STUFF) / sc->min_nseg;
	sc->min_segl = smp_stuff_len(sc, SMP_SPC_STUFF) / sc->max_nseg;

	while (sc->min_segl < sizeof(struct object)) {
		sc->max_nseg /= 2;
		sc->min_segl = smp_stuff_len(sc, SMP_SPC_STUFF) /
		    sc->max_nseg;
	}

	fprintf(stderr, "max_nseg = %u, min_segl = %ju\n",
	    sc->max_nseg, (uintmax_t)sc->min_segl);

	/*
	 * Set our initial aim point at the exponential average of the
	 * two extremes.
	 *
	 * XXX: This is a pretty arbitrary choice, but having no idea of
	 * XXX: object count, size distribution or ttl pattern at this
	 * XXX: point, we have to do something.
	 */
	sc->aim_nseg =
	    (unsigned)exp((log(sc->min_nseg) + log(sc->max_nseg)) * .5);
	sc->aim_segl = smp_stuff_len(sc, SMP_SPC_STUFF) / sc->aim_nseg;

	fprintf(stderr, "aim_nseg = %u, aim_segl = %ju\n",
	    sc->aim_nseg, (uintmax_t)sc->aim_segl);

	/*
	 * How much space in the free reserve pool ?
	 */
	sc->free_reserve = sc->aim_segl * 10;

	fprintf(stderr, "free_reserve = %ju\n", sc->free_reserve);
}
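
/*
 * Worked example (rough numbers): with a 10GB object space and the 1MB
 * segment table above, max_segl = 1GB and max_nseg = 1MB / 10 ~= 104857.
 * The "exponential average" is the geometric mean, so
 * aim_nseg = sqrt(10 * 104857) ~= 1024, giving aim_segl ~= 10MB and
 * free_reserve ~= 100MB.
 */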

/*--------------------------------------------------------------------
 * Set up persistent storage silo in the master process.
 */

static void
smp_init(struct stevedore *parent, int ac, char * const *av)
{
	struct smp_sc		*sc;
	int i;

	ASSERT_MGT();

	AZ(av[ac]);
#define SIZOF(foo)       fprintf(stderr, \
    "sizeof(%s) = %zu = 0x%zx\n", #foo, sizeof(foo), sizeof(foo));
	SIZOF(struct smp_ident);
	SIZOF(struct smp_sign);
	SIZOF(struct smp_segptr);
	SIZOF(struct smp_object);
#undef SIZOF

	/* See comments in persistent.h */
	assert(sizeof(struct smp_ident) == SMP_IDENT_SIZE);

	/* Allocate softc */
	ALLOC_OBJ(sc, SMP_SC_MAGIC);
	XXXAN(sc);
	sc->parent = parent;
	sc->fd = -1;
	VTAILQ_INIT(&sc->segments);

	/* Argument processing */
	if (ac != 2)
		ARGV_ERR("(-spersistent) wrong number of arguments\n");

	i = STV_GetFile(av[0], &sc->fd, &sc->filename, "-spersistent");
	if (i == 2)
		ARGV_ERR("(-spersistent) need filename (not directory)\n");

	sc->align = sizeof(void*) * 2;
	sc->granularity = getpagesize();
	sc->mediasize = STV_FileSize(sc->fd, av[1], &sc->granularity,
	    "-spersistent");

	AZ(ftruncate(sc->fd, sc->mediasize));

	sc->base = mmap(NULL, sc->mediasize, PROT_READ|PROT_WRITE,
	    MAP_NOCORE | MAP_NOSYNC | MAP_SHARED, sc->fd, 0);

	if (sc->base == MAP_FAILED)
		ARGV_ERR("(-spersistent) failed to mmap (%s)\n",
		    strerror(errno));

	smp_def_sign(sc, &sc->idn, 0, "SILO");
	sc->ident = SIGN_DATA(&sc->idn);

	i = smp_valid_silo(sc);
	if (i)
		smp_newsilo(sc);
	AZ(smp_valid_silo(sc));

	smp_metrics(sc);

	parent->priv = sc;

	/* XXX: only for sendfile I guess... */
	mgt_child_inherit(sc->fd, "storage_persistent");
}
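
/*
 * Example invocation (assuming the varnishd argument syntax of this era):
 * "varnishd -s persistent,/path/to/silo,10G", where av[0] is the file
 * handed to STV_GetFile() and av[1] the size spec handed to
 * STV_FileSize() above.
 */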

/*--------------------------------------------------------------------
 * Write the segmentlist back to the silo.
 *
 * We write the first copy, sync it synchronously, then write the
 * second copy and sync it synchronously.
 *
 * Provided the kernel doesn't lie, that means we will always have
 * at least one valid copy in the silo.
 */

static void
smp_save_seg(const struct smp_sc *sc, struct smp_signctx *ctx)
{
	struct smp_segptr *ss;
	struct smp_seg *sg;
	uint64_t length;

	Lck_AssertHeld(&sc->mtx);
	smp_reset_sign(ctx);
	ss = SIGN_DATA(ctx);
	length = 0;
	VTAILQ_FOREACH(sg, &sc->segments, list) {
		assert(sg->p.offset < sc->mediasize);
		assert(sg->p.offset + sg->p.length <= sc->mediasize);
		*ss = sg->p;
		ss++;
		length += sizeof *ss;
	}
	smp_append_sign(ctx, SIGN_DATA(ctx), length);
	smp_sync_sign(ctx);
}

static void
smp_save_segs(struct smp_sc *sc)
{
	struct smp_seg *sg, *sg2;

	Lck_AssertHeld(&sc->mtx);

	/*
	 * Remove empty segments from the front of the list
	 * before we write the segments to disk.
	 */
	VTAILQ_FOREACH_SAFE(sg, &sc->segments, list, sg2) {
		if (sg->nobj > 0)
			break;
		if (sg == sc->cur_seg)
			continue;
		VTAILQ_REMOVE(&sc->segments, sg, list);
		free(sg);
	}
	smp_save_seg(sc, &sc->seg1);
	smp_save_seg(sc, &sc->seg2);
}

/*---------------------------------------------------------------------
 */

static struct smp_object *
smp_find_so(const struct smp_seg *sg, const struct objcore *oc)
{
	struct smp_object *so;
	unsigned smp_idx;

	smp_idx = oc->priv2;
	assert(smp_idx > 0);
	assert(smp_idx <= sg->p.lobjlist);
	so = &sg->objs[sg->p.lobjlist - smp_idx];
	return (so);
}
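
/*
 * smp_objects are allocated from the top of the segment downwards, so
 * index 'smp_idx' (1-based, stored in oc->priv2) lives at
 * objs[lobjlist - smp_idx].
 */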

/*---------------------------------------------------------------------
 * Check if a given storage structure is valid to use
 */

static int
smp_loaded_st(const struct smp_sc *sc, const struct smp_seg *sg,
    const struct storage *st)
{
	struct smp_seg *sg2;
	const uint8_t *pst;
	uint64_t o;

	(void)sg;		/* XXX: faster: Start search from here */
	pst = (const void *)st;

	if (pst < (sc->base + sc->ident->stuff[SMP_SPC_STUFF]))
		return (0x01);		/* Before silo payload start */
	if (pst > (sc->base + sc->ident->stuff[SMP_END_STUFF]))
		return (0x02);		/* After silo end */

	o = pst - sc->base;

	/* Find which segment contains the storage structure */
	VTAILQ_FOREACH(sg2, &sc->segments, list)
		if (o > sg2->p.offset && (o + sizeof(*st)) < sg2->p.objlist)
			break;
	if (sg2 == NULL)
		return (0x04);		/* No claiming segment */
	if (!(sg2->flags & SMP_SEG_LOADED))
		return (0x08);		/* Claiming segment not loaded */

	/* It is now safe to access the storage structure */
	if (st->magic != STORAGE_MAGIC)
		return (0x10);		/* Not enough magic */

	if (o + st->space >= sg2->p.objlist)
		return (0x20);		/* Allocation not inside segment */

	if (st->len > st->space)
		return (0x40);		/* Plain bad... */

	/*
	 * XXX: We could patch up st->stevedore and st->priv here
	 * XXX: but if things go right, we will never need them.
	 */
	return (0);
}
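
/*
 * The failure codes above are bit flags, so a caller can OR several of
 * them together; smp_oc_getobj() below does exactly that across all of
 * an object's storage chunks, and adds 0x100 when the chunk lengths do
 * not sum to o->len.
 */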

/*---------------------------------------------------------------------
 * objcore methods for persistent objects
 */

static struct object *
smp_oc_getobj(struct worker *wrk, struct objcore *oc)
{
	struct object *o;
	struct smp_seg *sg;
	struct smp_object *so;
	struct storage *st;
	uint64_t l;
	int bad;

	/* Some calls are direct, but they should match anyway */
	assert(oc->methods->getobj == smp_oc_getobj);

	CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
	if (wrk == NULL)
		AZ(oc->flags & OC_F_NEEDFIXUP);

	CAST_OBJ_NOTNULL(sg, oc->priv, SMP_SEG_MAGIC);
	so = smp_find_so(sg, oc);

	o = (void*)(sg->sc->base + so->ptr);
	/*
	 * The object may not be in this segment since we allocate it
	 * in a separate operation from the smp_object.  We could check
	 * that it is in a later segment, but that would be complicated.
	 * XXX: For now, be happy if it is inside the silo
	 */
	ASSERT_PTR_IN_SILO(sg->sc, o);
	CHECK_OBJ_NOTNULL(o, OBJECT_MAGIC);

	/*
	 * If this flag is not set, it will not be, and the lock is not
	 * needed to test it.
	 */
	if (!(oc->flags & OC_F_NEEDFIXUP))
		return (o);

	AN(wrk);
	Lck_Lock(&sg->sc->mtx);
	/* Check again, we might have raced. */
	if (oc->flags & OC_F_NEEDFIXUP) {
		/* We trust caller to have a refcnt for us */
		o->objcore = oc;

		bad = 0;
		l = 0;
		VTAILQ_FOREACH(st, &o->store, list) {
			bad |= smp_loaded_st(sg->sc, sg, st);
			if (bad)
				break;
			l += st->len;
		}
		if (l != o->len)
			bad |= 0x100;

		if (bad) {
			o->ttl = 0;
			o->grace = 0;
			so->ttl = 0;
		}

		sg->nfixed++;
		wrk->stats.n_object++;
		wrk->stats.n_vampireobject--;
		oc->flags &= ~OC_F_NEEDFIXUP;
	}
	Lck_Unlock(&sg->sc->mtx);
	return (o);
}

static void
smp_oc_updatemeta(struct objcore *oc)
{
	struct object *o;
	struct smp_seg *sg;
	struct smp_object *so;
	double mttl;

	CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
	o = smp_oc_getobj(NULL, oc);
	AN(o);
	CAST_OBJ_NOTNULL(sg, oc->priv, SMP_SEG_MAGIC);
	CHECK_OBJ_NOTNULL(sg->sc, SMP_SC_MAGIC);
	so = smp_find_so(sg, oc);

	if (isnan(o->grace))
		mttl = o->ttl;
	else
		mttl = - (o->ttl + o->grace);

	if (sg == sg->sc->cur_seg) {
		/* Lock necessary, we might race close_seg */
		Lck_Lock(&sg->sc->mtx);
		so->ban = o->ban_t;
		so->ttl = mttl;
		Lck_Unlock(&sg->sc->mtx);
	} else {
		so->ban = o->ban_t;
		so->ttl = mttl;
	}
}

static void __match_proto__()
smp_oc_freeobj(struct objcore *oc)
{
	struct smp_seg *sg;
	struct smp_object *so;

	CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);

	CAST_OBJ_NOTNULL(sg, oc->priv, SMP_SEG_MAGIC);
	so = smp_find_so(sg, oc);

	Lck_Lock(&sg->sc->mtx);
	so->ttl = 0;
	so->ptr = 0;

	assert(sg->nobj > 0);
	assert(sg->nfixed > 0);
	sg->nobj--;
	sg->nfixed--;

	Lck_Unlock(&sg->sc->mtx);
}

static struct objcore_methods smp_oc_methods = {
	.getobj =		smp_oc_getobj,
	.updatemeta =		smp_oc_updatemeta,
	.freeobj =		smp_oc_freeobj,
};

/*--------------------------------------------------------------------
 * Add a new ban to all silos
 */

static void
smp_appendban(struct smp_sc *sc, struct smp_signctx *ctx,
    double t0, uint32_t flags, uint32_t len, const char *ban)
{
	uint8_t *ptr, *ptr2;

	(void)sc;
	ptr = ptr2 = SIGN_END(ctx);

	memcpy(ptr, "BAN", 4);
	ptr += 4;

	memcpy(ptr, &t0, sizeof t0);
	ptr += sizeof t0;

	memcpy(ptr, &flags, sizeof flags);
	ptr += sizeof flags;

	memcpy(ptr, &len, sizeof len);
	ptr += sizeof len;

	memcpy(ptr, ban, len);
	ptr += len;

	smp_append_sign(ctx, ptr2, ptr - ptr2);
}
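
/*
 * One ban record, as appended above:
 *
 *    [ "BAN\0" | double t0 | uint32_t flags | uint32_t len | len bytes ]
 *
 * where the payload is the NUL-terminated ban string; smp_open_bans()
 * below parses exactly this format back.
 */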

void
SMP_NewBan(double t0, const char *ban)
{
	struct smp_sc *sc;
	uint32_t l = strlen(ban) + 1;

	VTAILQ_FOREACH(sc, &silos, list) {
		smp_appendban(sc, &sc->ban1, t0, 0, l, ban);
		smp_appendban(sc, &sc->ban2, t0, 0, l, ban);
	}
}

/*--------------------------------------------------------------------
 * Attempt to open and read in a ban list
 */

static int
smp_open_bans(struct smp_sc *sc, struct smp_signctx *ctx)
{
	uint8_t *ptr, *pe;
	double t0;
	uint32_t flags, length;
	int i, retval = 0;

	ASSERT_CLI();
	(void)sc;
	i = smp_chk_sign(ctx);
	if (i)
		return (i);
	ptr = SIGN_DATA(ctx);
	pe = ptr + ctx->ss->length;

	while (ptr < pe) {
		if (memcmp(ptr, "BAN", 4)) {
			retval = 1001;
			break;
		}
		ptr += 4;

		memcpy(&t0, ptr, sizeof t0);
		ptr += sizeof t0;

		memcpy(&flags, ptr, sizeof flags);
		ptr += sizeof flags;
		if (flags != 0) {
			retval = 1002;
			break;
		}

		memcpy(&length, ptr, sizeof length);
		ptr += sizeof length;
		if (ptr + length > pe) {
			retval = 1003;
			break;
		}

		if (ptr[length - 1] != '\0') {
			retval = 1004;
			break;
		}

		BAN_Reload(t0, flags, (const char *)ptr);

		ptr += length;
	}
	assert(ptr <= pe);
	return (retval);
}

/*--------------------------------------------------------------------*/

static uint64_t
smp_segend(const struct smp_seg *sg)
{

	return (sg->p.offset + sg->p.length);
}

static uint64_t
smp_spaceleft(const struct smp_sc *sc, const struct smp_seg *sg)
{

	IASSERTALIGN(sc, sc->next_bot);
	assert(sc->next_bot <= sc->next_top - IRNUP(sc, SMP_SIGN_SPACE));
	assert(sc->next_bot >= sg->p.offset);
	assert(sc->next_top < sg->p.offset + sg->p.length);
	return ((sc->next_top - sc->next_bot) - IRNUP(sc, SMP_SIGN_SPACE));
}
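
/*
 * Allocation picture of the current segment:
 *
 *    [ SEGHEAD | storage grows up -> ... <- smp_objects grow down | SEGTAIL ]
 *               ^-- next_bot                ^-- next_top
 *
 * One extra SMP_SIGN_SPACE is held back between the two fronts so the
 * OBJIDX signature can always be written when the segment is closed.
 */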

/*--------------------------------------------------------------------
 * Load segments
 *
 * The overall objective is to register the existence of an object, based
 * only on the minimally sized struct smp_object, without causing the
 * main object to be faulted in.
 *
 * XXX: We can test this by mprotecting the main body of the segment
 * XXX: until the first fixup happens, or even just over this loop.
 * XXX: However, that requires that the smp_objects start further
 * XXX: into the segment than a page, so that they do not get hit
 * XXX: by the protection.
 */

static void
smp_load_seg(const struct sess *sp, const struct smp_sc *sc,
    struct smp_seg *sg)
{
	struct smp_object *so;
	struct objcore *oc;
	uint32_t no;
	double t_now = TIM_real();
	struct smp_signctx ctx[1];

	ASSERT_SILO_THREAD(sc);
	CHECK_OBJ_NOTNULL(sp, SESS_MAGIC);
	CHECK_OBJ_NOTNULL(sg, SMP_SEG_MAGIC);
	CHECK_OBJ_NOTNULL(sg->lru, LRU_MAGIC);
	assert(sg->flags & SMP_SEG_MUSTLOAD);
	sg->flags &= ~SMP_SEG_MUSTLOAD;
	AN(sg->p.offset);
	if (sg->p.objlist == 0)
		return;
	smp_def_sign(sc, ctx, sg->p.offset, "SEGHEAD");
	if (smp_chk_sign(ctx))
		return;

	/* test SEGTAIL */
	/* test OBJIDX */
	so = (void*)(sc->base + sg->p.objlist);
	sg->objs = so;
	no = sg->p.lobjlist;
	/* Clear the bogus "hold" count */
	sg->nobj = 0;
	for (; no > 0; so++, no--) {
		if (so->ttl > 0 && so->ttl < t_now)
			continue;
		if (so->ttl < 0 && -so->ttl < t_now)
			continue;
		HSH_Prealloc(sp);
		oc = sp->wrk->nobjcore;
		oc->flags |= OC_F_NEEDFIXUP | OC_F_LRUDONTMOVE;
		oc->flags &= ~OC_F_BUSY;
		oc->priv = sg;
		oc->priv2 = no;
		oc->methods = &smp_oc_methods;
		oc->ban = BAN_RefBan(oc, so->ban, sc->tailban);
		memcpy(sp->wrk->nobjhead->digest, so->hash, SHA256_LEN);
		(void)HSH_Insert(sp);
		AZ(sp->wrk->nobjcore);
		EXP_Inject(oc, sg->lru, fabs(so->ttl));
		sg->nobj++;
	}
	WRK_SumStat(sp->wrk);
	sg->flags |= SMP_SEG_LOADED;
}
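
/*
 * Note the sign convention used above: so->ttl > 0 is a plain expiry
 * time, while a negative value stores -(ttl + grace) as written by
 * smp_oc_updatemeta(); either way fabs(so->ttl) is the time the object
 * stops being useful, which is what the two skip-tests check.
 */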

/*--------------------------------------------------------------------
 * Attempt to open and read in a segment list
 */

static int
smp_open_segs(struct smp_sc *sc, struct smp_signctx *ctx)
{
	uint64_t length, l;
	struct smp_segptr *ss, *se;
	struct smp_seg *sg, *sg1, *sg2;
	int i, n = 0;

	ASSERT_CLI();
	i = smp_chk_sign(ctx);
	if (i)
		return (i);

	ss = SIGN_DATA(ctx);
	length = ctx->ss->length;

	if (length == 0) {
		/* No segments */
		sc->free_offset = sc->ident->stuff[SMP_SPC_STUFF];
		return (0);
	}
	se = ss + length / sizeof *ss;
	se--;
	assert(ss <= se);

	/*
	 * Locate the free reserve, there are only two basic cases,
	 * but once we start dropping segments, things get more complicated.
	 */

	sc->free_offset = se->offset + se->length;
	l = sc->mediasize - sc->free_offset;
	if (se->offset > ss->offset && l >= sc->free_reserve) {
		/*
		 * [__xxxxyyyyzzzz___]
		 * Plenty of space at tail, do nothing.
		 */
	} else if (ss->offset > se->offset) {
		/*
		 * [zzzz____xxxxyyyy_]
		 * (make) space between ends
		 * We might nuke the entire tail end without getting
		 * enough space, in which case we fall through to the
		 * last check.
		 */
		while (ss < se && ss->offset > se->offset) {
			l = ss->offset - (se->offset + se->length);
			if (l > sc->free_reserve)
				break;
			ss++;
			n++;
		}
	}

	if (l < sc->free_reserve) {
		/*
		 * [__xxxxyyyyzzzz___]
		 * (make) space at front
		 */
		sc->free_offset = sc->ident->stuff[SMP_SPC_STUFF];
		while (ss < se) {
			l = ss->offset - sc->free_offset;
			if (l > sc->free_reserve)
				break;
			ss++;
			n++;
		}
	}

	assert(l >= sc->free_reserve);

	sg1 = NULL;
	sg2 = NULL;
	for (; ss <= se; ss++) {
		ALLOC_OBJ(sg, SMP_SEG_MAGIC);
		AN(sg);
		sg->lru = LRU_Alloc();
		CHECK_OBJ_NOTNULL(sg->lru, LRU_MAGIC);
		sg->p = *ss;

		sg->flags |= SMP_SEG_MUSTLOAD;

		/*
		 * HACK: prevent save_segs from nuking segment until we have
		 * HACK: loaded it.
		 */
		sg->nobj = 1;
		if (sg1 != NULL) {
			assert(sg1->p.offset != sg->p.offset);
			if (sg1->p.offset < sg->p.offset)
				assert(smp_segend(sg1) <= sg->p.offset);
			else
				assert(smp_segend(sg) <= sg1->p.offset);
		}
		if (sg2 != NULL) {
			assert(sg2->p.offset != sg->p.offset);
			if (sg2->p.offset < sg->p.offset)
				assert(smp_segend(sg2) <= sg->p.offset);
			else
				assert(smp_segend(sg) <= sg2->p.offset);
		}

		/* XXX: check that they are inside silo */
		/* XXX: check that they don't overlap */
		/* XXX: check that they are serial */
		sg->sc = sc;
		VTAILQ_INSERT_TAIL(&sc->segments, sg, list);
		sg2 = sg;
		if (sg1 == NULL)
			sg1 = sg;
	}
	printf("Dropped %d segments to make free_reserve\n", n);
	return (0);
}

/*--------------------------------------------------------------------
 * Create a new segment
 */

static void
smp_new_seg(struct smp_sc *sc)
{
	struct smp_seg *sg, *sg2;

	Lck_AssertHeld(&sc->mtx);
	ALLOC_OBJ(sg, SMP_SEG_MAGIC);
	AN(sg);
	sg->sc = sc;
	sg->lru = LRU_Alloc();
	CHECK_OBJ_NOTNULL(sg->lru, LRU_MAGIC);

	/* XXX: find where it goes in silo */

	sg->p.offset = sc->free_offset;
	/* XXX: align */
	assert(sg->p.offset >= sc->ident->stuff[SMP_SPC_STUFF]);
	assert(sg->p.offset < sc->mediasize);

	sg->p.length = sc->aim_segl;
	sg->p.length &= ~7;

	if (smp_segend(sg) > sc->mediasize) {
		sc->free_offset = sc->ident->stuff[SMP_SPC_STUFF];
		sg->p.offset = sc->free_offset;
		sg2 = VTAILQ_FIRST(&sc->segments);
		if (smp_segend(sg) > sg2->p.offset) {
			printf("Out of space in persistent silo\n");
			printf("Committing suicide, restart will make space\n");
			exit(0);
		}
	}
	assert(smp_segend(sg) <= sc->mediasize);

	sg2 = VTAILQ_FIRST(&sc->segments);
	if (sg2 != NULL && sg2->p.offset > sc->free_offset) {
		if (smp_segend(sg) > sg2->p.offset) {
			printf("Out of space in persistent silo\n");
			printf("Committing suicide, restart will make space\n");
			exit(0);
		}
		assert(smp_segend(sg) <= sg2->p.offset);
	}

	sg->p.offset = IRNUP(sc, sg->p.offset);
	sg->p.length = IRNDN(sc, sg->p.length);
	sc->free_offset = sg->p.offset + sg->p.length;

	VTAILQ_INSERT_TAIL(&sc->segments, sg, list);

	/* Neuter the new segment in case there is an old one there */
	AN(sg->p.offset);
	smp_def_sign(sc, sg->ctx, sg->p.offset, "SEGHEAD");
	smp_reset_sign(sg->ctx);
	smp_sync_sign(sg->ctx);

	/* Set up our allocation points */
	sc->cur_seg = sg;
	sc->next_bot = sg->p.offset + IRNUP(sc, SMP_SIGN_SPACE);
	sc->next_top = smp_segend(sg);
	sc->next_top -= IRNUP(sc, SMP_SIGN_SPACE);
	IASSERTALIGN(sc, sc->next_bot);
	IASSERTALIGN(sc, sc->next_top);
	sg->objs = (void*)(sc->base + sc->next_top);
}

/*--------------------------------------------------------------------
 * Close a segment
 */

static void
smp_close_seg(struct smp_sc *sc, struct smp_seg *sg)
{
	uint64_t left, dst, len;
	void *dp;

	Lck_AssertHeld(&sc->mtx);

	assert(sg == sc->cur_seg);
	AN(sg->p.offset);
	sc->cur_seg = NULL;

	if (sg->nalloc == 0) {
		/* XXX: if segment is empty, delete instead */
		VTAILQ_REMOVE(&sc->segments, sg, list);
		free(sg);
		return;
	}

	/*
	 * If there is enough space left that we can move the smp_objects
	 * down without overwriting the present copy, we will do so to
	 * compact the segment.
	 */
	left = smp_spaceleft(sc, sg);
	len = sizeof(struct smp_object) * sg->p.lobjlist;
	if (len < left) {
		dst = sc->next_bot + IRNUP(sc, SMP_SIGN_SPACE);
		dp = sc->base + dst;
		assert((uintptr_t)dp + len < (uintptr_t)sg->objs);
		memcpy(dp, sg->objs, len);
		sc->next_top = dst;
		sg->objs = dp;
		sg->p.length = (sc->next_top - sg->p.offset)
		    + len + IRNUP(sc, SMP_SIGN_SPACE);
		(void)smp_spaceleft(sc, sg);	/* for the asserts */
	}

	/* Update the segment header */
	sg->p.objlist = sc->next_top;

	/* Write the (empty) OBJIDX signature */
	sc->next_top -= IRNUP(sc, SMP_SIGN_SPACE);
	assert(sc->next_top >= sc->next_bot);
	smp_def_sign(sc, sg->ctx, sc->next_top, "OBJIDX");
	smp_reset_sign(sg->ctx);
	smp_sync_sign(sg->ctx);

	/* Write the (empty) SEGTAIL signature */
	smp_def_sign(sc, sg->ctx,
	    sg->p.offset + sg->p.length - IRNUP(sc, SMP_SIGN_SPACE),
	    "SEGTAIL");
	smp_reset_sign(sg->ctx);
	smp_sync_sign(sg->ctx);

	/* Save segment list */
	smp_save_segs(sc);
	sc->free_offset = smp_segend(sg);
}
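
/*
 * A closed segment thus looks like:
 *
 *    [ SEGHEAD | storage | OBJIDX | smp_object array | SEGTAIL ]
 *                                  ^-- sg->p.objlist points here
 */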

/*--------------------------------------------------------------------
 * Silo worker thread
 */

static void *
smp_thread(struct sess *sp, void *priv)
{
	struct smp_sc	*sc;
	struct smp_seg *sg;

	(void)sp;
	CAST_OBJ_NOTNULL(sc, priv, SMP_SC_MAGIC);

	/* First, load all the objects from all segments */
	VTAILQ_FOREACH(sg, &sc->segments, list)
		if (sg->flags & SMP_SEG_MUSTLOAD)
			smp_load_seg(sp, sc, sg);

	sc->flags |= SMP_SC_LOADED;
	BAN_Deref(&sc->tailban);
	sc->tailban = NULL;
	printf("Silo completely loaded\n");
	while (1)
		(void)sleep(1);
	NEEDLESS_RETURN(NULL);
}

/*--------------------------------------------------------------------
 * Open a silo in the worker process
 */

static void
smp_open(const struct stevedore *st)
{
	struct smp_sc	*sc;

	ASSERT_CLI();

	CAST_OBJ_NOTNULL(sc, st->priv, SMP_SC_MAGIC);

	Lck_New(&sc->mtx, lck_smp);
	Lck_Lock(&sc->mtx);

	sc->stevedore = st;

	/* We trust the parent to give us a valid silo, for good measure: */
	AZ(smp_valid_silo(sc));

	AZ(mprotect(sc->base, 4096, PROT_READ));

	sc->ident = SIGN_DATA(&sc->idn);

	/* We attempt ban1 first, and if that fails, try ban2 */
	if (smp_open_bans(sc, &sc->ban1))
		AZ(smp_open_bans(sc, &sc->ban2));

	/* We attempt seg1 first, and if that fails, try seg2 */
	if (smp_open_segs(sc, &sc->seg1))
		AZ(smp_open_segs(sc, &sc->seg2));

	sc->tailban = BAN_TailRef();
	AN(sc->tailban);

	/* XXX: save segments to ensure consistency between seg1 & seg2 ? */

	/* XXX: abandon early segments to make sure we have free space ? */

	/* Open a new segment, so we are ready to write */
	smp_new_seg(sc);

	/* Start the silo worker thread, it will load the objects */
	WRK_BgThread(&sc->thread, "persistence", smp_thread, sc);

	VTAILQ_INSERT_TAIL(&silos, sc, list);
	Lck_Unlock(&sc->mtx);
}

/*--------------------------------------------------------------------
 * Close a silo
 */

static void
smp_close(const struct stevedore *st)
{
	struct smp_sc	*sc;

	ASSERT_CLI();
	CAST_OBJ_NOTNULL(sc, st->priv, SMP_SC_MAGIC);
	Lck_Lock(&sc->mtx);
	smp_close_seg(sc, sc->cur_seg);
	Lck_Unlock(&sc->mtx);

	/* XXX: reap thread */
}

/*--------------------------------------------------------------------
 * Allocate a bite.
 *
 * Allocate [min_size...max_size] space from the bottom of the segment,
 * as is convenient.
 *
 * If 'so' + 'idx' is given, also allocate a smp_object from the top
 * of the segment.
 *
 * Return the segment in 'ssg' if given.
 */

static struct storage *
smp_allocx(struct stevedore *st, size_t min_size, size_t max_size,
    struct smp_object **so, unsigned *idx, struct smp_seg **ssg)
{
	struct smp_sc *sc;
	struct storage *ss;
	struct smp_seg *sg;
	unsigned tries;
	uint64_t left, extra;

	CAST_OBJ_NOTNULL(sc, st->priv, SMP_SC_MAGIC);
	assert(min_size <= max_size);

	max_size = IRNUP(sc, max_size);
	min_size = IRNUP(sc, min_size);

	extra = IRNUP(sc, sizeof(*ss));
	if (so != NULL) {
		extra += sizeof(**so);
		AN(idx);
	}

	Lck_Lock(&sc->mtx);
	sg = NULL;
	ss = NULL;
	for (tries = 0; tries < 3; tries++) {
		left = smp_spaceleft(sc, sc->cur_seg);
		if (left >= extra + min_size)
			break;
		smp_close_seg(sc, sc->cur_seg);
		smp_new_seg(sc);
	}
	if (left >= extra + min_size) {
		if (left < extra + max_size)
			max_size = IRNDN(sc, left - extra);

		sg = sc->cur_seg;
		ss = (void*)(sc->base + sc->next_bot);
		sc->next_bot += max_size + IRNUP(sc, sizeof(*ss));
		sg->nalloc++;
		if (so != NULL) {
			sc->next_top -= sizeof(**so);
			*so = (void*)(sc->base + sc->next_top);
			/* Render this smp_object mostly harmless */
			(*so)->ttl = 0.;
			(*so)->ban = 0.;
			(*so)->ptr = 0;
			sg->objs = *so;
			*idx = ++sg->p.lobjlist;
		}
		(void)smp_spaceleft(sc, sg);	/* for the assert */
	}
	Lck_Unlock(&sc->mtx);

	if (ss == NULL)
		return (ss);
	AN(sg);
	assert(max_size >= min_size);

	/* Fill the storage structure */
	memset(ss, 0, sizeof *ss);
	ss->magic = STORAGE_MAGIC;
	ss->ptr = PRNUP(sc, ss + 1);
	ss->space = max_size;
	ss->priv = sc;
	ss->stevedore = st;
	ss->fd = sc->fd;
	if (ssg != NULL)
		*ssg = sg;
	return (ss);
}

/*--------------------------------------------------------------------
 * Find the per-segment lru list for this object
 */

static struct lru *
smp_getlru(const struct object *o)
{
	struct smp_seg *sg;

	CHECK_OBJ_NOTNULL(o, OBJECT_MAGIC);
	CAST_OBJ_NOTNULL(sg, o->objcore->priv, SMP_SEG_MAGIC);
	return (sg->lru);
}

/*--------------------------------------------------------------------
 * Allocate an object
 */

static struct object *
smp_allocobj(struct stevedore *stv, struct sess *sp, unsigned ltot,
    const struct stv_objsecrets *soc)
{
	struct object *o;
	struct storage *st;
	struct smp_sc	*sc;
	struct smp_seg *sg;
	struct smp_object *so;
	struct objcore *oc;
	unsigned objidx;

	CAST_OBJ_NOTNULL(sc, stv->priv, SMP_SC_MAGIC);
	AN(sp->objcore);
	AN(sp->wrk->ttl >= 0);

	ltot = IRNUP(sc, ltot);

	st = smp_allocx(stv, ltot, ltot, &so, &objidx, &sg);
	if (st == NULL)
		return (NULL);

	assert(st->space >= ltot);
	ltot = st->len = st->space;

	o = STV_MkObject(sp, st->ptr, ltot, soc);
	CHECK_OBJ_NOTNULL(o, OBJECT_MAGIC);
	o->objstore = st;

	oc = o->objcore;
	CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
	oc->flags |= OC_F_LRUDONTMOVE;

	Lck_Lock(&sc->mtx);
	sg->nfixed++;
	sg->nobj++;

	/* We have to do this somewhere, might as well be here... */
	assert(sizeof so->hash == DIGEST_LEN);
	memcpy(so->hash, oc->objhead->digest, DIGEST_LEN);
	so->ttl = o->ttl;	/* XXX: grace? */
	so->ptr = (uint8_t*)o - sc->base;
	so->ban = o->ban_t;

	oc->priv = sg;
	oc->priv2 = objidx;
	oc->methods = &smp_oc_methods;

	Lck_Unlock(&sc->mtx);
	return (o);
}

/*--------------------------------------------------------------------
 * Allocate a bite
 */

static struct storage *
smp_alloc(struct stevedore *st, size_t size)
{

	return (smp_allocx(st,
	    size > 4096 ? 4096 : size, size, NULL, NULL, NULL));
}
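
/*
 * Asking for [min(size, 4096) ... size] lets a nearly-full segment
 * satisfy the request with a shorter bite instead of forcing a new
 * segment; the caller gets whatever st->space turns out to be.
 */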

/*--------------------------------------------------------------------
 * Trim a bite
 * XXX: We could trim the last allocation.
 */

static void
smp_trim(struct storage *ss, size_t size)
{

	(void)ss;
	(void)size;
}

/*--------------------------------------------------------------------
 * We don't track frees of storage, we track the objects which own the
 * storage and when there are no more objects in the first segment,
 * it can be reclaimed.
 * XXX: We could free the last allocation, but does that happen ?
 */

static void __match_proto__(storage_free_f)
smp_free(struct storage *st)
{

	/* XXX */
	(void)st;
}

/*--------------------------------------------------------------------
 * Pause until all silos have loaded.
 */

void
SMP_Ready(void)
{
	struct smp_sc *sc;

	ASSERT_CLI();
	do {
		VTAILQ_FOREACH(sc, &silos, list)
			if (!(sc->flags & SMP_SC_LOADED))
				break;
		if (sc != NULL)
			(void)sleep(1);
	} while (sc != NULL);
}

/*--------------------------------------------------------------------*/

const struct stevedore smp_stevedore = {
	.magic	=	STEVEDORE_MAGIC,
	.name	=	"persistent",
	.init	=	smp_init,
	.open	=	smp_open,
	.close	=	smp_close,
	.alloc	=	smp_alloc,
	.allocobj =	smp_allocobj,
	.getlru	=	smp_getlru,
	.free	=	smp_free,
	.trim	=	smp_trim,
};

/*--------------------------------------------------------------------
 * Persistence is a bear to test unadulterated, so we cheat by adding
 * a cli command we can use to make it do tricks for us.
 */

static void
debug_report_silo(struct cli *cli, const struct smp_sc *sc, int objs)
{
	struct smp_seg *sg;
	struct objcore *oc;

	cli_out(cli, "Silo: %s (%s)\n", sc->stevedore->ident, sc->filename);
	VTAILQ_FOREACH(sg, &sc->segments, list) {
		cli_out(cli, "  Seg: [0x%jx ... +0x%jx]\n",
		    (uintmax_t)sg->p.offset, (uintmax_t)sg->p.length);
		if (sg == sc->cur_seg)
			cli_out(cli,
			    "    Alloc: [0x%jx ... 0x%jx] = 0x%jx free\n",
			    (uintmax_t)(sc->next_bot),
			    (uintmax_t)(sc->next_top),
			    (uintmax_t)(sc->next_top - sc->next_bot));
		cli_out(cli, "    %u nobj, %u alloc, %u lobjlist, %u fixed\n",
		    sg->nobj, sg->nalloc, sg->p.lobjlist, sg->nfixed);
		if (objs) {
			VLIST_FOREACH(oc, &sg->lru->lru_head, lru_list)
				cli_out(cli, "      %s %p\n",
				    oc == &sg->lru->senteniel ?
				    "senteniel" : "OC: ", oc);
		}
	}
}

static void
debug_persistent(struct cli *cli, const char * const *av, void *priv)
{
	struct smp_sc *sc;

	(void)priv;

	if (av[2] == NULL) {
		VTAILQ_FOREACH(sc, &silos, list)
			debug_report_silo(cli, sc, 0);
		return;
	}
	VTAILQ_FOREACH(sc, &silos, list)
		if (!strcmp(av[2], sc->stevedore->ident))
			break;
	if (sc == NULL) {
		cli_out(cli, "Silo <%s> not found\n", av[2]);
		cli_result(cli, CLIS_PARAM);
		return;
	}
	if (av[3] == NULL) {
		debug_report_silo(cli, sc, 0);
		return;
	}
	Lck_Lock(&sc->mtx);
	if (!strcmp(av[3], "sync")) {
		smp_close_seg(sc, sc->cur_seg);
		smp_new_seg(sc);
	} else if (!strcmp(av[3], "dump")) {
		debug_report_silo(cli, sc, 1);
	} else {
		cli_out(cli, "Unknown operation\n");
		cli_result(cli, CLIS_PARAM);
	}
	Lck_Unlock(&sc->mtx);
}

static struct cli_proto debug_cmds[] = {
	{ "debug.persistent", "debug.persistent",
		"Persistent debugging magic:\n"
		"\tdebug.persistent [stevedore [cmd]]\n"
		"With no cmd arg, a summary of the silo is returned.\n"
		"Possible commands:\n"
		"\tsync\tClose current segment, open a new one\n"
		"\tdump\tinclude objcores in silo summary\n"
		"", 0, 2, "d", debug_persistent },
	{ NULL }
};

void
SMP_Init(void)
{
	CLI_AddFuncs(debug_cmds);
}