Commit de941933 authored by Wayne Davison's avatar Wayne Davison

Remove bypassed checksums in --inplace to improve speed.

When checking a checksum that refers to a part of an --inplace file that
has been overwritten w/o getting SUMFLG_SAME_OFFSET set, we remove the
checksum from the list.  This will speed up files that have a lot of
identical checksum blocks (e.g. sequences of zeros) that we can't use
due to them not getting marked as being the same.  Patch provided by
Michael Chapman.
parent 05fce658
......@@ -154,6 +154,9 @@ Changes since 3.0.9:
file for one way to package the resulting files. (Suggestions for
how to make this even easier to install & use are welcomed.)
- Improved the speed of some --inplace updates when there are lots of
identical checksum blocks that end up being unsuable.
- Added the --outbuf=N|L|B option for chosing the output buffering.
- Repating the --fuzzy option now causes the code to look for fuzzy matches
......
......@@ -178,7 +178,8 @@ static void hash_search(int f,struct sum_struct *s,
do {
int done_csum2 = 0;
int32 i;
uint32 hash_entry;
int32 i, *prev;
if (DEBUG_GTE(DELTASUM, 4)) {
rprintf(FINFO, "offset=%s sum=%04x%04x\n",
......@@ -186,19 +187,32 @@ static void hash_search(int f,struct sum_struct *s,
}
if (tablesize == TRADITIONAL_TABLESIZE) {
if ((i = hash_table[SUM2HASH2(s1,s2)]) < 0)
hash_entry = SUM2HASH2(s1,s2);
if ((i = hash_table[hash_entry]) < 0)
goto null_hash;
sum = (s1 & 0xffff) | (s2 << 16);
} else {
sum = (s1 & 0xffff) | (s2 << 16);
if ((i = hash_table[BIG_SUM2HASH(sum)]) < 0)
hash_entry = BIG_SUM2HASH(sum);
if ((i = hash_table[hash_entry]) < 0)
goto null_hash;
}
prev = &hash_table[hash_entry];
hash_hits++;
do {
int32 l;
/* When updating in-place, the chunk's offset must be
* either >= our offset or identical data at that offset.
* Remove any bypassed entries that we can never use. */
if (updating_basis_file && s->sums[i].offset < offset
&& !(s->sums[i].flags & SUMFLG_SAME_OFFSET)) {
*prev = s->sums[i].chain;
continue;
}
prev = &s->sums[i].chain;
if (sum != s->sums[i].sum1)
continue;
......@@ -207,12 +221,6 @@ static void hash_search(int f,struct sum_struct *s,
if (l != s->sums[i].len)
continue;
/* in-place: ensure chunk's offset is either >= our
* offset or that the data didn't move. */
if (updating_basis_file && s->sums[i].offset < offset
&& !(s->sums[i].flags & SUMFLG_SAME_OFFSET))
continue;
if (DEBUG_GTE(DELTASUM, 3)) {
rprintf(FINFO,
"potential match at %s i=%ld sum=%08x\n",
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment