Commit dc1cee7d authored by Stefan Westerfeld's avatar Stefan Westerfeld

Merge branch 'speed-detection-opt'

* speed-detection-opt:
  Update speed detection comments.
  Check for positive index without fractional shift in speed detection.
  Avoid floating point math for speed detection offset computation.
  Determine speed search compare range [begin,end] before loop (performance).
  Improve performance for speed detection bits compare.
  Reorder memory access in speed detection (performance).
Signed-off-by: Stefan Westerfeld's avatarStefan Westerfeld <stefan@space.twc.de>
parents 267755d2 1e3dc560
......@@ -87,7 +87,7 @@ public:
Mags&
operator() (int row, int col)
{
return m_data[row * m_cols + col];
return m_data[col * m_rows + row];
}
void
resize (int rows, int cols)
......@@ -129,14 +129,21 @@ public:
float dmag = 0;
int count = 0;
};
struct CmpState
{
int offset = 0;
BitValue bit_values[Params::sync_bits];
};
private:
static constexpr int OFFSET_SHIFT = 16;
vector<SyncBit> sync_bits;
MagMatrix sync_matrix;
void prepare_mags (const SpeedScanParams& scan_params);
void compare (double relative_speed);
template<bool B2>
void compare_bits (BitValue *bit_values, double relative_speed, int *start_mi, int offset);
template<int BLOCK>
void compare_bits (vector<CmpState>& cmp_states, double relative_speed);
std::mutex mutex;
vector<Score> result_scores;
......@@ -267,45 +274,62 @@ SpeedSync::prepare_mags (const SpeedScanParams& scan_params)
assert (row == n_sync_rows);
}
template<bool B2> void
SpeedSync::compare_bits (BitValue *bit_values, double relative_speed, int *start_mi, int offset)
template<int BLOCK> void
SpeedSync::compare_bits (vector<CmpState>& cmp_states, double relative_speed)
{
const int steps_per_frame = Params::frame_size / Params::sync_search_step;
const double relative_speed_inv = 1 / relative_speed;
/* search start */
int mi = *start_mi;
while (mi > 0)
{
int index = offset + sync_bits[mi - 1].frame * steps_per_frame;
if (index < 0)
break;
mi--;
}
*start_mi = mi;
for (auto si = sync_bits.begin() + mi; si != sync_bits.end(); si++)
auto begin = cmp_states.end();
auto end = cmp_states.end();
for (size_t mi = 0; mi < sync_bits.size(); mi++)
{
int index = (offset + si->frame * steps_per_frame) * relative_speed_inv + 0.5;
if (index >= sync_matrix.rows())
return;
const int frame_offset = ((BLOCK * frames_per_block + sync_bits[mi].frame) * steps_per_frame * relative_speed_inv + 0.5) * (1 << OFFSET_SHIFT);
auto& bv = bit_values[si->bit];
auto mags = sync_matrix (index, mi);
if (B2)
while (begin > cmp_states.begin())
{
bv.umag += mags.dmag;
bv.dmag += mags.umag;
auto prev = begin - 1;
/*
* don't use OFFSET_SHIFT here; just ensure that index is positive
* to ensure that shifted value will properly round to nearest frame
* later on
*/
int index = prev->offset + frame_offset;
if (index < 0)
break;
begin = prev;
}
else
while (end > cmp_states.begin())
{
bv.umag += mags.umag;
bv.dmag += mags.dmag;
auto prev = end - 1;
int index = (prev->offset + frame_offset) >> OFFSET_SHIFT;
if (index < sync_matrix.rows())
break;
end = prev;
}
bv.count++;
mi++;
for (auto it = begin; it != end; it++)
{
int index = (it->offset + frame_offset) >> OFFSET_SHIFT;
auto& bv = it->bit_values[sync_bits[mi].bit];
auto mags = sync_matrix (index, mi);
if (BLOCK & 1)
{
bv.umag += mags.dmag;
bv.dmag += mags.umag;
}
else
{
bv.umag += mags.umag;
bv.dmag += mags.dmag;
}
bv.count++;
}
}
}
......@@ -314,33 +338,36 @@ SpeedSync::compare (double relative_speed)
{
const int steps_per_frame = Params::frame_size / Params::sync_search_step;
const int pad_start = frames_per_block * steps_per_frame + /* add a bit of overlap to handle boundaries */ steps_per_frame;
Score best_score;
assert (steps_per_frame * Params::sync_search_step == Params::frame_size);
int start_mi1 = sync_bits.size();
int start_mi2 = sync_bits.size();
int start_mi3 = sync_bits.size();
for (int offset = -pad_start; offset < 0; offset++)
vector<CmpState> cmp_states;
for (int offset = -pad_start; offset < 0; offset++)
{
BitValue bit_values[Params::sync_bits];
CmpState cs;
cs.offset = offset * ((1 << OFFSET_SHIFT) / relative_speed);
cmp_states.push_back (cs);
}
/*
* we need to compare 3 blocks here:
* - one block is necessary because we need to test all offsets (-pad_start..0)
* - two more blocks are necessary since speed detection ScanParams uses 50 seconds at most,
* and short payload (12 bits) has a block length of slightly over 30 seconds
*/
compare_bits<false> (bit_values, relative_speed, &start_mi1, offset);
compare_bits<true> (bit_values, relative_speed, &start_mi2, offset + frames_per_block * steps_per_frame);
compare_bits<false> (bit_values, relative_speed, &start_mi3, offset + frames_per_block * steps_per_frame * 2);
/*
* we need to compare 3 blocks here:
* - one block is necessary because we need to test all offsets (-pad_start..0)
* - two more blocks are necessary since speed detection ScanParams uses 50 seconds at most,
* and short payload (12 bits) has a block length of slightly over 30 seconds
*/
compare_bits<0> (cmp_states, relative_speed);
compare_bits<1> (cmp_states, relative_speed);
compare_bits<2> (cmp_states, relative_speed);
Score best_score;
for (const auto& cs : cmp_states)
{
double sync_quality = 0;
int bit_count = 0;
for (size_t bit = 0; bit < Params::sync_bits; bit++)
{
const auto& bv = bit_values[bit];
const auto& bv = cs.bit_values[bit];
sync_quality += SyncFinder::bit_quality (bv.umag, bv.dmag, bit) * bv.count;
bit_count += bv.count;
......@@ -357,7 +384,6 @@ SpeedSync::compare (double relative_speed)
}
}
}
//printf ("%f %f\n", best_score.speed, best_score.quality);
std::lock_guard<std::mutex> lg (mutex);
result_scores.push_back (best_score);
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment