Commit 55788b03 authored by Stefan Westerfeld

Make fft usage thread safe (required for speed detection).

Signed-off-by: Stefan Westerfeld <stefan@space.twc.de>
parent 7467a9b0
......@@ -20,6 +20,7 @@
#include <fftw3.h>
#include <map>
#include <mutex>
using std::vector;
using std::complex;
......@@ -39,63 +40,38 @@ free_array_float (float *f)
fftwf_free (f);
}
void
fftar_float (size_t N, float *in, float *out)
{
static map<int, fftwf_plan> plan_for_size;
fftwf_plan& plan = plan_for_size[N];
if (!plan)
{
float *plan_in = new_array_float (N);
float *plan_out = new_array_float (N);
plan = fftwf_plan_dft_r2c_1d (N, plan_in, (fftwf_complex *) plan_out, FFTW_ESTIMATE | FFTW_PRESERVE_INPUT);
// we add code for saving plans here, and use patient planning
}
fftwf_execute_dft_r2c (plan, in, (fftwf_complex *) out);
}
static std::mutex fft_planner_mutex;
void
fftsr_float (size_t N, float *in, float *out)
FFTProcessor::FFTProcessor (size_t N)
{
static map<int, fftwf_plan> plan_for_size;
fftwf_plan& plan = plan_for_size[N];
if (!plan)
{
float *plan_in = new_array_float (N);
float *plan_out = new_array_float (N);
plan = fftwf_plan_dft_c2r_1d (N, (fftwf_complex *) plan_in, plan_out, FFTW_ESTIMATE | FFTW_PRESERVE_INPUT);
// we add code for saving plans here, and use patient planning
}
fftwf_execute_dft_c2r (plan, (fftwf_complex *)in, out);
}
std::lock_guard<std::mutex> lg (fft_planner_mutex);
vector<complex<float>>
fft (const vector<float>& in)
{
vector<complex<float>> out (in.size() / 2 + 1);
float *plan_in = new_array_float (N);
float *plan_out = new_array_float (N);
/* ensure memory is SSE-aligned (or other vectorized stuff) */
float *fft_in = new_array_float (in.size());
float *fft_out = new_array_float (in.size());
plan_fft = fftwf_plan_dft_r2c_1d (N, plan_in, (fftwf_complex *) plan_out, FFTW_ESTIMATE | FFTW_PRESERVE_INPUT);
plan_ifft = fftwf_plan_dft_c2r_1d (N, (fftwf_complex *) plan_in, plan_out, FFTW_ESTIMATE | FFTW_PRESERVE_INPUT);
std::copy (in.begin(), in.end(), fft_in);
fftar_float (in.size(), fft_in, fft_out);
// we add code for saving plans here, and use patient planning
/* complex<float> vector and fft_out have the same layout in memory */
std::copy (fft_out, fft_out + out.size() * 2, reinterpret_cast<float *> (&out[0]));
free_array_float (plan_out);
free_array_float (plan_in);
}
free_array_float (fft_out);
free_array_float (fft_in);
void
FFTProcessor::fft (float *in, float *out)
{
fftwf_execute_dft_r2c (plan_fft, in, (fftwf_complex *) out);
}
return out;
void
FFTProcessor::ifft (float *in, float *out)
{
fftwf_execute_dft_c2r (plan_ifft, (fftwf_complex *)in, out);
}
vector<float>
ifft (const vector<complex<float>>& in)
FFTProcessor::ifft (const vector<complex<float>>& in)
{
vector<float> out ((in.size() - 1) * 2);
......@@ -105,7 +81,7 @@ ifft (const vector<complex<float>>& in)
/* complex<float> vector and fft_out have the same layout in memory */
std::copy (in.begin(), in.end(), reinterpret_cast<complex<float> *> (ifft_in));
fftsr_float (out.size(), ifft_in, ifft_out);
ifft (ifft_in, ifft_out);
std::copy (ifft_out, ifft_out + out.size(), &out[0]);
......@@ -115,4 +91,23 @@ ifft (const vector<complex<float>>& in)
return out;
}
/*
 * high level forward transform: copies the samples into a buffer obtained
 * from new_array_float(), runs the low level fft() on it and returns the
 * resulting in.size() / 2 + 1 complex values
 */
vector<complex<float>>
FFTProcessor::fft (const vector<float>& in)
{
  const size_t n_samples = in.size();
  vector<complex<float>> spectrum (n_samples / 2 + 1);

  /* ensure memory is SSE-aligned (or other vectorized stuff) */
  float *samples = new_array_float (n_samples);
  float *bins = new_array_float (n_samples);

  std::copy (in.begin(), in.end(), samples);
  fft (samples, bins);

  /* complex<float> vector and the raw fft output have the same layout in memory */
  float *spectrum_floats = reinterpret_cast<float *> (spectrum.data());
  std::copy (bins, bins + spectrum.size() * 2, spectrum_floats);

  free_array_float (bins);
  free_array_float (samples);

  return spectrum;
}
......@@ -20,13 +20,24 @@
#include <complex>
#include <vector>
#include <fftw3.h>
/* high level api */
std::vector<std::complex<float>> fft (const std::vector<float>& in);
std::vector<float> ifft (const std::vector<std::complex<float>>& in);
/**
 * FFT / inverse FFT processor holding one forward and one inverse FFTW plan.
 *
 * Both plans are created by the constructor for the transform size N given
 * there; the transform methods only execute these plans.
 *
 * NOTE(review): no destructor is declared, so the two plans are not released
 * when an FFTProcessor is destroyed - confirm whether this is intended.
 */
class FFTProcessor
{
  /* plan handles, null until the constructor has created them */
  fftwf_plan plan_fft = nullptr;
  fftwf_plan plan_ifft = nullptr;
public:
  /* explicit: a plain size must not silently convert into a processor */
  explicit FFTProcessor (size_t N);

  /* low level (fast): operate directly on caller supplied arrays */
  void fft (float *in, float *out);
  void ifft (float *in, float *out);

  /* high level (convenient): vector in, newly allocated vector out */
  std::vector<std::complex<float>> fft (const std::vector<float>& in);
  std::vector<float> ifft (const std::vector<std::complex<float>>& in);
};
/* more efficient: low level api */
void fftar_float (size_t N, float *in, float *out);
float *new_array_float (size_t N);
void free_array_float (float *f);
......
......@@ -169,6 +169,7 @@ class WatermarkSynth
vector<float> window;
vector<float> synth_samples;
bool first_frame = true;
FFTProcessor fft_processor;
void
generate_window()
......@@ -202,7 +203,8 @@ class WatermarkSynth
}
public:
WatermarkSynth (int n_channels) :
n_channels (n_channels)
n_channels (n_channels),
fft_processor (Params::frame_size)
{
generate_window();
synth_samples.resize (window.size() * n_channels);
......@@ -218,7 +220,7 @@ public:
for (int ch = 0; ch < n_channels; ch++)
{
/* mix watermark signal to output frame */
vector<float> fft_delta_out = ifft (fft_delta_spect[ch]);
vector<float> fft_delta_out = fft_processor.ifft (fft_delta_spect[ch]);
for (int dframe = 0; dframe <= 2; dframe++)
{
......
......@@ -92,7 +92,8 @@ db_from_factor (double factor, double min_dB)
}
FFTAnalyzer::FFTAnalyzer (int n_channels) :
m_n_channels (n_channels)
m_n_channels (n_channels),
m_fft_processor (Params::frame_size)
{
/* generate analysis window */
m_window.resize (Params::frame_size);
......@@ -143,7 +144,7 @@ FFTAnalyzer::run_fft (const vector<float>& samples, size_t start_index)
pos += m_n_channels;
}
/* FFT transform */
fftar_float (Params::frame_size, m_frame, m_frame_fft);
m_fft_processor.fft (m_frame, m_frame_fft);
/* complex<float> and frame_fft have the same layout in memory */
const complex<float> *first = (complex<float> *) m_frame_fft;
......
......@@ -24,6 +24,7 @@
#include "random.hh"
#include "rawinputstream.hh"
#include "wavdata.hh"
#include "fft.hh"
#include <assert.h>
......@@ -121,6 +122,7 @@ class FFTAnalyzer
std::vector<float> m_window;
float *m_frame = nullptr;
float *m_frame_fft = nullptr;
FFTProcessor m_fft_processor;
public:
FFTAnalyzer (int n_channels);
~FFTAnalyzer();
......
......@@ -293,6 +293,8 @@ SpeedSync::prepare_mags()
SyncFinder sync_finder;
sync_bits = sync_finder.get_sync_bits (in_data, SyncFinder::Mode::BLOCK);
FFTProcessor fft_processor (sub_frame_size);
float *in = new_array_float (sub_frame_size);
float *out = new_array_float (sub_frame_size);
......@@ -309,7 +311,7 @@ SpeedSync::prepare_mags()
{
in[i] = samples[ch + (pos + i) * in_data_sub.n_channels()] * window[i];
}
fftar_float (sub_frame_size, in, out);
fft_processor.fft (in, out);
for (int i = Params::min_band; i <= Params::max_band; i++)
{
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment