Commit 55788b03 authored by Stefan Westerfeld

Make fft usage thread safe (required for speed detection).

Signed-off-by: Stefan Westerfeld <stefan@space.twc.de>
parent 7467a9b0
...@@ -20,6 +20,7 @@ ...@@ -20,6 +20,7 @@
#include <fftw3.h> #include <fftw3.h>
#include <map> #include <map>
#include <mutex>
using std::vector; using std::vector;
using std::complex; using std::complex;
...@@ -39,63 +40,38 @@ free_array_float (float *f) ...@@ -39,63 +40,38 @@ free_array_float (float *f)
fftwf_free (f); fftwf_free (f);
} }
void static std::mutex fft_planner_mutex;
fftar_float (size_t N, float *in, float *out)
{
static map<int, fftwf_plan> plan_for_size;
fftwf_plan& plan = plan_for_size[N];
if (!plan)
{
float *plan_in = new_array_float (N);
float *plan_out = new_array_float (N);
plan = fftwf_plan_dft_r2c_1d (N, plan_in, (fftwf_complex *) plan_out, FFTW_ESTIMATE | FFTW_PRESERVE_INPUT);
// we add code for saving plans here, and use patient planning
}
fftwf_execute_dft_r2c (plan, in, (fftwf_complex *) out);
}
void FFTProcessor::FFTProcessor (size_t N)
fftsr_float (size_t N, float *in, float *out)
{ {
static map<int, fftwf_plan> plan_for_size; std::lock_guard<std::mutex> lg (fft_planner_mutex);
fftwf_plan& plan = plan_for_size[N];
if (!plan)
{
float *plan_in = new_array_float (N);
float *plan_out = new_array_float (N);
plan = fftwf_plan_dft_c2r_1d (N, (fftwf_complex *) plan_in, plan_out, FFTW_ESTIMATE | FFTW_PRESERVE_INPUT);
// we add code for saving plans here, and use patient planning
}
fftwf_execute_dft_c2r (plan, (fftwf_complex *)in, out);
}
vector<complex<float>> float *plan_in = new_array_float (N);
fft (const vector<float>& in) float *plan_out = new_array_float (N);
{
vector<complex<float>> out (in.size() / 2 + 1);
/* ensure memory is SSE-aligned (or other vectorized stuff) */ plan_fft = fftwf_plan_dft_r2c_1d (N, plan_in, (fftwf_complex *) plan_out, FFTW_ESTIMATE | FFTW_PRESERVE_INPUT);
float *fft_in = new_array_float (in.size()); plan_ifft = fftwf_plan_dft_c2r_1d (N, (fftwf_complex *) plan_in, plan_out, FFTW_ESTIMATE | FFTW_PRESERVE_INPUT);
float *fft_out = new_array_float (in.size());
std::copy (in.begin(), in.end(), fft_in); // we add code for saving plans here, and use patient planning
fftar_float (in.size(), fft_in, fft_out);
/* complex<float> vector and fft_out have the same layout in memory */ free_array_float (plan_out);
std::copy (fft_out, fft_out + out.size() * 2, reinterpret_cast<float *> (&out[0])); free_array_float (plan_in);
}
free_array_float (fft_out); void
free_array_float (fft_in); FFTProcessor::fft (float *in, float *out)
{
fftwf_execute_dft_r2c (plan_fft, in, (fftwf_complex *) out);
}
return out; void
FFTProcessor::ifft (float *in, float *out)
{
fftwf_execute_dft_c2r (plan_ifft, (fftwf_complex *)in, out);
} }
vector<float> vector<float>
ifft (const vector<complex<float>>& in) FFTProcessor::ifft (const vector<complex<float>>& in)
{ {
vector<float> out ((in.size() - 1) * 2); vector<float> out ((in.size() - 1) * 2);
...@@ -105,7 +81,7 @@ ifft (const vector<complex<float>>& in) ...@@ -105,7 +81,7 @@ ifft (const vector<complex<float>>& in)
/* complex<float> vector and fft_out have the same layout in memory */ /* complex<float> vector and fft_out have the same layout in memory */
std::copy (in.begin(), in.end(), reinterpret_cast<complex<float> *> (ifft_in)); std::copy (in.begin(), in.end(), reinterpret_cast<complex<float> *> (ifft_in));
fftsr_float (out.size(), ifft_in, ifft_out); ifft (ifft_in, ifft_out);
std::copy (ifft_out, ifft_out + out.size(), &out[0]); std::copy (ifft_out, ifft_out + out.size(), &out[0]);
...@@ -115,4 +91,23 @@ ifft (const vector<complex<float>>& in) ...@@ -115,4 +91,23 @@ ifft (const vector<complex<float>>& in)
return out; return out;
} }
/*
 * High level (convenient) forward FFT: takes real samples, returns the
 * in.size() / 2 + 1 complex bins produced by the low level fft().
 *
 * NOTE(review): the plan executed by the low level fft() is created in the
 * constructor for one fixed size N; presumably in.size() has to equal that
 * N - confirm against callers.
 */
vector<complex<float>>
FFTProcessor::fft (const vector<float>& in)
{
  const size_t n_samples = in.size();
  vector<complex<float>> spectrum (n_samples / 2 + 1);

  /* work on scratch buffers so that memory is SSE-aligned (or other vectorized stuff) */
  float *aligned_in = new_array_float (n_samples);
  float *aligned_out = new_array_float (n_samples);

  std::copy (in.begin(), in.end(), aligned_in);
  fft (aligned_in, aligned_out);

  /*
   * complex<float> vector and aligned_out have the same layout in memory,
   * so the result can be copied over as plain floats (two per bin)
   *
   * NOTE(review): this reads spectrum.size() * 2 floats from aligned_out;
   * assumes new_array_float (n) provides room for that many - TODO confirm
   */
  float *spectrum_floats = reinterpret_cast<float *> (&spectrum[0]);
  std::copy (aligned_out, aligned_out + spectrum.size() * 2, spectrum_floats);

  free_array_float (aligned_out);
  free_array_float (aligned_in);

  return spectrum;
}
...@@ -20,13 +20,24 @@ ...@@ -20,13 +20,24 @@
#include <complex> #include <complex>
#include <vector> #include <vector>
#include <fftw3.h>
/* high level api */ class FFTProcessor
std::vector<std::complex<float>> fft (const std::vector<float>& in); {
std::vector<float> ifft (const std::vector<std::complex<float>>& in); fftwf_plan plan_fft;
fftwf_plan plan_ifft;
public:
FFTProcessor (size_t N);
/* low level (fast) */
void fft (float *in, float *out);
void ifft (float *in, float *out);
/* high level (convenient) */
std::vector<std::complex<float>> fft (const std::vector<float>& in);
std::vector<float> ifft (const std::vector<std::complex<float>>& in);
};
/* more efficient: low level api */
void fftar_float (size_t N, float *in, float *out);
float *new_array_float (size_t N); float *new_array_float (size_t N);
void free_array_float (float *f); void free_array_float (float *f);
......
...@@ -169,6 +169,7 @@ class WatermarkSynth ...@@ -169,6 +169,7 @@ class WatermarkSynth
vector<float> window; vector<float> window;
vector<float> synth_samples; vector<float> synth_samples;
bool first_frame = true; bool first_frame = true;
FFTProcessor fft_processor;
void void
generate_window() generate_window()
...@@ -202,7 +203,8 @@ class WatermarkSynth ...@@ -202,7 +203,8 @@ class WatermarkSynth
} }
public: public:
WatermarkSynth (int n_channels) : WatermarkSynth (int n_channels) :
n_channels (n_channels) n_channels (n_channels),
fft_processor (Params::frame_size)
{ {
generate_window(); generate_window();
synth_samples.resize (window.size() * n_channels); synth_samples.resize (window.size() * n_channels);
...@@ -218,7 +220,7 @@ public: ...@@ -218,7 +220,7 @@ public:
for (int ch = 0; ch < n_channels; ch++) for (int ch = 0; ch < n_channels; ch++)
{ {
/* mix watermark signal to output frame */ /* mix watermark signal to output frame */
vector<float> fft_delta_out = ifft (fft_delta_spect[ch]); vector<float> fft_delta_out = fft_processor.ifft (fft_delta_spect[ch]);
for (int dframe = 0; dframe <= 2; dframe++) for (int dframe = 0; dframe <= 2; dframe++)
{ {
......
...@@ -92,7 +92,8 @@ db_from_factor (double factor, double min_dB) ...@@ -92,7 +92,8 @@ db_from_factor (double factor, double min_dB)
} }
FFTAnalyzer::FFTAnalyzer (int n_channels) : FFTAnalyzer::FFTAnalyzer (int n_channels) :
m_n_channels (n_channels) m_n_channels (n_channels),
m_fft_processor (Params::frame_size)
{ {
/* generate analysis window */ /* generate analysis window */
m_window.resize (Params::frame_size); m_window.resize (Params::frame_size);
...@@ -143,7 +144,7 @@ FFTAnalyzer::run_fft (const vector<float>& samples, size_t start_index) ...@@ -143,7 +144,7 @@ FFTAnalyzer::run_fft (const vector<float>& samples, size_t start_index)
pos += m_n_channels; pos += m_n_channels;
} }
/* FFT transform */ /* FFT transform */
fftar_float (Params::frame_size, m_frame, m_frame_fft); m_fft_processor.fft (m_frame, m_frame_fft);
/* complex<float> and frame_fft have the same layout in memory */ /* complex<float> and frame_fft have the same layout in memory */
const complex<float> *first = (complex<float> *) m_frame_fft; const complex<float> *first = (complex<float> *) m_frame_fft;
......
...@@ -24,6 +24,7 @@ ...@@ -24,6 +24,7 @@
#include "random.hh" #include "random.hh"
#include "rawinputstream.hh" #include "rawinputstream.hh"
#include "wavdata.hh" #include "wavdata.hh"
#include "fft.hh"
#include <assert.h> #include <assert.h>
...@@ -121,6 +122,7 @@ class FFTAnalyzer ...@@ -121,6 +122,7 @@ class FFTAnalyzer
std::vector<float> m_window; std::vector<float> m_window;
float *m_frame = nullptr; float *m_frame = nullptr;
float *m_frame_fft = nullptr; float *m_frame_fft = nullptr;
FFTProcessor m_fft_processor;
public: public:
FFTAnalyzer (int n_channels); FFTAnalyzer (int n_channels);
~FFTAnalyzer(); ~FFTAnalyzer();
......
...@@ -293,6 +293,8 @@ SpeedSync::prepare_mags() ...@@ -293,6 +293,8 @@ SpeedSync::prepare_mags()
SyncFinder sync_finder; SyncFinder sync_finder;
sync_bits = sync_finder.get_sync_bits (in_data, SyncFinder::Mode::BLOCK); sync_bits = sync_finder.get_sync_bits (in_data, SyncFinder::Mode::BLOCK);
FFTProcessor fft_processor (sub_frame_size);
float *in = new_array_float (sub_frame_size); float *in = new_array_float (sub_frame_size);
float *out = new_array_float (sub_frame_size); float *out = new_array_float (sub_frame_size);
...@@ -309,7 +311,7 @@ SpeedSync::prepare_mags() ...@@ -309,7 +311,7 @@ SpeedSync::prepare_mags()
{ {
in[i] = samples[ch + (pos + i) * in_data_sub.n_channels()] * window[i]; in[i] = samples[ch + (pos + i) * in_data_sub.n_channels()] * window[i];
} }
fftar_float (sub_frame_size, in, out); fft_processor.fft (in, out);
for (int i = Params::min_band; i <= Params::max_band; i++) for (int i = Params::min_band; i <= Params::max_band; i++)
{ {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment