Commit ef8f2694 authored by Stefan Westerfeld

Manage SIMD allocated fft memory buffers in FFTProcessor.

Signed-off-by: Stefan Westerfeld <stefan@space.twc.de>
parent 3d04d2dc
...@@ -26,48 +26,39 @@ using std::vector; ...@@ -26,48 +26,39 @@ using std::vector;
using std::complex; using std::complex;
using std::map; using std::map;
float *
new_array_float (size_t N)
{
const size_t N_2 = N + 2; /* extra space for r2c extra complex output */
return (float *) fftwf_malloc (sizeof (float) * N_2);
}
void
free_array_float (float *f)
{
fftwf_free (f);
}
static std::mutex fft_planner_mutex; static std::mutex fft_planner_mutex;
FFTProcessor::FFTProcessor (size_t N) FFTProcessor::FFTProcessor (size_t N)
{ {
std::lock_guard<std::mutex> lg (fft_planner_mutex); std::lock_guard<std::mutex> lg (fft_planner_mutex);
float *plan_in = new_array_float (N); const size_t N_2 = N + 2; /* extra space for r2c extra complex output */
float *plan_out = new_array_float (N);
plan_fft = fftwf_plan_dft_r2c_1d (N, plan_in, (fftwf_complex *) plan_out, FFTW_ESTIMATE | FFTW_PRESERVE_INPUT); m_in = static_cast<float *> (fftwf_malloc (sizeof (float) * N_2));
plan_ifft = fftwf_plan_dft_c2r_1d (N, (fftwf_complex *) plan_in, plan_out, FFTW_ESTIMATE | FFTW_PRESERVE_INPUT); m_out = static_cast<float *> (fftwf_malloc (sizeof (float) * N_2));
// we add code for saving plans here, and use patient planning plan_fft = fftwf_plan_dft_r2c_1d (N, m_in, (fftwf_complex *) m_out, FFTW_ESTIMATE | FFTW_PRESERVE_INPUT);
plan_ifft = fftwf_plan_dft_c2r_1d (N, (fftwf_complex *) m_in, m_out, FFTW_ESTIMATE | FFTW_PRESERVE_INPUT);
free_array_float (plan_out); // we could add code for saving plans here, and use patient planning
free_array_float (plan_in); }
FFTProcessor::~FFTProcessor()
{
fftwf_free (m_in);
fftwf_free (m_out);
} }
void void
FFTProcessor::fft (float *in, float *out) FFTProcessor::fft()
{ {
fftwf_execute_dft_r2c (plan_fft, in, (fftwf_complex *) out); fftwf_execute_dft_r2c (plan_fft, m_in, (fftwf_complex *) m_out);
} }
void void
FFTProcessor::ifft (float *in, float *out) FFTProcessor::ifft()
{ {
fftwf_execute_dft_c2r (plan_ifft, (fftwf_complex *)in, out); fftwf_execute_dft_c2r (plan_ifft, (fftwf_complex *) m_in, m_out);
} }
vector<float> vector<float>
...@@ -75,18 +66,10 @@ FFTProcessor::ifft (const vector<complex<float>>& in) ...@@ -75,18 +66,10 @@ FFTProcessor::ifft (const vector<complex<float>>& in)
{ {
vector<float> out ((in.size() - 1) * 2); vector<float> out ((in.size() - 1) * 2);
/* ensure memory is SSE-aligned (or other vectorized stuff) */ /* complex<float> vector and m_out have the same layout in memory */
float *ifft_in = new_array_float (out.size()); std::copy (in.begin(), in.end(), reinterpret_cast<complex<float> *> (m_in));
float *ifft_out = new_array_float (out.size()); ifft();
std::copy (m_out, m_out + out.size(), &out[0]);
/* complex<float> vector and fft_out have the same layout in memory */
std::copy (in.begin(), in.end(), reinterpret_cast<complex<float> *> (ifft_in));
ifft (ifft_in, ifft_out);
std::copy (ifft_out, ifft_out + out.size(), &out[0]);
free_array_float (ifft_out);
free_array_float (ifft_in);
return out; return out;
} }
...@@ -96,18 +79,10 @@ FFTProcessor::fft (const vector<float>& in) ...@@ -96,18 +79,10 @@ FFTProcessor::fft (const vector<float>& in)
{ {
vector<complex<float>> out (in.size() / 2 + 1); vector<complex<float>> out (in.size() / 2 + 1);
/* ensure memory is SSE-aligned (or other vectorized stuff) */ /* complex<float> vector and m_out have the same layout in memory */
float *fft_in = new_array_float (in.size()); std::copy (in.begin(), in.end(), m_in);
float *fft_out = new_array_float (in.size()); fft();
std::copy (m_out, m_out + out.size() * 2, reinterpret_cast<float *> (&out[0]));
std::copy (in.begin(), in.end(), fft_in);
fft (fft_in, fft_out);
/* complex<float> vector and fft_out have the same layout in memory */
std::copy (fft_out, fft_out + out.size() * 2, reinterpret_cast<float *> (&out[0]));
free_array_float (fft_out);
free_array_float (fft_in);
return out; return out;
} }
...@@ -26,20 +26,21 @@ class FFTProcessor ...@@ -26,20 +26,21 @@ class FFTProcessor
{ {
fftwf_plan plan_fft; fftwf_plan plan_fft;
fftwf_plan plan_ifft; fftwf_plan plan_ifft;
float *m_in = nullptr;
float *m_out = nullptr;
public: public:
FFTProcessor (size_t N); FFTProcessor (size_t N);
~FFTProcessor();
/* low level (fast) */ /* low level (fast) */
void fft (float *in, float *out); void fft();
void ifft (float *in, float *out); void ifft();
float *in() { return m_in; }
float *out() { return m_out; };
/* high level (convenient) */ /* high level (convenient) */
std::vector<std::complex<float>> fft (const std::vector<float>& in); std::vector<std::complex<float>> fft (const std::vector<float>& in);
std::vector<float> ifft (const std::vector<std::complex<float>>& in); std::vector<float> ifft (const std::vector<std::complex<float>>& in);
}; };
float *new_array_float (size_t N);
void free_array_float (float *f);
#endif /* AUDIOWMARK_FFT_HH */ #endif /* AUDIOWMARK_FFT_HH */
...@@ -115,15 +115,6 @@ FFTAnalyzer::FFTAnalyzer (int n_channels) : ...@@ -115,15 +115,6 @@ FFTAnalyzer::FFTAnalyzer (int n_channels) :
m_window[i] *= 2.0 / window_weight; m_window[i] *= 2.0 / window_weight;
} }
/* allocate properly aligned buffers for SIMD */
m_frame = new_array_float (Params::frame_size);
m_frame_fft = new_array_float (Params::frame_size);
}
FFTAnalyzer::~FFTAnalyzer()
{
free_array_float (m_frame);
free_array_float (m_frame_fft);
} }
vector<vector<complex<float>>> vector<vector<complex<float>>>
...@@ -131,6 +122,9 @@ FFTAnalyzer::run_fft (const vector<float>& samples, size_t start_index) ...@@ -131,6 +122,9 @@ FFTAnalyzer::run_fft (const vector<float>& samples, size_t start_index)
{ {
assert (samples.size() >= (Params::frame_size + start_index) * m_n_channels); assert (samples.size() >= (Params::frame_size + start_index) * m_n_channels);
float *frame = m_fft_processor.in();
float *frame_fft = m_fft_processor.out();
vector<vector<complex<float>>> fft_out; vector<vector<complex<float>>> fft_out;
for (int ch = 0; ch < m_n_channels; ch++) for (int ch = 0; ch < m_n_channels; ch++)
{ {
...@@ -140,14 +134,14 @@ FFTAnalyzer::run_fft (const vector<float>& samples, size_t start_index) ...@@ -140,14 +134,14 @@ FFTAnalyzer::run_fft (const vector<float>& samples, size_t start_index)
/* deinterleave frame data and apply window */ /* deinterleave frame data and apply window */
for (size_t x = 0; x < Params::frame_size; x++) for (size_t x = 0; x < Params::frame_size; x++)
{ {
m_frame[x] = samples[pos] * m_window[x]; frame[x] = samples[pos] * m_window[x];
pos += m_n_channels; pos += m_n_channels;
} }
/* FFT transform */ /* FFT transform */
m_fft_processor.fft (m_frame, m_frame_fft); m_fft_processor.fft();
/* complex<float> and frame_fft have the same layout in memory */ /* complex<float> and frame_fft have the same layout in memory */
const complex<float> *first = (complex<float> *) m_frame_fft; const complex<float> *first = (complex<float> *) frame_fft;
const complex<float> *last = first + Params::frame_size / 2 + 1; const complex<float> *last = first + Params::frame_size / 2 + 1;
fft_out.emplace_back (first, last); fft_out.emplace_back (first, last);
} }
......
...@@ -120,12 +120,9 @@ class FFTAnalyzer ...@@ -120,12 +120,9 @@ class FFTAnalyzer
{ {
int m_n_channels = 0; int m_n_channels = 0;
std::vector<float> m_window; std::vector<float> m_window;
float *m_frame = nullptr;
float *m_frame_fft = nullptr;
FFTProcessor m_fft_processor; FFTProcessor m_fft_processor;
public: public:
FFTAnalyzer (int n_channels); FFTAnalyzer (int n_channels);
~FFTAnalyzer();
std::vector<std::vector<std::complex<float>>> run_fft (const std::vector<float>& samples, size_t start_index); std::vector<std::vector<std::complex<float>>> run_fft (const std::vector<float>& samples, size_t start_index);
std::vector<std::vector<std::complex<float>>> fft_range (const std::vector<float>& samples, size_t start_index, size_t frame_count); std::vector<std::vector<std::complex<float>>> fft_range (const std::vector<float>& samples, size_t start_index, size_t frame_count);
......
...@@ -295,8 +295,8 @@ SpeedSync::prepare_mags() ...@@ -295,8 +295,8 @@ SpeedSync::prepare_mags()
FFTProcessor fft_processor (sub_frame_size); FFTProcessor fft_processor (sub_frame_size);
float *in = new_array_float (sub_frame_size); float *in = fft_processor.in();
float *out = new_array_float (sub_frame_size); float *out = fft_processor.out();
fft_sync_bits.clear(); fft_sync_bits.clear();
size_t pos = 0; size_t pos = 0;
...@@ -311,7 +311,7 @@ SpeedSync::prepare_mags() ...@@ -311,7 +311,7 @@ SpeedSync::prepare_mags()
{ {
in[i] = samples[ch + (pos + i) * in_data_sub.n_channels()] * window[i]; in[i] = samples[ch + (pos + i) * in_data_sub.n_channels()] * window[i];
} }
fft_processor.fft (in, out); fft_processor.fft();
for (int i = Params::min_band; i <= Params::max_band; i++) for (int i = Params::min_band; i <= Params::max_band; i++)
{ {
...@@ -341,9 +341,6 @@ SpeedSync::prepare_mags() ...@@ -341,9 +341,6 @@ SpeedSync::prepare_mags()
fft_sync_bits.push_back (mags); fft_sync_bits.push_back (mags);
pos += sub_sync_search_step; pos += sub_sync_search_step;
} }
free_array_float (in);
free_array_float (out);
} }
void void
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment