Commit ef8f2694 authored by Stefan Westerfeld

Manage SIMD allocated fft memory buffers in FFTProcessor.

Signed-off-by: Stefan Westerfeld <stefan@space.twc.de>
parent 3d04d2dc
......@@ -26,48 +26,39 @@ using std::vector;
using std::complex;
using std::map;
/*
 * Allocate a float buffer through FFTW's allocator for use with an N point
 * real FFT (callers use this to obtain buffers suitable for SIMD access).
 *
 * The buffer holds N + 2 floats; release it with free_array_float().
 */
float *
new_array_float (size_t N)
{
  /* two floats beyond N: extra space for the extra complex output of r2c */
  const size_t n_floats = N + 2;
  void *raw = fftwf_malloc (n_floats * sizeof (float));

  return static_cast<float *> (raw);
}
/* Release a buffer with fftwf_free(); counterpart of new_array_float(). */
void
free_array_float (float *f)
{
fftwf_free (f);
}
/* guards FFTW plan creation: held by the FFTProcessor constructor while it builds its plans */
static std::mutex fft_planner_mutex;
/*
 * Set up an FFT processor for real transforms of size N: a forward (r2c) plan
 * and an inverse (c2r) plan are created while fft_planner_mutex is held.
 *
 * NOTE(review): the plan setup is listed twice below - once against the
 * temporary plan_in / plan_out buffers (which are freed again at the end) and
 * once against the member buffers m_in / m_out. This looks like the removed
 * and the added lines of the commit shown together; confirm which set is
 * meant to remain, since the second pair of assignments overwrites the first.
 */
FFTProcessor::FFTProcessor (size_t N)
{
/* plan creation is serialized through the planner mutex */
std::lock_guard<std::mutex> lg (fft_planner_mutex);
float *plan_in = new_array_float (N);
float *plan_out = new_array_float (N);
const size_t N_2 = N + 2; /* extra space for r2c extra complex output */
plan_fft = fftwf_plan_dft_r2c_1d (N, plan_in, (fftwf_complex *) plan_out, FFTW_ESTIMATE | FFTW_PRESERVE_INPUT);
plan_ifft = fftwf_plan_dft_c2r_1d (N, (fftwf_complex *) plan_in, plan_out, FFTW_ESTIMATE | FFTW_PRESERVE_INPUT);
/* member buffers; these are what in() / out() hand to callers and what the destructor frees */
m_in = static_cast<float *> (fftwf_malloc (sizeof (float) * N_2));
m_out = static_cast<float *> (fftwf_malloc (sizeof (float) * N_2));
// we could add code for saving plans here, and use patient planning
plan_fft = fftwf_plan_dft_r2c_1d (N, m_in, (fftwf_complex *) m_out, FFTW_ESTIMATE | FFTW_PRESERVE_INPUT);
plan_ifft = fftwf_plan_dft_c2r_1d (N, (fftwf_complex *) m_in, m_out, FFTW_ESTIMATE | FFTW_PRESERVE_INPUT);
free_array_float (plan_out);
free_array_float (plan_in);
// we could add code for saving plans here, and use patient planning
}
/*
 * Release everything the constructor acquired: the two FFTW plans and the
 * m_in / m_out buffers.
 *
 * Previously only the buffers were freed, so each FFTProcessor instance
 * leaked plan_fft and plan_ifft.
 */
FFTProcessor::~FFTProcessor()
{
  {
    /* FFTW only documents plan execution as thread-safe, so destroying plans
     * is serialized with plan creation through the same planner mutex */
    std::lock_guard<std::mutex> lg (fft_planner_mutex);

    fftwf_destroy_plan (plan_fft);
    fftwf_destroy_plan (plan_ifft);
  }

  fftwf_free (m_in);
  fftwf_free (m_out);
}
/*
 * Low level forward transform: executes plan_fft (real to complex).
 *
 * NOTE(review): two signatures and two execute calls are listed here - one
 * taking caller supplied in / out pointers, one operating on the member
 * buffers m_in / m_out. This looks like the old and new version of the
 * function shown together; confirm which one applies.
 */
void
FFTProcessor::fft (float *in, float *out)
FFTProcessor::fft()
{
fftwf_execute_dft_r2c (plan_fft, in, (fftwf_complex *) out);
fftwf_execute_dft_r2c (plan_fft, m_in, (fftwf_complex *) m_out);
}
/*
 * Low level inverse transform: executes plan_ifft (complex to real).
 *
 * NOTE(review): two signatures and two execute calls are listed here - one
 * taking caller supplied in / out pointers, one operating on the member
 * buffers m_in / m_out. This looks like the old and new version of the
 * function shown together; confirm which one applies.
 */
void
FFTProcessor::ifft (float *in, float *out)
FFTProcessor::ifft()
{
fftwf_execute_dft_c2r (plan_ifft, (fftwf_complex *)in, out);
fftwf_execute_dft_c2r (plan_ifft, (fftwf_complex *) m_in, m_out);
}
vector<float>
......@@ -75,18 +66,10 @@ FFTProcessor::ifft (const vector<complex<float>>& in)
{
vector<float> out ((in.size() - 1) * 2);
/* ensure memory is SSE-aligned (or other vectorized stuff) */
float *ifft_in = new_array_float (out.size());
float *ifft_out = new_array_float (out.size());
/* complex<float> vector and fft_out have the same layout in memory */
std::copy (in.begin(), in.end(), reinterpret_cast<complex<float> *> (ifft_in));
ifft (ifft_in, ifft_out);
std::copy (ifft_out, ifft_out + out.size(), &out[0]);
free_array_float (ifft_out);
free_array_float (ifft_in);
/* complex<float> vector and m_out have the same layout in memory */
std::copy (in.begin(), in.end(), reinterpret_cast<complex<float> *> (m_in));
ifft();
std::copy (m_out, m_out + out.size(), &out[0]);
return out;
}
......@@ -96,18 +79,10 @@ FFTProcessor::fft (const vector<float>& in)
{
vector<complex<float>> out (in.size() / 2 + 1);
/* ensure memory is SSE-aligned (or other vectorized stuff) */
float *fft_in = new_array_float (in.size());
float *fft_out = new_array_float (in.size());
std::copy (in.begin(), in.end(), fft_in);
fft (fft_in, fft_out);
/* complex<float> vector and fft_out have the same layout in memory */
std::copy (fft_out, fft_out + out.size() * 2, reinterpret_cast<float *> (&out[0]));
free_array_float (fft_out);
free_array_float (fft_in);
/* complex<float> vector and m_out have the same layout in memory */
std::copy (in.begin(), in.end(), m_in);
fft();
std::copy (m_out, m_out + out.size() * 2, reinterpret_cast<float *> (&out[0]));
return out;
}
......@@ -26,20 +26,21 @@ class FFTProcessor
{
fftwf_plan plan_fft;
fftwf_plan plan_ifft;
float *m_in = nullptr;
float *m_out = nullptr;
public:
FFTProcessor (size_t N);
~FFTProcessor();
/* low level (fast) */
void fft (float *in, float *out);
void ifft (float *in, float *out);
void fft();
void ifft();
float *in() { return m_in; }
float *out() { return m_out; };
/* high level (convenient) */
std::vector<std::complex<float>> fft (const std::vector<float>& in);
std::vector<float> ifft (const std::vector<std::complex<float>>& in);
};
float *new_array_float (size_t N);
void free_array_float (float *f);
#endif /* AUDIOWMARK_FFT_HH */
......@@ -115,15 +115,6 @@ FFTAnalyzer::FFTAnalyzer (int n_channels) :
m_window[i] *= 2.0 / window_weight;
}
/* allocate properly aligned buffers for SIMD */
m_frame = new_array_float (Params::frame_size);
m_frame_fft = new_array_float (Params::frame_size);
}
/* Free the m_frame / m_frame_fft buffers that the constructor obtained from new_array_float(). */
FFTAnalyzer::~FFTAnalyzer()
{
free_array_float (m_frame);
free_array_float (m_frame_fft);
}
vector<vector<complex<float>>>
......@@ -131,6 +122,9 @@ FFTAnalyzer::run_fft (const vector<float>& samples, size_t start_index)
{
assert (samples.size() >= (Params::frame_size + start_index) * m_n_channels);
float *frame = m_fft_processor.in();
float *frame_fft = m_fft_processor.out();
vector<vector<complex<float>>> fft_out;
for (int ch = 0; ch < m_n_channels; ch++)
{
......@@ -140,14 +134,14 @@ FFTAnalyzer::run_fft (const vector<float>& samples, size_t start_index)
/* deinterleave frame data and apply window */
for (size_t x = 0; x < Params::frame_size; x++)
{
m_frame[x] = samples[pos] * m_window[x];
frame[x] = samples[pos] * m_window[x];
pos += m_n_channels;
}
/* FFT transform */
m_fft_processor.fft (m_frame, m_frame_fft);
m_fft_processor.fft();
/* complex<float> and frame_fft have the same layout in memory */
const complex<float> *first = (complex<float> *) m_frame_fft;
const complex<float> *first = (complex<float> *) frame_fft;
const complex<float> *last = first + Params::frame_size / 2 + 1;
fft_out.emplace_back (first, last);
}
......
......@@ -120,12 +120,9 @@ class FFTAnalyzer
{
int m_n_channels = 0;
std::vector<float> m_window;
float *m_frame = nullptr;
float *m_frame_fft = nullptr;
FFTProcessor m_fft_processor;
public:
FFTAnalyzer (int n_channels);
~FFTAnalyzer();
std::vector<std::vector<std::complex<float>>> run_fft (const std::vector<float>& samples, size_t start_index);
std::vector<std::vector<std::complex<float>>> fft_range (const std::vector<float>& samples, size_t start_index, size_t frame_count);
......
......@@ -295,8 +295,8 @@ SpeedSync::prepare_mags()
FFTProcessor fft_processor (sub_frame_size);
float *in = new_array_float (sub_frame_size);
float *out = new_array_float (sub_frame_size);
float *in = fft_processor.in();
float *out = fft_processor.out();
fft_sync_bits.clear();
size_t pos = 0;
......@@ -311,7 +311,7 @@ SpeedSync::prepare_mags()
{
in[i] = samples[ch + (pos + i) * in_data_sub.n_channels()] * window[i];
}
fft_processor.fft (in, out);
fft_processor.fft();
for (int i = Params::min_band; i <= Params::max_band; i++)
{
......@@ -341,9 +341,6 @@ SpeedSync::prepare_mags()
fft_sync_bits.push_back (mags);
pos += sub_sync_search_step;
}
free_array_float (in);
free_array_float (out);
}
void
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment