Manage SIMD allocated fft memory buffers in FFTProcessor.

Signed-off-by: Stefan Westerfeld <stefan@space.twc.de>

Manage SIMD allocated fft memory buffers in FFTProcessor.
Signed-off-by: Stefan Westerfeld <stefan@space.twc.de>
ef8f2694 · Stefan Westerfeld · 3d04d2dc · ef8f2694 · ef8f2694 · ef8f2694
Commit ef8f2694 authored Dec 02, 2020 by Stefan Westerfeld
Hide whitespace changes
Inline Side-by-side

Showing with 40 additions and 76 deletions

fft.cc src/fft.cc +24 -49

fft.hh src/fft.hh +7 -6

wmcommon.cc src/wmcommon.cc +6 -12

wmcommon.hh src/wmcommon.hh +0 -3

wmspeed.cc src/wmspeed.cc +3 -6

No files found.
--- a/src/fft.cc
+++ b/src/fft.cc
@@ -26,48 +26,39 @@ using std::vector;
 using std::complex;
 using std::map;

-float *
-new_array_float (size_t N)
-{
-  const size_t N_2 = N + 2; /* extra space for r2c extra complex output */
-
-  return (float *) fftwf_malloc (sizeof (float) * N_2);
-}
-
-void
-free_array_float (float *f)
-{
-  fftwf_free (f);
-}
-
 static std::mutex fft_planner_mutex;

 FFTProcessor::FFTProcessor (size_t N)
 {
  std::lock_guard<std::mutex> lg (fft_planner_mutex);

-  float *plan_in = new_array_float (N);
-  float *plan_out = new_array_float (N);
+  const size_t N_2 = N + 2; /* two extra floats: the r2c output has N / 2 + 1 complex values (= N + 2 floats for even N) */

-  plan_fft = fftwf_plan_dft_r2c_1d (N, plan_in, (fftwf_complex *) plan_out, FFTW_ESTIMATE | FFTW_PRESERVE_INPUT);
-  plan_ifft = fftwf_plan_dft_c2r_1d (N, (fftwf_complex *) plan_in, plan_out, FFTW_ESTIMATE | FFTW_PRESERVE_INPUT);
+  m_in  = static_cast<float *> (fftwf_malloc (sizeof (float) * N_2)); /* owned by this object, released in ~FFTProcessor() */
+  m_out = static_cast<float *> (fftwf_malloc (sizeof (float) * N_2)); /* owned by this object, released in ~FFTProcessor() */

-  // we add code for saving plans here, and use patient planning
+  plan_fft = fftwf_plan_dft_r2c_1d (N, m_in, (fftwf_complex *) m_out, FFTW_ESTIMATE | FFTW_PRESERVE_INPUT); /* planned on the same member buffers fft() executes on */
+  plan_ifft = fftwf_plan_dft_c2r_1d (N, (fftwf_complex *) m_in, m_out, FFTW_ESTIMATE | FFTW_PRESERVE_INPUT); /* planned on the same member buffers ifft() executes on */

-  free_array_float (plan_out);
-  free_array_float (plan_in);
+  // we could add code for saving plans here, and use patient planning
+}
+
+FFTProcessor::~FFTProcessor()
+{
+  fftwf_free (m_in);  /* buffer obtained from fftwf_malloc in the constructor */
+  fftwf_free (m_out); /* NOTE(review): plan_fft / plan_ifft are not released here - confirm whether that is intentional */
 }

 void
-FFTProcessor::fft (float *in, float *out)
+FFTProcessor::fft() /* real-to-complex transform: reads the in() buffer, writes the out() buffer */
 {
-  fftwf_execute_dft_r2c (plan_fft, in, (fftwf_complex *) out);
+  fftwf_execute_dft_r2c (plan_fft, m_in, (fftwf_complex *) m_out); /* m_out is filled with interleaved complex values */
 }

 void
-FFTProcessor::ifft (float *in, float *out)
+FFTProcessor::ifft() /* complex-to-real transform: reads the in() buffer, writes the out() buffer */
 {
-  fftwf_execute_dft_c2r (plan_ifft, (fftwf_complex *)in, out);
+  fftwf_execute_dft_c2r (plan_ifft, (fftwf_complex *) m_in, m_out); /* m_in is interpreted as interleaved complex values */
 }

 vector<float>
@@ -75,18 +66,10 @@ FFTProcessor::ifft (const vector<complex<float>>& in)
 {
  vector<float> out ((in.size() - 1) * 2);

-  /* ensure memory is SSE-aligned (or other vectorized stuff) */
-  float *ifft_in = new_array_float (out.size());
-  float *ifft_out = new_array_float (out.size());
-
-  /* complex<float> vector and fft_out have the same layout in memory */
-  std::copy (in.begin(), in.end(), reinterpret_cast<complex<float> *> (ifft_in));
-  ifft (ifft_in, ifft_out);
-
-  std::copy (ifft_out, ifft_out + out.size(), &out[0]);
-
-  free_array_float (ifft_out);
-  free_array_float (ifft_in);
+  /* complex<float> vector and m_in have the same layout in memory */
+  std::copy (in.begin(), in.end(), reinterpret_cast<complex<float> *> (m_in)); /* NOTE(review): nothing here checks that in.size() fits the buffer sized for the constructor's N */
+  ifft(); /* transforms m_in into m_out */
+  std::copy (m_out, m_out + out.size(), &out[0]); /* copy the real samples out of the member buffer */

  return out;
 }
@@ -96,18 +79,10 @@ FFTProcessor::fft (const vector<float>& in)
 {
  vector<complex<float>> out (in.size() / 2 + 1);

-  /* ensure memory is SSE-aligned (or other vectorized stuff) */
-  float *fft_in = new_array_float (in.size());
-  float *fft_out = new_array_float (in.size());
-
-  std::copy (in.begin(), in.end(), fft_in);
-  fft (fft_in, fft_out);
-
-  /* complex<float> vector and fft_out have the same layout in memory */
-  std::copy (fft_out, fft_out + out.size() * 2, reinterpret_cast<float *> (&out[0]));
-
-  free_array_float (fft_out);
-  free_array_float (fft_in);
+  /* complex<float> vector and m_out have the same layout in memory */
+  std::copy (in.begin(), in.end(), m_in); /* NOTE(review): nothing here checks that in.size() fits the buffer sized for the constructor's N */
+  fft(); /* transforms m_in into m_out */
+  std::copy (m_out, m_out + out.size() * 2, reinterpret_cast<float *> (&out[0])); /* out.size() * 2 floats = every complex value, copied as raw floats */

  return out;
 }
--- a/src/fft.hh
+++ b/src/fft.hh
@@ -26,20 +26,21 @@ class FFTProcessor
 {
  fftwf_plan plan_fft;
  fftwf_plan plan_ifft;
+  float *m_in = nullptr;  /* transform input buffer, allocated by the constructor */
+  float *m_out = nullptr; /* transform output buffer, allocated by the constructor */
 public:
  FFTProcessor (size_t N);
+  ~FFTProcessor(); /* frees m_in / m_out; NOTE(review): copying is not disabled, a copy would free the same buffers again */

  /* low level (fast) */
-  void fft (float *in, float *out);
-  void ifft (float *in, float *out);
+  void   fft();  /* real-to-complex transform from in() to out() */
+  void   ifft(); /* complex-to-real transform from in() to out() */
+  float *in()  { return m_in; }  /* buffer to fill before calling fft() / ifft() */
+  float *out() { return m_out; } /* buffer holding the result after fft() / ifft() */

  /* high level (convenient) */
  std::vector<std::complex<float>> fft (const std::vector<float>& in);
  std::vector<float>               ifft (const std::vector<std::complex<float>>& in);
 };

-float *new_array_float (size_t N);
-void   free_array_float (float *f);
-
-
 #endif /* AUDIOWMARK_FFT_HH */
--- a/src/wmcommon.cc
+++ b/src/wmcommon.cc
@@ -115,15 +115,6 @@ FFTAnalyzer::FFTAnalyzer (int n_channels) :
      m_window[i] *= 2.0 / window_weight;
    }

-  /* allocate properly aligned buffers for SIMD */
-  m_frame  = new_array_float (Params::frame_size);
-  m_frame_fft = new_array_float (Params::frame_size);
-}
-
-FFTAnalyzer::~FFTAnalyzer()
-{
-  free_array_float (m_frame);
-  free_array_float (m_frame_fft);
 }

 vector<vector<complex<float>>>
@@ -131,6 +122,9 @@ FFTAnalyzer::run_fft (const vector<float>& samples, size_t start_index)
 {
  assert (samples.size() >= (Params::frame_size + start_index) * m_n_channels);

+  float *frame     = m_fft_processor.in();
+  float *frame_fft = m_fft_processor.out();
+
  vector<vector<complex<float>>> fft_out;
  for (int ch = 0; ch < m_n_channels; ch++)
    {
@@ -140,14 +134,14 @@ FFTAnalyzer::run_fft (const vector<float>& samples, size_t start_index)
      /* deinterleave frame data and apply window */
      for (size_t x = 0; x < Params::frame_size; x++)
        {
-          m_frame[x] = samples[pos] * m_window[x];
+          frame[x] = samples[pos] * m_window[x];
          pos += m_n_channels;
        }
      /* FFT transform */
-      m_fft_processor.fft (m_frame, m_frame_fft);
+      m_fft_processor.fft();

      /* complex<float> and frame_fft have the same layout in memory */
-      const complex<float> *first = (complex<float> *) m_frame_fft;
+      const complex<float> *first = (complex<float> *) frame_fft;
      const complex<float> *last  = first + Params::frame_size / 2 + 1;
      fft_out.emplace_back (first, last);
    }

--- a/src/wmcommon.hh
+++ b/src/wmcommon.hh
@@ -120,12 +120,9 @@ class FFTAnalyzer
 {
  int           m_n_channels = 0;
  std::vector<float> m_window;
-  float        *m_frame = nullptr;
-  float        *m_frame_fft = nullptr;
  FFTProcessor  m_fft_processor;
 public:
  FFTAnalyzer (int n_channels);
-  ~FFTAnalyzer();

  std::vector<std::vector<std::complex<float>>> run_fft (const std::vector<float>& samples, size_t start_index);
  std::vector<std::vector<std::complex<float>>> fft_range (const std::vector<float>& samples, size_t start_index, size_t frame_count);

--- a/src/wmspeed.cc
+++ b/src/wmspeed.cc
@@ -295,8 +295,8 @@ SpeedSync::prepare_mags()

  FFTProcessor fft_processor (sub_frame_size);

-  float *in = new_array_float (sub_frame_size);
-  float *out = new_array_float (sub_frame_size);
+  float *in = fft_processor.in();
+  float *out = fft_processor.out();

  fft_sync_bits.clear();
  size_t pos = 0;
@@ -311,7 +311,7 @@ SpeedSync::prepare_mags()
            {
              in[i] = samples[ch + (pos + i) * in_data_sub.n_channels()] * window[i];
            }
-          fft_processor.fft (in, out);
+          fft_processor.fft();

          for (int i = Params::min_band; i <= Params::max_band; i++)
            {
@@ -341,9 +341,6 @@ SpeedSync::prepare_mags()
      fft_sync_bits.push_back (mags);
      pos += sub_sync_search_step;
    }
-
-  free_array_float (in);
-  free_array_float (out);
 }

 void