Make FFT usage thread-safe (required for speed detection).

Signed-off-by: Stefan Westerfeld <stefan@space.twc.de>

Make FFT usage thread-safe (required for speed detection).
Signed-off-by: Stefan Westerfeld <stefan@space.twc.de>
55788b03 · Stefan Westerfeld · 7467a9b0 · 55788b03 · 55788b03 · 55788b03
Commit 55788b03 authored Dec 02, 2020 by Stefan Westerfeld
Showing with 70 additions and 57 deletions

fft.cc src/fft.cc +42 -47

fft.hh src/fft.hh +16 -5

wmadd.cc src/wmadd.cc +4 -2

wmcommon.cc src/wmcommon.cc +3 -2

wmcommon.hh src/wmcommon.hh +2 -0

wmspeed.cc src/wmspeed.cc +3 -1

No files found.
--- a/src/fft.cc
+++ b/src/fft.cc
@@ -20,6 +20,7 @@
 #include <fftw3.h>

 #include <map>
+#include <mutex>

 using std::vector;
 using std::complex;
@@ -39,63 +40,38 @@ free_array_float (float *f)
  fftwf_free (f);
 }

-void
-fftar_float (size_t N, float *in, float *out)
-{
-  static map<int, fftwf_plan> plan_for_size;
-
-  fftwf_plan& plan = plan_for_size[N];
-  if (!plan)
-    {
-      float *plan_in = new_array_float (N);
-      float *plan_out = new_array_float (N);
-      plan = fftwf_plan_dft_r2c_1d (N, plan_in, (fftwf_complex *) plan_out, FFTW_ESTIMATE | FFTW_PRESERVE_INPUT);
-
-      // we add code for saving plans here, and use patient planning
-    }
-  fftwf_execute_dft_r2c (plan, in, (fftwf_complex *) out);
-}
+static std::mutex fft_planner_mutex; // serializes plan creation in the FFTProcessor constructor

-void
-fftsr_float (size_t N, float *in, float *out)
+FFTProcessor::FFTProcessor (size_t N)
 {
-  static map<int, fftwf_plan> plan_for_size;
-
-  fftwf_plan& plan = plan_for_size[N];
-  if (!plan)
-    {
-      float *plan_in = new_array_float (N);
-      float *plan_out = new_array_float (N);
-      plan = fftwf_plan_dft_c2r_1d (N, (fftwf_complex *) plan_in, plan_out, FFTW_ESTIMATE | FFTW_PRESERVE_INPUT);
-
-      // we add code for saving plans here, and use patient planning
-    }
-  fftwf_execute_dft_c2r (plan, (fftwf_complex *)in, out);
-}
+  std::lock_guard<std::mutex> lg (fft_planner_mutex); // held until the constructor returns, so only one thread creates plans at a time

-vector<complex<float>>
-fft (const vector<float>& in)
-{
-  vector<complex<float>> out (in.size() / 2 + 1);
+  float *plan_in = new_array_float (N); // scratch buffers handed to the planner only; both are freed below
+  float *plan_out = new_array_float (N);

-  /* ensure memory is SSE-aligned (or other vectorized stuff) */
-  float *fft_in = new_array_float (in.size());
-  float *fft_out = new_array_float (in.size());
+  plan_fft = fftwf_plan_dft_r2c_1d (N, plan_in, (fftwf_complex *) plan_out, FFTW_ESTIMATE | FFTW_PRESERVE_INPUT); // forward (real -> complex) plan for size N
+  plan_ifft = fftwf_plan_dft_c2r_1d (N, (fftwf_complex *) plan_in, plan_out, FFTW_ESTIMATE | FFTW_PRESERVE_INPUT); // inverse (complex -> real) plan for size N

-  std::copy (in.begin(), in.end(), fft_in);
-  fftar_float (in.size(), fft_in, fft_out);
+  // NOTE: plans use FFTW_ESTIMATE; saving plans and patient planning are not implemented here (original note: "we add code for saving plans here, and use patient planning")

-  /* complex<float> vector and fft_out have the same layout in memory */
-  std::copy (fft_out, fft_out + out.size() * 2, reinterpret_cast<float *> (&out[0]));
+  free_array_float (plan_out); // safe to release: fft()/ifft() execute the plans on caller-supplied arrays
+  free_array_float (plan_in);
+}

-  free_array_float (fft_out);
-  free_array_float (fft_in);
+void
+FFTProcessor::fft (float *in, float *out)
+{ // in: real input; out: receives the complex result (accessed as fftwf_complex); presumably both sized for the constructor's N - TODO confirm
+  fftwf_execute_dft_r2c (plan_fft, in, (fftwf_complex *) out); // runs the prebuilt forward plan on the caller's buffers; no planner call, so no lock is taken here
+}

-  return out;
+void
+FFTProcessor::ifft (float *in, float *out)
+{ // in: complex input (accessed as fftwf_complex); out: receives the real result; presumably both sized for the constructor's N - TODO confirm
+  fftwf_execute_dft_c2r (plan_ifft, (fftwf_complex *)in, out); // runs the prebuilt inverse plan on the caller's buffers; no planner call, so no lock is taken here
 }

 vector<float>
-ifft (const vector<complex<float>>& in)
+FFTProcessor::ifft (const vector<complex<float>>& in)
 {
  vector<float> out ((in.size() - 1) * 2);

@@ -105,7 +81,7 @@ ifft (const vector<complex<float>>& in)

  /* complex<float> vector and fft_out have the same layout in memory */
  std::copy (in.begin(), in.end(), reinterpret_cast<complex<float> *> (ifft_in));
-  fftsr_float (out.size(), ifft_in, ifft_out);
+  ifft (ifft_in, ifft_out);

  std::copy (ifft_out, ifft_out + out.size(), &out[0]);

@@ -115,4 +91,23 @@ ifft (const vector<complex<float>>& in)
  return out;
 }

+vector<complex<float>>
+FFTProcessor::fft (const vector<float>& in)
+{ // convenience overload: copies `in` into a temporary buffer, runs the low level fft() and returns the result by value
+  vector<complex<float>> out (in.size() / 2 + 1); // in.size() / 2 + 1 complex output values
+
+  /* use new_array_float buffers for the transform: ensure memory is SSE-aligned (or other vectorized stuff) */
+  float *fft_in = new_array_float (in.size());
+  float *fft_out = new_array_float (in.size());
+
+  std::copy (in.begin(), in.end(), fft_in);
+  fft (fft_in, fft_out); // uses the plan built for the constructor's N; presumably in.size() must equal N (not checked here) - TODO confirm
+
+  /* complex<float> vector and fft_out have the same layout in memory */
+  std::copy (fft_out, fft_out + out.size() * 2, reinterpret_cast<float *> (&out[0])); // NOTE(review): reads out.size() * 2 floats (in.size() + 2 for even sizes) from a buffer requested with in.size() elements; presumably new_array_float reserves the extra space - confirm
+
+  free_array_float (fft_out); // temporary buffers are released before returning
+  free_array_float (fft_in);

+  return out;
+}
--- a/src/fft.hh
+++ b/src/fft.hh
@@ -20,13 +20,24 @@

 #include <complex>
 #include <vector>
+#include <fftw3.h>

-/* high level api */
-std::vector<std::complex<float>> fft (const std::vector<float>& in);
-std::vector<float>               ifft (const std::vector<std::complex<float>>& in);
+class FFTProcessor
+{ // holds one forward and one inverse FFTW plan for a fixed transform size N
+  fftwf_plan plan_fft;  // real -> complex plan, created in the constructor
+  fftwf_plan plan_ifft; // complex -> real plan, created in the constructor; NOTE(review): no destructor is declared, so nothing in this class releases the plans
+public:
+  FFTProcessor (size_t N); // creates both plans for size N (plan creation is guarded by a mutex in fft.cc)
+
+  /* low level (fast): run the plan directly on the caller's buffers */
+  void fft (float *in, float *out);
+  void ifft (float *in, float *out);
+
+  /* high level (convenient): copy through temporary new_array_float buffers */
+  std::vector<std::complex<float>> fft (const std::vector<float>& in);
+  std::vector<float>               ifft (const std::vector<std::complex<float>>& in);
+};

-/* more efficient: low level api */
-void   fftar_float (size_t N, float *in, float *out);
 float *new_array_float (size_t N);
 void   free_array_float (float *f);


--- a/src/wmadd.cc
+++ b/src/wmadd.cc
@@ -169,6 +169,7 @@ class WatermarkSynth
  vector<float> window;
  vector<float> synth_samples;
  bool          first_frame = true;
+  FFTProcessor  fft_processor;

  void
  generate_window()
@@ -202,7 +203,8 @@ class WatermarkSynth
  }
 public:
  WatermarkSynth (int n_channels) :
-    n_channels (n_channels)
+    n_channels (n_channels),
+    fft_processor (Params::frame_size)
  {
    generate_window();
    synth_samples.resize (window.size() * n_channels);
@@ -218,7 +220,7 @@ public:
    for (int ch = 0; ch < n_channels; ch++)
      {
        /* mix watermark signal to output frame */
-        vector<float> fft_delta_out = ifft (fft_delta_spect[ch]);
+        vector<float> fft_delta_out = fft_processor.ifft (fft_delta_spect[ch]);

        for (int dframe = 0; dframe <= 2; dframe++)
          {

--- a/src/wmcommon.cc
+++ b/src/wmcommon.cc
@@ -92,7 +92,8 @@ db_from_factor (double factor, double min_dB)
 }

 FFTAnalyzer::FFTAnalyzer (int n_channels) :
-  m_n_channels (n_channels)
+  m_n_channels (n_channels),
+  m_fft_processor (Params::frame_size)
 {
  /* generate analysis window */
  m_window.resize (Params::frame_size);
@@ -143,7 +144,7 @@ FFTAnalyzer::run_fft (const vector<float>& samples, size_t start_index)
          pos += m_n_channels;
        }
      /* FFT transform */
-      fftar_float (Params::frame_size, m_frame, m_frame_fft);
+      m_fft_processor.fft (m_frame, m_frame_fft);

      /* complex<float> and frame_fft have the same layout in memory */
      const complex<float> *first = (complex<float> *) m_frame_fft;

--- a/src/wmcommon.hh
+++ b/src/wmcommon.hh
@@ -24,6 +24,7 @@
 #include "random.hh"
 #include "rawinputstream.hh"
 #include "wavdata.hh"
+#include "fft.hh"

 #include <assert.h>

@@ -121,6 +122,7 @@ class FFTAnalyzer
  std::vector<float> m_window;
  float        *m_frame = nullptr;
  float        *m_frame_fft = nullptr;
+  FFTProcessor  m_fft_processor;
 public:
  FFTAnalyzer (int n_channels);
  ~FFTAnalyzer();

--- a/src/wmspeed.cc
+++ b/src/wmspeed.cc
@@ -293,6 +293,8 @@ SpeedSync::prepare_mags()
  SyncFinder sync_finder;
  sync_bits = sync_finder.get_sync_bits (in_data, SyncFinder::Mode::BLOCK);

+  FFTProcessor fft_processor (sub_frame_size);
+
  float *in = new_array_float (sub_frame_size);
  float *out = new_array_float (sub_frame_size);

@@ -309,7 +311,7 @@ SpeedSync::prepare_mags()
            {
              in[i] = samples[ch + (pos + i) * in_data_sub.n_channels()] * window[i];
            }
-          fftar_float (sub_frame_size, in, out);
+          fft_processor.fft (in, out);

          for (int i = Params::min_band; i <= Params::max_band; i++)
            {