Integrate non-local bit storage from reorder-basic branch.

- can be disabled using --linear to get the old behaviour - new option --seed to set random seed Signed-off-by: Stefan Westerfeld <stefan@space.twc.de>

Integrate non-local bit storage from reorder-basic branch.
- can be disabled using --linear to get the old behaviour - new option --seed to set random seed Signed-off-by: Stefan Westerfeld <stefan@space.twc.de>
0d48efee · Stefan Westerfeld · 90c4c7f9 · 0d48efee
Commit 0d48efee authored Dec 12, 2018 by Stefan Westerfeld
Hide whitespace changes
Inline Side-by-side

Showing with 297 additions and 39 deletions

audiowmark.cc src/audiowmark.cc +297 -39

No files found.
--- a/src/audiowmark.cc
+++ b/src/audiowmark.cc
@@ -25,6 +25,9 @@ namespace Params
  static int min_band        = 20;
  static double water_delta  = 0.015; // strength of the watermark
  static double pre_scale    = 0.95;  // rescale the signal to avoid clipping after watermark is added
+  static bool mix            = true;
+  static int block           = 32;    // block size for mix step (non-linear bit storage)
+  static unsigned int seed   = 0;
 }

 void
@@ -53,6 +56,7 @@ print_usage()
  printf ("  --frames-per-bit      number of frames per bit            [%d]\n",  Params::frames_per_bit);
  printf ("  --water-delta         set watermarking delta              [%.4f]\n", Params::water_delta);
  printf ("  --pre-scale           set scaling used for normalization  [%.3f]\n", Params::pre_scale);
+  printf ("  --linear              disable non-linear bit storage\n");
 }

 static bool
@@ -141,6 +145,14 @@ parse_options (int   *argc_p,
 	{
          Params::pre_scale = atof (opt_arg);
 	}
+      else if (check_arg (argc, argv, &i, "--linear"))
+	{
+          Params::mix = false;
+	}
+      else if (check_arg (argc, argv, &i, "--seed", &opt_arg))
+	{
+          Params::seed = atoi (opt_arg);
+	}
    }

  /* resort argc/argv */
@@ -215,14 +227,23 @@ get_frame (WavData& wav_data, int f, int ch)
  return result;
 }

+std::mt19937_64
+init_rng (uint64_t seed)
+{
+  const uint64_t  prime = 3126986573;
+
+  std::mt19937_64 rng;
+  rng.seed (seed + prime * Params::seed);
+
+  return rng;
+}
+
 void
 get_up_down (int f, vector<int>& up, vector<int>& down)
 {
  vector<int> used (Params::frame_size / 2);
-  std::mt19937_64 rng;

-  // use per frame random seed, may want to have cryptographically secure algorithm
-  rng.seed (f);
+  std::mt19937_64 rng = init_rng (f); // use per frame random seed, may want to have cryptographically secure algorithm

  auto choose_bands = [&used, &rng] (vector<int>& bands) {
    while (bands.size() < Params::bands_per_frame)
@@ -239,6 +260,19 @@ get_up_down (int f, vector<int>& up, vector<int>& down)
  choose_bands (down);
 }

+template<class T> void
+gen_shuffle (vector<T>& result, int seed)
+{
+  std::mt19937_64 rng = init_rng (seed); // should use cryptographically secure generator, properly seeded
+
+  // Fisher–Yates shuffle
+  for (size_t i = 0; i < result.size() - 1; i++)
+    {
+      size_t j = i + rng() % (result.size() - i);
+      std::swap (result[i], result[j]);
+    }
+}
+
 static unsigned char
 from_hex_nibble (char c)
 {
@@ -378,34 +412,160 @@ add_watermark (const string& infile, const string& outfile, const string& bits)

  printf ("loading %s\n", infile.c_str());

-  WavData wav_data;
-  if (!wav_data.load (infile))
+  WavData in_wav_data;
+  if (!in_wav_data.load (infile))
    {
-      fprintf (stderr, "audiowmark: error loading %s: %s\n", infile.c_str(), wav_data.error_blurb());
+      fprintf (stderr, "audiowmark: error loading %s: %s\n", infile.c_str(), in_wav_data.error_blurb());
      return 1;
    }
+
+  /*
+   * to keep the watermarking code simpler, we pad the wave data with zeros
+   * to avoid processing a partly filled block
+   */
+  vector<float> in_signal (in_wav_data.samples());
+  while (in_signal.size() % (in_wav_data.n_channels() * Params::frame_size * Params::block * Params::frames_per_bit))
+    in_signal.push_back (0);
+
+  WavData wav_data (in_signal, in_wav_data.n_channels(), in_wav_data.mix_freq(), in_wav_data.bit_depth());
+
+  /* we have extra space for the padded wave data -> truncated before save */
  vector<float> out_signal (wav_data.n_values());
  printf ("channels: %d, samples: %zd, mix_freq: %f\n", wav_data.n_channels(), wav_data.n_values(), wav_data.mix_freq());

-  for (int f = 0; f < frame_count (wav_data); f++)
+  if (Params::mix)
    {
-      for (int ch = 0; ch < wav_data.n_channels(); ch++)
+      vector<vector<complex<float>>> fft_out;
+      vector<vector<complex<float>>> fft_delta_spect;
+      for (int f = 0; f < frame_count (wav_data); f++)
        {
-          vector<float> frame = get_frame (wav_data, f, ch);
-          vector<float> new_frame;
+          for (int ch = 0; ch < wav_data.n_channels(); ch++)
+            {
+              vector<float> frame = get_frame (wav_data, f, ch);

-          if (frame.size() == Params::frame_size)
+              /* windowing */
+              double window_weight = 0;
+              for (size_t i = 0; i < frame.size(); i++)
+                {
+                  const double fsize_2 = frame.size() / 2.0;
+                  // const double win =  window_cos ((i - fsize_2) / fsize_2);
+                  const double win = window_hamming ((i - fsize_2) / fsize_2);
+                  //const double win = 1;
+                  frame[i] *= win;
+                  window_weight += win;
+                }
+
+              /* to get normalized fft output corrected by window weight */
+              for (size_t i = 0; i < frame.size(); i++)
+                frame[i] *= 2.0 / window_weight;
+
+              /* FFT transform */
+              fft_out.push_back (fft (frame));
+              fft_delta_spect.push_back (vector<complex<float>> (fft_out.back().size()));
+            }
+        }
+      for (int block = 0; block < frame_count (wav_data) / (Params::block * Params::frames_per_bit); block++)
+        {
+          struct MixEntry {
+            int  frame;
+            int  up;
+            int  down;
+          };
+          vector<MixEntry> mix_entries;
+          for (int f = 0; f < Params::block * Params::frames_per_bit; f++)
            {
-              const int data_bit = bitvec[(f / Params::frames_per_bit) % bitvec.size()];
+              vector<int> up;
+              vector<int> down;
+              get_up_down (f, up, down);

-              new_frame = watermark_frame (frame, f, data_bit);
+              assert (up.size() == down.size());
+              for (size_t i = 0; i < up.size(); i++)
+                mix_entries.push_back ({ f, up[i], down[i] });
            }
-          else
+          gen_shuffle (mix_entries, /* seed */ block);
+
+          const int block_start = block * Params::block * Params::frames_per_bit;
+          for (int f = 0; f < Params::block * Params::frames_per_bit; f++)
            {
-              new_frame = frame;
+              for (int ch = 0; ch < wav_data.n_channels(); ch++)
+                {
+                  for (int frame_b = 0; frame_b < Params::bands_per_frame; frame_b++)
+                    {
+                      int b = f * Params::bands_per_frame + frame_b;
+
+                      const int data_bit = bitvec[((block_start + f) / Params::frames_per_bit) % bitvec.size()];
+                      const double  data_bit_sign = data_bit > 0 ? 1 : -1;
+
+                      const int u = mix_entries[b].up;
+                      const int index = (block_start + mix_entries[b].frame) * wav_data.n_channels() + ch;
+                      {
+                        const float mag_factor = pow (abs (fft_out[index][u]), -Params::water_delta * data_bit_sign);
+
+                        fft_delta_spect[index][u] = fft_out[index][u] * (mag_factor - 1);
+                      }
+                      const int d = mix_entries[b].down;
+                      {
+                        const float mag_factor = pow (abs (fft_out[index][d]), Params::water_delta * data_bit_sign);
+
+                        fft_delta_spect[index][d] = fft_out[index][d] * (mag_factor - 1);
+                      }
+                    }
+                }
+            }
+        }
+
+      for (int f = 0; f < frame_count (wav_data); f++)
+        {
+          for (int ch = 0; ch < wav_data.n_channels(); ch++)
+            {
+              /* add watermark to output frame */
+              vector<float> fft_delta_out = ifft (fft_delta_spect[f * wav_data.n_channels() + ch]);
+
+              vector<float> input_frame = get_frame (wav_data, f, ch);
+              vector<float> synth_window (Params::frame_size);
+              for (size_t i = 0; i < Params::frame_size; i++)
+                {
+                  const double threshold = 0.2;
+
+                  // triangular basic window
+                  const double tri = min (1.0 - fabs (double (2 * i)/Params::frame_size - 1.0), threshold) / threshold;
+
+                  // cosine
+                  synth_window[i] = (cos (tri*M_PI+M_PI)+1) * 0.5;
+                }
+
+              vector<float> new_frame = input_frame;
+              for (size_t i = 0; i < new_frame.size(); i++)
+                new_frame[i] += fft_delta_out[i] * synth_window[i];
+
+              /* modify out signal */
+              for (size_t i = 0; i < new_frame.size(); i++)
+                out_signal[(f * Params::frame_size + i) * wav_data.n_channels() + ch] = new_frame[i] * Params::pre_scale;
+            }
+        }
+    }
+  else
+    {
+      for (int f = 0; f < frame_count (wav_data); f++)
+        {
+          for (int ch = 0; ch < wav_data.n_channels(); ch++)
+            {
+              vector<float> frame = get_frame (wav_data, f, ch);
+              vector<float> new_frame;
+
+              if (frame.size() == Params::frame_size)
+                {
+                  const int data_bit = bitvec[(f / Params::frames_per_bit) % bitvec.size()];
+
+                  new_frame = watermark_frame (frame, f, data_bit);
+                }
+              else
+                {
+                  new_frame = frame;
+                }
+              for (size_t i = 0; i < new_frame.size(); i++)
+                out_signal[(f * Params::frame_size + i) * wav_data.n_channels() + ch] = new_frame[i] * Params::pre_scale;
            }
-          for (size_t i = 0; i < new_frame.size(); i++)
-            out_signal[(f * Params::frame_size + i) * wav_data.n_channels() + ch] = new_frame[i] * Params::pre_scale;
        }
    }

@@ -419,6 +579,8 @@ add_watermark (const string& infile, const string& outfile, const string& bits)
        }
    }

+  out_signal.resize (in_wav_data.n_values()); /* undo zero padding after load */
+
  WavData out_wav_data (out_signal, wav_data.n_channels(), wav_data.mix_freq(), wav_data.bit_depth());
  if (!out_wav_data.save (outfile))
    {
@@ -431,22 +593,30 @@ add_watermark (const string& infile, const string& outfile, const string& bits)
 int
 get_watermark (const string& infile, const string& orig_pattern)
 {
-  WavData wav_data;
-  if (!wav_data.load (infile))
+  WavData in_wav_data;
+  if (!in_wav_data.load (infile))
    {
-      fprintf (stderr, "audiowmark: error loading %s: %s\n", infile.c_str(), wav_data.error_blurb());
+      fprintf (stderr, "audiowmark: error loading %s: %s\n", infile.c_str(), in_wav_data.error_blurb());
      return 1;
    }
  vector<int> bit_vec;

-  double umag = 0, dmag = 0;
-  for (int f = 0; f < frame_count (wav_data); f++)
+  // to keep the watermark detection code simpler, we truncate samples to avoid partial filled blocks
+  vector<float> in_signal (in_wav_data.samples());
+  while (in_signal.size() % (in_wav_data.n_channels() * Params::frame_size * Params::block * Params::frames_per_bit))
+    in_signal.pop_back();
+
+  WavData wav_data (in_signal, in_wav_data.n_channels(), in_wav_data.mix_freq(), in_wav_data.bit_depth());
+
+  if (Params::mix)
    {
-      for (int ch = 0; ch < wav_data.n_channels(); ch++)
+      vector<vector<complex<float>>> fft_out;
+      for (int f = 0; f < frame_count (wav_data); f++)
        {
-          vector<float> frame = get_frame (wav_data, f, ch);
-          if (frame.size() == Params::frame_size)
+          for (int ch = 0; ch < wav_data.n_channels(); ch++)
            {
+              vector<float> frame = get_frame (wav_data, f, ch);
+
              /* windowing */
              double window_weight = 0;
              for (size_t i = 0; i < frame.size(); i++)
@@ -464,28 +634,109 @@ get_watermark (const string& infile, const string& orig_pattern)
                frame[i] *= 2.0 / window_weight;

              /* FFT transform */
-              vector<complex<float>> fft_out = fft (frame);
-
+              fft_out.push_back (fft (frame));
+            }
+        }
+      for (int block = 0; block < frame_count (wav_data) / (Params::block * Params::frames_per_bit); block++)
+        {
+          struct MixEntry {
+            int  frame;
+            int  up;
+            int  down;
+          };
+          vector<MixEntry> mix_entries;
+          for (int f = 0; f < Params::block * Params::frames_per_bit; f++)
+            {
              vector<int> up;
              vector<int> down;
              get_up_down (f, up, down);

-              const double min_db = -96;
-              for (auto u : up)
+              assert (up.size() == down.size());
+              for (size_t i = 0; i < up.size(); i++)
+                mix_entries.push_back ({ f, up[i], down[i] });
+            }
+          gen_shuffle (mix_entries, /* seed */ block);
+
+          double umag = 0, dmag = 0;
+          for (int f = 0; f < Params::block * Params::frames_per_bit; f++)
+            {
+              for (int ch = 0; ch < wav_data.n_channels(); ch++)
                {
-                  umag += db_from_factor (abs (fft_out[u]), min_db);
+                  for (int frame_b = 0; frame_b < Params::bands_per_frame; frame_b++)
+                    {
+                      int b = f * Params::bands_per_frame + frame_b;
+                      const double min_db = -96;
+
+                      const int index = (block * (Params::block * Params::frames_per_bit) + mix_entries[b].frame) * wav_data.n_channels() + ch;
+                      const int u = mix_entries[b].up;
+                      {
+                        umag += db_from_factor (abs (fft_out[index][u]), min_db);
+                      }
+                      const int d = mix_entries[b].down;
+                      {
+                        dmag += db_from_factor (abs (fft_out[index][d]), min_db);
+                      }
+                    }
                }
-              for (auto d : down)
+              if ((f % Params::frames_per_bit) == (Params::frames_per_bit - 1))
                {
-                  dmag += db_from_factor (abs (fft_out[d]), min_db);
+                  bit_vec.push_back ((umag > dmag) ? 1 : 0);
+                  umag = 0;
+                  dmag = 0;
                }
            }
        }
-      if ((f % Params::frames_per_bit) == (Params::frames_per_bit - 1))
+    }
+  else
+    {
+      double umag = 0, dmag = 0;
+      for (int f = 0; f < frame_count (wav_data); f++)
        {
-          bit_vec.push_back ((umag > dmag) ? 1 : 0);
-          umag = 0;
-          dmag = 0;
+          for (int ch = 0; ch < wav_data.n_channels(); ch++)
+            {
+              vector<float> frame = get_frame (wav_data, f, ch);
+              if (frame.size() == Params::frame_size)
+                {
+                  /* windowing */
+                  double window_weight = 0;
+                  for (size_t i = 0; i < frame.size(); i++)
+                    {
+                      const double fsize_2 = frame.size() / 2.0;
+                      // const double win =  window_cos ((i - fsize_2) / fsize_2);
+                      const double win = window_hamming ((i - fsize_2) / fsize_2);
+                      //const double win = 1;
+                      frame[i] *= win;
+                      window_weight += win;
+                    }
+
+                  /* to get normalized fft output corrected by window weight */
+                  for (size_t i = 0; i < frame.size(); i++)
+                    frame[i] *= 2.0 / window_weight;
+
+                  /* FFT transform */
+                  vector<complex<float>> fft_out = fft (frame);
+
+                  vector<int> up;
+                  vector<int> down;
+                  get_up_down (f, up, down);
+
+                  const double min_db = -96;
+                  for (auto u : up)
+                    {
+                      umag += db_from_factor (abs (fft_out[u]), min_db);
+                    }
+                  for (auto d : down)
+                    {
+                      dmag += db_from_factor (abs (fft_out[d]), min_db);
+                    }
+                }
+            }
+          if ((f % Params::frames_per_bit) == (Params::frames_per_bit - 1))
+            {
+              bit_vec.push_back ((umag > dmag) ? 1 : 0);
+              umag = 0;
+              dmag = 0;
+            }
        }
    }
  printf ("pattern %s\n", bit_vec_to_str (bit_vec).c_str());
@@ -515,13 +766,20 @@ get_watermark_delta (const string& origfile, const string& infile, const string&
      return 1;
    }

-  WavData wav_data;
-  if (!wav_data.load (infile))
+  WavData in_wav_data;
+  if (!in_wav_data.load (infile))
    {
-      fprintf (stderr, "audiowmark: error loading %s: %s\n", infile.c_str(), wav_data.error_blurb());
+      fprintf (stderr, "audiowmark: error loading %s: %s\n", infile.c_str(), in_wav_data.error_blurb());
      return 1;
    }

+  // to keep the watermark detection code simpler, we truncate samples to avoid partial filled blocks
+  vector<float> in_signal (in_wav_data.samples());
+  while (in_signal.size() % (in_wav_data.n_channels() * Params::frame_size * Params::block * Params::frames_per_bit))
+    in_signal.pop_back();
+
+  WavData wav_data (in_signal, in_wav_data.n_channels(), in_wav_data.mix_freq(), in_wav_data.bit_depth());
+
  vector<int> bit_vec;
  double error0 = 0;
  double error1 = 0;