Implemented automatic volume normalization.

This replaces the old static --pre-scale factor with a factor computed based on the output signal/watermark. Unlike --pre-scale, this algorithm adapts automatically if --strength is modified.

Signed-off-by: Stefan Westerfeld <stefan@space.twc.de>

Implemented automatic volume normalization.
This replaces the old static --pre-scale factor with a factor computed based on the output signal/watermark. Unlike --pre-scale, this algorithm adapts automatically if --strength is modified.

Signed-off-by: Stefan Westerfeld <stefan@space.twc.de>
67cc39a3 · Stefan Westerfeld · aacf8e3b · 67cc39a3 · 67cc39a3 · 67cc39a3
Commit 67cc39a3 authored Feb 08, 2019 by Stefan Westerfeld
Hide whitespace changes
Inline Side-by-side

Showing with 26 additions and 27 deletions

README.adoc README.adoc +0 -10

audiowmark.cc src/audiowmark.cc +18 -10

utils.hh src/utils.hh +7 -0

wavdata.cc src/wavdata.cc +1 -7

No files found.
--- a/README.adoc
+++ b/README.adoc
@@ -71,16 +71,6 @@ watermark. Fractional strengths (like 7.5) are possible.
  audiowmark add --strength 15 in.wav out.wav 0123456789abcdef0011223344556677
  audiowmark get --strength 15 out.wav

-Since the watermark modifies the input signal, it will make it louder at some
-positions. To avoid clipping, the input signal is scaled down during
-watermarking. For the strength values mentioned here everything should work,
-however for very strong watermarks you are likely to see a warning like this
-
-  audiowmark: warning: clipping occured in watermarked audio signal
-
-which can be fixed by modifying the scaling factor (which defaults to
-0.95) using `--pre-scale 0.9`.
-
 == Dependencies

 If you compile from source, audiowmark needs the follwing libraries:

--- a/src/audiowmark.cc
+++ b/src/audiowmark.cc
@@ -22,6 +22,7 @@ using std::string;
 using std::vector;
 using std::complex;
 using std::min;
+using std::max;

 namespace Params
 {
@@ -78,7 +79,6 @@ print_usage()
  printf ("\n");
  printf ("Global options:\n");
  printf ("  --strength            set watermark strength              [%.6g]\n", Params::water_delta * 1000);
-  printf ("  --pre-scale           set scaling used for normalization  [%.3f]\n", Params::pre_scale);
  printf ("  --linear              disable non-linear bit storage\n");
  printf ("  --key <file>          load watermarking key from file\n");
 }
@@ -746,19 +746,27 @@ add_watermark (const string& infile, const string& outfile, const string& bits)
  vector<float> samples = orig_wav_data.samples();
  out_signal.resize (samples.size());

+  float max_value = 1e-6;
  for (size_t i = 0; i < samples.size(); i++)
-    samples[i] = (samples[i] + out_signal[i]) * Params::pre_scale;
-
-  bool clipping_warning = false;
-  for (auto value : samples)
    {
-      if (fabs (value) >= 1.0 && !clipping_warning)
-        {
-          fprintf (stderr, "audiowmark: warning: clipping occured in watermarked audio signal\n");
-          clipping_warning = true;
-        }
+      /* Typically the original samples are already in range [-1;1]. However in
+       * some cases (mp3 loader), the samples are not fully normalized; in those
+       * cases, for volume normalization we treat them as-if they had been
+       * clipped already; final clipping will be done while saving.
+       */
+      const float x = bound<float> (-1, samples[i], 1);
+      const float value = fabsf (x + out_signal[i]);
+      if (value > max_value)
+        max_value = value;
    }
+
+  // scale (samples + watermark) down if necessary to avoid clipping
+  const float scale = min (1.0 / max_value, 1.0);
+  for (size_t i = 0; i < samples.size(); i++)
+    samples[i] = (samples[i] + out_signal[i]) * scale;
+
  printf ("Data Blocks:  %d\n", data_blocks);
+  printf ("Volume Norm:  %.3f (%.2f dB)\n", scale, db_from_factor (scale, -96));

  WavData out_wav_data (samples, orig_wav_data.n_channels(), orig_wav_data.sample_rate(), orig_wav_data.bit_depth());
  if (!out_wav_data.save (outfile))

--- a/src/utils.hh
+++ b/src/utils.hh
@@ -10,4 +10,11 @@ std::string      bit_vec_to_str (const std::vector<int>& bit_vec);
 std::vector<unsigned char> hex_str_to_vec (const std::string& str);
 std::string                vec_to_hex_str (const std::vector<unsigned char>& vec);

+template<typename T>
+inline const T&
+bound (const T& min_value, const T& value, const T& max_value)
+{
+  return std::min (std::max (value, min_value), max_value);
+}
+
 #endif /* AUDIOWMARK_UTILS_HH */
--- a/src/wavdata.cc
+++ b/src/wavdata.cc
 #include "wavdata.hh"
 #include "mp3.hh"
+#include "utils.hh"

 #include <math.h>
 #include <sndfile.h>
@@ -7,13 +8,6 @@
 using std::string;
 using std::vector;

-template<typename T>
-inline const T&
-bound (const T& min_value, const T& value, const T& max_value)
-{
-  return std::min (std::max (value, min_value), max_value);
-}
-
 WavData::WavData()
 {
 }