Merge branch 'mp3'

1e1a673c · Stefan Westerfeld · 7181bb40 · cbf3df3c · 1e1a673c · 1e1a673c
Commit 1e1a673c authored Feb 10, 2019 by Stefan Westerfeld
15 changed files
--- a/Dockerfile
+++ b/Dockerfile
@@ -9,6 +9,7 @@ RUN apt-get install -y libtool
 RUN apt-get install -y autoconf-archive
 RUN apt-get install -y libgcrypt20-dev
 RUN apt-get install -y libzita-resampler-dev
+RUN apt-get install -y libmpg123-dev

 ADD . /audiowmark
 WORKDIR /audiowmark

--- a/README.adoc
+++ b/README.adoc
@@ -8,15 +8,16 @@ watermark in the output sound file. For human listeners, the files typically
 sound the same.

 However, the 128-bit message can be retrieved from the output sound file. Our
-tests show, that even if the file is converted to mp3 or ogg (with bitrates
-higher than 128 kbit/s), the watermark usually can be retrieved without
-problems. The process of retrieving the message does not need the original
-audio file (blind decoding).
+tests show, that even if the file is converted to mp3 or ogg (with bitrate 128
+kbit/s or higher), the watermark usually can be retrieved without problems. The
+process of retrieving the message does not need the original audio file (blind
+decoding).

 Internally, audiowmark is using the patchwork algorithm to hide the data in the
-spectrum of the audio file. Some pseoudo-randomly selected amplitudes of the
-frequency bands of a 1024-value FFTs are increased or decreased slightly, which
-can be detected later. The algorithm used here is inspired by
+spectrum of the audio file. The signal is split into 1024-sample frames. For
+each frame, some pseudo-randomly selected amplitudes of the frequency bands of
+a 1024-value FFT are increased or decreased slightly, which can be detected
+later. The algorithm used here is inspired by

  Martin Steinebach: Digitale Wasserzeichen für Audiodaten.
  Darmstadt University of Technology 2004, ISBN 3-8322-2507-2
@@ -51,6 +52,26 @@ and can be used for the add/get commands as follows:
  audiowmark add --key test.key in.wav out.wav 0123456789abcdef0011223344556677
  audiowmark get --key test.key out.wav

+== Watermark Strength
+
+The watermark strength parameter affects how much the watermarking algorithm
+modifies the input signal. A stronger watermark is more audible, but also more
+robust against modifications. The default strength is 10. A watermark with that
+strength is recoverable after mp3/ogg encoding with 128kbit/s or higher. In our
+informal listening tests, this setting also has a very good subjective quality.
+
+A higher strength (for instance 15) is helpful if robustness against
+multiple conversions or conversions to low bit rates (e.g. 64kbit/s) is
+desired.
+
+A lower strength (for instance 6) makes the watermark less audible, but also
+less robust. Strengths below 5 are not recommended. To set the strength, the
+same value has to be passed during both generation and retrieval of the
+watermark. Fractional strengths (like 7.5) are possible.
+
+  audiowmark add --strength 15 in.wav out.wav 0123456789abcdef0011223344556677
+  audiowmark get --strength 15 out.wav
+
 == Dependencies

 If you compile from source, audiowmark needs the follwing libraries:
@@ -59,6 +80,7 @@ If you compile from source, audiowmark needs the follwing libraries:
 * libsndfile
 * libgcrypt
 * libzita-resampler
+* libmpg123

 == Building fftw


--- a/configure.ac
+++ b/configure.ac
@@ -17,6 +17,16 @@ AC_DEFUN([AC_SNDFILE_REQUIREMENTS],
    AC_SUBST(SNDFILE_LIBS)
 ])

+dnl
+dnl libmpg123
+dnl
+AC_DEFUN([AC_LIBMPG123_REQUIREMENTS],
+[
+    PKG_CHECK_MODULES(LIBMPG123, [libmpg123])
+    AC_SUBST(LIBMPG123_CFLAGS)
+    AC_SUBST(LIBMPG123_LIBS)
+])
+
 dnl
 dnl zita resampler
 dnl
@@ -57,6 +67,7 @@ AC_DEFUN([AC_FFTW_CHECK],
 ])

 AC_SNDFILE_REQUIREMENTS
+AC_LIBMPG123_REQUIREMENTS
 AC_ZITA_REQUIREMENTS
 AC_FFTW_CHECK
 AM_PATH_LIBGCRYPT

--- a/src/.gitignore
+++ b/src/.gitignore
@@ -4,3 +4,5 @@
 test/
 audiowmark
 testconvcode
+testmp3
+testrandom
--- a/src/Makefile.am
+++ b/src/Makefile.am
 bin_PROGRAMS = audiowmark

-COMMON_SRC = utils.hh utils.cc convcode.hh convcode.cc random.hh random.cc
+COMMON_SRC = utils.hh utils.cc convcode.hh convcode.cc random.hh random.cc mp3.cc mp3.hh wavdata.cc wavdata.hh
+COMMON_LIBS = $(SNDFILE_LIBS) $(FFTW_LIBS) $(LIBGCRYPT_LIBS) $(LIBMPG123_LIBS)

-audiowmark_SOURCES = audiowmark.cc wavdata.cc wavdata.hh fft.cc fft.hh $(COMMON_SRC)
-audiowmark_LDFLAGS = $(SNDFILE_LIBS) $(FFTW_LIBS) $(LIBGCRYPT_LIBS)
+audiowmark_SOURCES = audiowmark.cc fft.cc fft.hh $(COMMON_SRC)
+audiowmark_LDFLAGS = $(COMMON_LIBS)

-noinst_PROGRAMS = testconvcode testrandom
+noinst_PROGRAMS = testconvcode testrandom testmp3

 testconvcode_SOURCES = testconvcode.cc $(COMMON_SRC)
-testconvcode_LDFLAGS = $(LIBGCRYPT_LIBS)
+testconvcode_LDFLAGS = $(COMMON_LIBS)

 testrandom_SOURCES = testrandom.cc $(COMMON_SRC)
-testrandom_LDFLAGS = $(LIBGCRYPT_LIBS)
+testrandom_LDFLAGS = $(COMMON_LIBS)
+
+testmp3_SOURCES = testmp3.cc $(COMMON_SRC)
+testmp3_LDFLAGS = $(COMMON_LIBS)
--- a/src/audiowmark.cc
+++ b/src/audiowmark.cc
--- a/src/ber-test.sh
+++ b/src/ber-test.sh
@@ -58,8 +58,7 @@ do
        exit 1
      fi
      lame -b $2 ${AWM_FILE}.wav ${AWM_FILE}.mp3 --quiet
-      rm ${AWM_FILE}.wav
-      ffmpeg -i ${AWM_FILE}.mp3 ${AWM_FILE}.wav -v quiet -nostdin
+      OUT_FILE=${AWM_FILE}.mp3
    elif [ "x$TRANSFORM" == "xdouble-mp3" ]; then
      if [ "x$2" == "x" ]; then
        echo "need mp3 bitrate" >&2
@@ -72,23 +71,22 @@ do

      # second mp3 step
      lame -b $2 ${AWM_FILE}.wav ${AWM_FILE}.mp3 --quiet
-      rm ${AWM_FILE}.wav
-      ffmpeg -i ${AWM_FILE}.mp3 ${AWM_FILE}.wav -v quiet -nostdin
+      OUT_FILE=${AWM_FILE}.mp3
    elif [ "x$TRANSFORM" == "xogg" ]; then
      if [ "x$2" == "x" ]; then
        echo "need ogg bitrate" >&2
        exit 1
      fi
      oggenc -b $2 ${AWM_FILE}.wav -o ${AWM_FILE}.ogg --quiet
-      oggdec ${AWM_FILE}.ogg -o ${AWM_FILE}.wav --quiet
+      OUT_FILE=${AWM_FILE}.ogg
    elif [ "x$TRANSFORM" == "x" ]; then
-      :
+      OUT_FILE=${AWM_FILE}.wav
    else
      echo "unknown transform $TRANSFORM" >&2
      exit 1
    fi
    # blind decoding
-    audiowmark cmp ${AWM_FILE}.wav $PATTERN $AWM_PARAMS --test-key $SEED $TEST_CUT_ARGS
+    audiowmark cmp $OUT_FILE $PATTERN $AWM_PARAMS --test-key $SEED $TEST_CUT_ARGS
    # decoding with original
    # audiowmark cmp-delta "$i" t.wav $PATTERN $AWM_PARAMS --test-key $SEED
  done

--- a/src/delta2snr.sh
+++ b/src/delta2snr.sh
-for delta in 0.030 0.020 0.015 0.010 0.005 0.003 0.002 0.001
-do
-  #echo $delta $(AWM_PARAMS="--water-delta=$delta" ber-test.sh double-mp3 128) $(AWM_PARAMS="--water-delta=$delta" ber-test1.sh double-mp3 128)
-  #echo $delta $(AWM_PARAMS="--water-delta=$delta" peaq.sh)
-  echo $delta $(AWM_PARAMS="--water-delta=$delta" snr.sh)
-done
--- a/src/mp3.cc
+++ b/src/mp3.cc
+#include "mp3.hh"
+
+#include <mpg123.h>
+#include <assert.h>
+#include <stdio.h>
+#include <vector>
+
+using std::vector;
+using std::string;
+
+/* Scope guard for an mpg123 handle.
+ *
+ * When the guard is destroyed, a non-null handle is closed first (only if
+ * need_close has been set) and then deleted. A null handle is left alone.
+ */
+struct ScopedMHandle
+{
+  mpg123_handle *mh         = nullptr;
+  bool           need_close = false;
+
+  ~ScopedMHandle()
+  {
+    if (!mh)
+      return;
+
+    if (need_close)
+      mpg123_close (mh);
+
+    mpg123_delete (mh);
+  }
+};
+
+/* Initialize the mpg123 library the first time this is called; once the
+ * initialization has succeeded, further calls return immediately.
+ *
+ * If mpg123_init() fails, an error is printed to stderr and the process
+ * exits with status 1.
+ */
+void
+mp3_init()
+{
+  static bool initialized = false;
+
+  if (initialized)
+    return;
+
+  if (mpg123_init() != MPG123_OK)
+    {
+      fprintf (stderr, "audiowmark: init mpg123 lib failed\n");
+      exit (1);
+    }
+
+  initialized = true;
+}
+
+/* Check whether filename looks like an mp3 file.
+ *
+ * There is no really simple way of detecting if something is an mp3, so we
+ * try to decode a few blocks; if that works without error the file is
+ * probably a valid mp3.
+ *
+ * Returns true if the file could be opened, its format could be determined
+ * and up to 10 blocks could be read without error (reaching the end of the
+ * stream earlier also counts as success); returns false otherwise.
+ */
+bool
+mp3_detect (const string& filename)
+{
+  int err = 0;
+
+  mp3_init();
+
+  mpg123_handle *mh = mpg123_new (NULL, &err);
+  if (!mh || err != MPG123_OK)
+    return false;
+
+  auto smh = ScopedMHandle { mh }; // cleanup on return
+
+  err = mpg123_param (mh, MPG123_ADD_FLAGS, MPG123_QUIET, 0);
+  if (err != MPG123_OK)
+    return false;
+
+  err = mpg123_open (mh, filename.c_str());
+  if (err != MPG123_OK)
+    return false;
+
+  smh.need_close = true;
+
+  long rate;
+  int channels;
+  int encoding;
+  err = mpg123_getformat (mh, &rate, &channels, &encoding);
+  if (err != MPG123_OK)
+    return false;
+
+  // heap allocated scratch buffer: a variable length array on the stack is
+  // a compiler extension and not part of standard C++
+  const size_t buffer_bytes = mpg123_outblock (mh);
+  vector<unsigned char> buffer (buffer_bytes);
+
+  for (size_t i = 0; i < 10; i++)
+    {
+      size_t done = 0;
+      err = mpg123_read (mh, buffer.data(), buffer_bytes, &done);
+      if (err == MPG123_DONE)
+        return true;   // short file: everything decoded without error
+
+      if (err != MPG123_OK)
+        return false;
+    }
+  return true;
+}
+
+/* Decode the mp3 file filename completely and store the result in wav_data.
+ *
+ * The decoder is forced to produce 32 bit float output; the sample rate and
+ * channel count are taken from the start of the stream and then pinned so
+ * that the output format cannot change while decoding.
+ *
+ * Returns an empty string on success, otherwise a description of the error.
+ * wav_data is only assigned on success.
+ */
+string
+mp3_load (const string& filename, WavData& wav_data)
+{
+  int err = 0;
+
+  mp3_init();
+
+  mpg123_handle *mh = mpg123_new (NULL, &err);
+  if (!mh || err != MPG123_OK)
+    return "mpg123_new failed";
+
+  auto smh = ScopedMHandle { mh }; // cleanup on return
+
+  err = mpg123_param (mh, MPG123_ADD_FLAGS, MPG123_QUIET, 0);
+  if (err != MPG123_OK)
+    return "setting quiet mode failed";
+
+  // allow arbitrary amount of data for resync
+  err = mpg123_param (mh, MPG123_RESYNC_LIMIT, -1, 0);
+  if (err != MPG123_OK)
+    return "setting resync limit parameter failed";
+
+  // force floating point output
+  {
+    const long *rates;
+    size_t      rate_count;
+
+    mpg123_format_none (mh);
+    mpg123_rates (&rates, &rate_count);
+
+    for (size_t i = 0; i < rate_count; i++)
+      {
+        err = mpg123_format (mh, rates[i], MPG123_MONO|MPG123_STEREO, MPG123_ENC_FLOAT_32);
+        if (err != MPG123_OK)
+          return mpg123_strerror (mh);
+      }
+  }
+
+  err = mpg123_open (mh, filename.c_str());
+  if (err != MPG123_OK)
+    return mpg123_strerror (mh);
+
+  smh.need_close = true;
+
+  long rate;
+  int channels;
+  int encoding;
+
+  err = mpg123_getformat (mh, &rate, &channels, &encoding);
+  if (err != MPG123_OK)
+    return mpg123_strerror (mh);
+
+  /* ensure that the format will not change */
+  mpg123_format_none (mh);
+  err = mpg123_format (mh, rate, channels, encoding);
+  if (err != MPG123_OK)
+    return mpg123_strerror (mh);
+
+  size_t buffer_bytes = mpg123_outblock (mh);
+  assert (buffer_bytes % sizeof (float) == 0);
+
+  vector<float> buffer (buffer_bytes / sizeof (float));
+  vector<float> samples;
+
+  while (true)
+    {
+      size_t done = 0;
+
+      err = mpg123_read (mh, reinterpret_cast<unsigned char *> (buffer.data()), buffer_bytes, &done);
+
+      // keep whatever was decoded, no matter which status came with it: the
+      // final (partial) block of a stream can be delivered together with
+      // MPG123_DONE, and dropping it would cut off the end of the track
+      const size_t n_values = done / sizeof (float);
+      samples.insert (samples.end(), buffer.begin(), buffer.begin() + n_values);
+
+      if (err == MPG123_DONE)
+        {
+          // NOTE(review): the last argument (24) is presumably the bit depth
+          // stored in WavData - confirm against the WavData constructor
+          wav_data = WavData (samples, channels, rate, 24);
+
+          return ""; /* success */
+        }
+
+      // some mp3s report MPG123_NEED_MORE before reaching eof -> harmless,
+      // just keep reading; everything else (besides MPG123_OK) is an error
+      if (err != MPG123_OK && err != MPG123_NEED_MORE)
+        return mpg123_strerror (mh);
+    }
+}
--- a/src/mp3.hh
+++ b/src/mp3.hh
+#ifndef AUDIOWMARK_MP3_HH
+#define AUDIOWMARK_MP3_HH
+
+#include <string>
+
+#include "wavdata.hh"
+
+bool        mp3_detect (const std::string& filename);
+std::string mp3_load   (const std::string& filename, WavData& wav_data);
+
+#endif /* AUDIOWMARK_MP3_HH */
--- a/src/snr.sh
+++ b/src/snr.sh
@@ -4,6 +4,5 @@ PATTERN=4e1243bd22c66e76c2ba9eddc1f91394

 for i in test/T*
 do
-  audiowmark add $i t.wav $PATTERN $AWM_PARAMS >/dev/null
-  echo $i $(audiowmark snr $i t.wav)
-done | grep snr | awk '{s += $3; n++} END { print s/n; }'
+  echo $i $(audiowmark add $i t.wav $PATTERN $AWM_PARAMS --snr | grep SNR)
+done | awk '{s += $3; n++} END { print s/n; }'
--- a/src/strength2snr.sh
+++ b/src/strength2snr.sh
+for strength in 30 20 15 10 5 3 2 1
+do
+  echo $strength $(AWM_PARAMS="--strength=$strength" snr.sh)
+done
--- a/src/testmp3.cc
+++ b/src/testmp3.cc
+#include "mp3.hh"
+
+using std::string;
+
+/* Small test program for the mp3 loader.
+ *
+ * usage: testmp3 <in.mp3> [ <out.wav> ]
+ *
+ * Runs mp3_detect and mp3_load on the input file and prints its duration;
+ * if exactly two arguments are given, the decoded audio is saved to the
+ * second one. Exits with status 1 if detection or loading fails; without
+ * arguments nothing is done.
+ */
+int
+main (int argc, char **argv)
+{
+  WavData wd;
+
+  if (argc < 2)
+    return 0;
+
+  const char *in_file = argv[1];
+
+  if (!mp3_detect (in_file))
+    {
+      printf ("mp3 detect %s failed\n", in_file);
+      return 1;
+    }
+
+  string error = mp3_load (in_file, wd);
+  if (error != "")
+    {
+      printf ("mp3 load %s failed: %s\n", in_file, error.c_str());
+      return 1;
+    }
+
+  // total values / channels / sample rate = duration in whole seconds
+  int sec = wd.n_values() / wd.n_channels() / wd.sample_rate();
+  printf ("loaded mp3 %s: %d:%02d\n", in_file, sec / 60, sec % 60);
+
+  if (argc == 3)
+    {
+      wd.save (argv[2]);
+      printf ("saved wav: %s\n", argv[2]);
+    }
+
+  return 0;
+}
--- a/src/utils.hh
+++ b/src/utils.hh
@@ -10,4 +10,11 @@ std::string      bit_vec_to_str (const std::vector<int>& bit_vec);
 std::vector<unsigned char> hex_str_to_vec (const std::string& str);
 std::string                vec_to_hex_str (const std::vector<unsigned char>& vec);

+/* Limit value to the range [min_value, max_value].
+ *
+ * The lower bound is applied first and the upper bound last, so the result
+ * never exceeds max_value. The returned reference refers to one of the
+ * three arguments.
+ */
+template<typename T>
+inline const T&
+bound (const T& min_value, const T& value, const T& max_value)
+{
+  const T& at_least_min = std::max (value, min_value);
+
+  return std::min (at_least_min, max_value);
+}
+
 #endif /* AUDIOWMARK_UTILS_HH */
--- a/src/wavdata.cc
+++ b/src/wavdata.cc
 #include "wavdata.hh"
+#include "mp3.hh"
+#include "utils.hh"

 #include <math.h>
 #include <sndfile.h>
@@ -6,13 +8,6 @@
 using std::string;
 using std::vector;

-template<typename T>
-inline const T&
-bound (const T& min_value, const T& value, const T& max_value)
-{
-  return std::min (std::max (value, min_value), max_value);
-}
-
 WavData::WavData()
 {
 }
@@ -35,11 +30,27 @@ WavData::load (const string& filename)
  int error = sf_error (sndfile);
  if (error)
    {
-      m_error_blurb = sf_strerror (sndfile);
-      if (sndfile)
-        sf_close (sndfile);
-
-      return false;
+      if (mp3_detect (filename))
+        {
+          string error = mp3_load (filename, *this);
+          if (error == "")
+            {
+              return true;  // mp3 loaded successfully
+            }
+          else
+            {
+              m_error_blurb = "mp3 load error: " + error;
+              return false;
+            }
+        }
+      else
+        {
+          m_error_blurb = sf_strerror (sndfile);
+          if (sndfile)
+            sf_close (sndfile);
+
+          return false;
+        }
    }

  vector<int> isamples (sfinfo.frames * sfinfo.channels);