Commit 1e1a673c authored by Stefan Westerfeld's avatar Stefan Westerfeld

Merge branch 'mp3'

parents 7181bb40 cbf3df3c
......@@ -9,6 +9,7 @@ RUN apt-get install -y libtool
RUN apt-get install -y autoconf-archive
RUN apt-get install -y libgcrypt20-dev
RUN apt-get install -y libzita-resampler-dev
RUN apt-get install -y libmpg123-dev
ADD . /audiowmark
WORKDIR /audiowmark
......
......@@ -8,15 +8,16 @@ watermark in the output sound file. For human listeners, the files typically
sound the same.
However, the 128-bit message can be retrieved from the output sound file. Our
tests show, that even if the file is converted to mp3 or ogg (with bitrates
higher than 128 kbit/s), the watermark usually can be retrieved without
problems. The process of retrieving the message does not need the original
audio file (blind decoding).
tests show that even if the file is converted to mp3 or ogg (with bitrate 128
kbit/s or higher), the watermark usually can be retrieved without problems. The
process of retrieving the message does not need the original audio file (blind
decoding).
Internally, audiowmark is using the patchwork algorithm to hide the data in the
spectrum of the audio file. Some pseudo-randomly selected amplitudes of the
frequency bands of a 1024-value FFT are increased or decreased slightly, which
can be detected later. The algorithm used here is inspired by
spectrum of the audio file. The signal is split into 1024-sample frames. For
each frame, some pseudo-randomly selected amplitudes of the frequency bands of
a 1024-value FFT are increased or decreased slightly, which can be detected
later. The algorithm used here is inspired by
Martin Steinebach: Digitale Wasserzeichen für Audiodaten.
Darmstadt University of Technology 2004, ISBN 3-8322-2507-2
......@@ -51,6 +52,26 @@ and can be used for the add/get commands as follows:
audiowmark add --key test.key in.wav out.wav 0123456789abcdef0011223344556677
audiowmark get --key test.key out.wav
== Watermark Strength
The watermark strength parameter affects how much the watermarking algorithm
modifies the input signal. A stronger watermark is more audible, but also more
robust against modifications. The default strength is 10. A watermark with that
strength is recoverable after mp3/ogg encoding with 128kbit/s or higher. In our
informal listening tests, this setting also has a very good subjective quality.
A higher strength (for instance 15) would be helpful if, for example, robustness
against multiple conversions or conversions to low bit rates (e.g. 64kbit/s) is
desired.
A lower strength (for instance 6) makes the watermark less audible, but also
less robust. Strengths below 5 are not recommended. To set the strength, the
same value has to be passed both when generating and when retrieving the
watermark. Fractional strengths (like 7.5) are possible.
audiowmark add --strength 15 in.wav out.wav 0123456789abcdef0011223344556677
audiowmark get --strength 15 out.wav
== Dependencies
If you compile from source, audiowmark needs the following libraries:
......@@ -59,6 +80,7 @@ If you compile from source, audiowmark needs the follwing libraries:
* libsndfile
* libgcrypt
* libzita-resampler
* libmpg123
== Building fftw
......
......@@ -17,6 +17,16 @@ AC_DEFUN([AC_SNDFILE_REQUIREMENTS],
AC_SUBST(SNDFILE_LIBS)
])
dnl
dnl libmpg123
dnl
dnl Looks up the libmpg123 pkg-config module and substitutes the resulting
dnl LIBMPG123_CFLAGS and LIBMPG123_LIBS variables into the generated files.
dnl
AC_DEFUN([AC_LIBMPG123_REQUIREMENTS],
[
PKG_CHECK_MODULES(LIBMPG123, [libmpg123])
AC_SUBST(LIBMPG123_CFLAGS)
AC_SUBST(LIBMPG123_LIBS)
])
dnl
dnl zita resampler
dnl
......@@ -57,6 +67,7 @@ AC_DEFUN([AC_FFTW_CHECK],
])
AC_SNDFILE_REQUIREMENTS
AC_LIBMPG123_REQUIREMENTS
AC_ZITA_REQUIREMENTS
AC_FFTW_CHECK
AM_PATH_LIBGCRYPT
......
......@@ -4,3 +4,5 @@
test/
audiowmark
testconvcode
testmp3
testrandom
bin_PROGRAMS = audiowmark
COMMON_SRC = utils.hh utils.cc convcode.hh convcode.cc random.hh random.cc
COMMON_SRC = utils.hh utils.cc convcode.hh convcode.cc random.hh random.cc mp3.cc mp3.hh wavdata.cc wavdata.hh
COMMON_LIBS = $(SNDFILE_LIBS) $(FFTW_LIBS) $(LIBGCRYPT_LIBS) $(LIBMPG123_LIBS)
audiowmark_SOURCES = audiowmark.cc wavdata.cc wavdata.hh fft.cc fft.hh $(COMMON_SRC)
audiowmark_LDFLAGS = $(SNDFILE_LIBS) $(FFTW_LIBS) $(LIBGCRYPT_LIBS)
audiowmark_SOURCES = audiowmark.cc fft.cc fft.hh $(COMMON_SRC)
audiowmark_LDFLAGS = $(COMMON_LIBS)
noinst_PROGRAMS = testconvcode testrandom
noinst_PROGRAMS = testconvcode testrandom testmp3
testconvcode_SOURCES = testconvcode.cc $(COMMON_SRC)
testconvcode_LDFLAGS = $(LIBGCRYPT_LIBS)
testconvcode_LDFLAGS = $(COMMON_LIBS)
testrandom_SOURCES = testrandom.cc $(COMMON_SRC)
testrandom_LDFLAGS = $(LIBGCRYPT_LIBS)
testrandom_LDFLAGS = $(COMMON_LIBS)
testmp3_SOURCES = testmp3.cc $(COMMON_SRC)
testmp3_LDFLAGS = $(COMMON_LIBS)
This diff is collapsed.
......@@ -58,8 +58,7 @@ do
exit 1
fi
lame -b $2 ${AWM_FILE}.wav ${AWM_FILE}.mp3 --quiet
rm ${AWM_FILE}.wav
ffmpeg -i ${AWM_FILE}.mp3 ${AWM_FILE}.wav -v quiet -nostdin
OUT_FILE=${AWM_FILE}.mp3
elif [ "x$TRANSFORM" == "xdouble-mp3" ]; then
if [ "x$2" == "x" ]; then
echo "need mp3 bitrate" >&2
......@@ -72,23 +71,22 @@ do
# second mp3 step
lame -b $2 ${AWM_FILE}.wav ${AWM_FILE}.mp3 --quiet
rm ${AWM_FILE}.wav
ffmpeg -i ${AWM_FILE}.mp3 ${AWM_FILE}.wav -v quiet -nostdin
OUT_FILE=${AWM_FILE}.mp3
elif [ "x$TRANSFORM" == "xogg" ]; then
if [ "x$2" == "x" ]; then
echo "need ogg bitrate" >&2
exit 1
fi
oggenc -b $2 ${AWM_FILE}.wav -o ${AWM_FILE}.ogg --quiet
oggdec ${AWM_FILE}.ogg -o ${AWM_FILE}.wav --quiet
OUT_FILE=${AWM_FILE}.ogg
elif [ "x$TRANSFORM" == "x" ]; then
:
OUT_FILE=${AWM_FILE}.wav
else
echo "unknown transform $TRANSFORM" >&2
exit 1
fi
# blind decoding
audiowmark cmp ${AWM_FILE}.wav $PATTERN $AWM_PARAMS --test-key $SEED $TEST_CUT_ARGS
audiowmark cmp $OUT_FILE $PATTERN $AWM_PARAMS --test-key $SEED $TEST_CUT_ARGS
# decoding with original
# audiowmark cmp-delta "$i" t.wav $PATTERN $AWM_PARAMS --test-key $SEED
done
......
# Sweep over a list of watermark delta values. For each value, run snr.sh with
# AWM_PARAMS set to the matching --water-delta option and print one line
# consisting of the delta followed by whatever snr.sh wrote to stdout.
# The commented-out variants run the bit error rate / PEAQ scripts instead.
for delta in 0.030 0.020 0.015 0.010 0.005 0.003 0.002 0.001
do
#echo $delta $(AWM_PARAMS="--water-delta=$delta" ber-test.sh double-mp3 128) $(AWM_PARAMS="--water-delta=$delta" ber-test1.sh double-mp3 128)
#echo $delta $(AWM_PARAMS="--water-delta=$delta" peaq.sh)
echo $delta $(AWM_PARAMS="--water-delta=$delta" snr.sh)
done
#include "mp3.hh"
#include <mpg123.h>
#include <assert.h>
#include <stdio.h>
#include <vector>
using std::vector;
using std::string;
/* Scope guard for an mpg123 handle.
 *
 * When the guard goes out of scope, a non-null handle is closed (only if
 * need_close has been set) and then deleted. A null handle is left alone.
 * The functions in this file set need_close once mpg123_open() succeeded.
 */
struct ScopedMHandle
{
  mpg123_handle *mh         = nullptr;
  bool           need_close = false;

  ~ScopedMHandle()
  {
    if (!mh)
      return;

    if (need_close)
      mpg123_close (mh);

    mpg123_delete (mh);
  }
};
void
mp3_init()
{
static bool mpg123_init_ok = false;
if (!mpg123_init_ok)
{
int err = mpg123_init();
if (err != MPG123_OK)
{
fprintf (stderr, "audiowmark: init mpg123 lib failed\n");
exit (1);
}
mpg123_init_ok = true;
}
}
/* there is no really simple way of detecting if something is an mp3
*
* so we try to decode a few frames; if that works without error the
* file is probably a valid mp3
*/
bool
mp3_detect (const string& filename)
{
int err = 0;
mp3_init();
mpg123_handle *mh = mpg123_new (NULL, &err);
if (err != MPG123_OK)
return false;
auto smh = ScopedMHandle { mh }; // cleanup on return
err = mpg123_param (mh, MPG123_ADD_FLAGS, MPG123_QUIET, 0);
if (err != MPG123_OK)
return false;
err = mpg123_open (mh, filename.c_str());
if (err != MPG123_OK)
return false;
smh.need_close = true;
long rate;
int channels;
int encoding;
err = mpg123_getformat (mh, &rate, &channels, &encoding);
if (err != MPG123_OK)
return false;
size_t buffer_bytes = mpg123_outblock (mh);
unsigned char buffer[buffer_bytes];
for (size_t i = 0; i < 10; i++)
{
size_t done;
err = mpg123_read (mh, buffer, buffer_bytes, &done);
if (err == MPG123_DONE)
{
return true;
}
else if (err != MPG123_OK)
{
return false;
}
}
return true;
}
/* Decode a complete mp3 file into memory as 32 bit float samples.
 *
 * filename: path of the mp3 file
 * wav_data: assigned a new WavData built from the decoded samples, the
 *           channel count and the sample rate on success; untouched on error
 * returns:  empty string on success, otherwise an error message
 */
string
mp3_load (const string& filename, WavData& wav_data)
{
  int err = 0;

  mp3_init();

  mpg123_handle *mh = mpg123_new (nullptr, &err);
  auto smh = ScopedMHandle { mh }; // cleanup on return (no-op for a null handle)
  if (!mh || err != MPG123_OK)
    return "mpg123_new failed";

  err = mpg123_param (mh, MPG123_ADD_FLAGS, MPG123_QUIET, 0);
  if (err != MPG123_OK)
    return "setting quiet mode failed";

  // allow arbitrary amount of data for resync
  err = mpg123_param (mh, MPG123_RESYNC_LIMIT, -1, 0);
  if (err != MPG123_OK)
    return "setting resync limit parameter failed";

  // force floating point output
  {
    const long *rates;
    size_t      rate_count;

    mpg123_format_none (mh);
    mpg123_rates (&rates, &rate_count);

    for (size_t i = 0; i < rate_count; i++)
      {
        err = mpg123_format (mh, rates[i], MPG123_MONO|MPG123_STEREO, MPG123_ENC_FLOAT_32);
        if (err != MPG123_OK)
          return mpg123_strerror (mh);
      }
  }

  err = mpg123_open (mh, filename.c_str());
  if (err != MPG123_OK)
    return mpg123_strerror (mh);

  smh.need_close = true;

  long rate;
  int  channels;
  int  encoding;
  err = mpg123_getformat (mh, &rate, &channels, &encoding);
  if (err != MPG123_OK)
    return mpg123_strerror (mh);

  /* ensure that the format will not change */
  err = mpg123_format_none (mh);
  if (err == MPG123_OK)
    err = mpg123_format (mh, rate, channels, encoding);
  if (err != MPG123_OK)
    return mpg123_strerror (mh);

  size_t buffer_bytes = mpg123_outblock (mh);
  assert (buffer_bytes % sizeof (float) == 0);

  vector<float> buffer (buffer_bytes / sizeof (float));
  vector<float> samples;

  while (true)
    {
      size_t done = 0;
      err = mpg123_read (mh, reinterpret_cast<unsigned char *> (buffer.data()), buffer_bytes, &done);

      // MPG123_NEED_MORE: some mp3s have this error before reaching eof -> harmless
      const bool usable = (err == MPG123_OK || err == MPG123_DONE || err == MPG123_NEED_MORE);
      if (!usable)
        return mpg123_strerror (mh);

      /* `done` reports the bytes written to the buffer for this call even if
       * the return code is not MPG123_OK; the final block of a stream can be
       * delivered together with MPG123_DONE, so it must not be dropped
       */
      const size_t n_values = done / sizeof (float);
      samples.insert (samples.end(), buffer.begin(), buffer.begin() + n_values);

      if (err == MPG123_DONE)
        {
          // NOTE(review): last argument is presumably the bit depth - confirm against WavData
          wav_data = WavData (samples, channels, rate, 24);
          return ""; /* success */
        }
    }
}
#ifndef AUDIOWMARK_MP3_HH
#define AUDIOWMARK_MP3_HH
#include <string>
#include "wavdata.hh"

/* Probe whether `filename` looks like an mp3 by trying to decode the first
 * few blocks; returns true if that works without error.
 */
bool mp3_detect (const std::string& filename);

/* Decode the mp3 file `filename` into `wav_data`.
 * Returns an empty string on success and an error message otherwise.
 */
std::string mp3_load (const std::string& filename, WavData& wav_data);
#endif /* AUDIOWMARK_MP3_HH */
......@@ -4,6 +4,5 @@ PATTERN=4e1243bd22c66e76c2ba9eddc1f91394
for i in test/T*
do
audiowmark add $i t.wav $PATTERN $AWM_PARAMS >/dev/null
echo $i $(audiowmark snr $i t.wav)
done | grep snr | awk '{s += $3; n++} END { print s/n; }'
echo $i $(audiowmark add $i t.wav $PATTERN $AWM_PARAMS --snr | grep SNR)
done | awk '{s += $3; n++} END { print s/n; }'
# Sweep over a list of watermark strengths. For each value, run snr.sh with
# AWM_PARAMS set to the matching --strength option and print one line
# consisting of the strength followed by whatever snr.sh wrote to stdout.
for strength in 30 20 15 10 5 3 2 1
do
echo $strength $(AWM_PARAMS="--strength=$strength" snr.sh)
done
#include "mp3.hh"
using std::string;
int
main (int argc, char **argv)
{
WavData wd;
if (argc >= 2)
{
if (mp3_detect (argv[1]))
{
string error = mp3_load (argv[1], wd);
if (error == "")
{
int sec = wd.n_values() / wd.n_channels() / wd.sample_rate();
printf ("loaded mp3 %s: %d:%02d\n", argv[1], sec / 60, sec % 60);
if (argc == 3)
{
wd.save (argv[2]);
printf ("saved wav: %s\n", argv[2]);
}
}
else
{
printf ("mp3 load %s failed: %s\n", argv[1], error.c_str());
return 1;
}
}
else
{
printf ("mp3 detect %s failed\n", argv[1]);
return 1;
}
}
}
......@@ -10,4 +10,11 @@ std::string bit_vec_to_str (const std::vector<int>& bit_vec);
std::vector<unsigned char> hex_str_to_vec (const std::string& str);
std::string vec_to_hex_str (const std::vector<unsigned char>& vec);
/* Limit value to the range [min_value, max_value].
 *
 * Returns a reference to one of the three arguments: min_value if value is
 * smaller than it, otherwise max_value if that is smaller than the result so
 * far, otherwise value itself. Only operator< is used for the comparisons.
 */
template<typename T>
inline const T&
bound (const T& min_value, const T& value, const T& max_value)
{
  const T& at_least_min = (value < min_value) ? min_value : value;

  return (max_value < at_least_min) ? max_value : at_least_min;
}
#endif /* AUDIOWMARK_UTILS_HH */
#include "wavdata.hh"
#include "mp3.hh"
#include "utils.hh"
#include <math.h>
#include <sndfile.h>
......@@ -6,13 +8,6 @@
using std::string;
using std::vector;
/* Limit value to the range [min_value, max_value].
 *
 * Returns a reference to one of the three arguments: min_value if value is
 * smaller than it, otherwise max_value if that is smaller than the result so
 * far, otherwise value itself. Only operator< is used for the comparisons.
 */
template<typename T>
inline const T&
bound (const T& min_value, const T& value, const T& max_value)
{
  const T& at_least_min = (value < min_value) ? min_value : value;

  return (max_value < at_least_min) ? max_value : at_least_min;
}
/* Default constructor: performs no work of its own. */
WavData::WavData() = default;
......@@ -35,11 +30,27 @@ WavData::load (const string& filename)
int error = sf_error (sndfile);
if (error)
{
m_error_blurb = sf_strerror (sndfile);
if (sndfile)
sf_close (sndfile);
return false;
if (mp3_detect (filename))
{
string error = mp3_load (filename, *this);
if (error == "")
{
return true; // mp3 loaded successfully
}
else
{
m_error_blurb = "mp3 load error: " + error;
return false;
}
}
else
{
m_error_blurb = sf_strerror (sndfile);
if (sndfile)
sf_close (sndfile);
return false;
}
}
vector<int> isamples (sfinfo.frames * sfinfo.channels);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment