Commit 09e50c8d authored by Stefan Westerfeld's avatar Stefan Westerfeld

Merge branch 'speed-detection'

parents 13d51420 a5803f6b
Overview of Changes in audiowmark-0.6.0:
* implement speed detection/correction (--detect-speed)
Overview of Changes in audiowmark-0.5.0:
* support HTTP Live Streaming for audio/video streaming
......
......@@ -129,6 +129,9 @@ Use watermarking key from file <filename> (see <<key>>).
--strength <s>::
Set the watermarking strength (see <<strength>>).
--detect-speed::
Detect and correct replay speed difference (see <<speed>>).
[[key]]
== Watermark Key
......@@ -170,6 +173,49 @@ watermark. Fractional strengths (like 7.5) are possible.
audiowmark add --strength 15 in.wav out.wav 0123456789abcdef0011223344556677
audiowmark get --strength 15 out.wav
[[speed]]
== Speed Detection
If a watermarked audio signal is played back a little faster or slower than the
original speed, watermark detection will fail. This could happen by accident if
the digital watermark was converted to an analog signal and back and the
original speed was not (exactly) preserved. It could also be done intentionally
as an attack to prevent the watermark from being detected.
In order to be able to find the watermark in these cases, `audiowmark` can try
to figure out the speed difference to the original audio signal and correct the
replay speed before detecting the watermark. The search range for the replay
speed is approximately *[0.8..1.25]*.
Example: add a watermark to `in.wav` and increase the replay speed by 5% using
`sox`.
[subs=+quotes]
....
*$ audiowmark add in.wav out.wav 0123456789abcdef0011223344556677*
[...]
*$ sox out.wav out1.wav speed 1.05*
....
Without speed detection, we get no results. With speed detection the speed
difference is detected and corrected so we get results.
[subs=+quotes]
....
*$ audiowmark get out1.wav*
*$ audiowmark get out1.wav --detect-speed*
speed 1.049966
pattern 0:05 0123456789abcdef0011223344556677 1.209 0.147 A-SPEED
pattern 0:57 0123456789abcdef0011223344556677 1.301 0.143 B-SPEED
pattern 0:57 0123456789abcdef0011223344556677 1.255 0.145 AB-SPEED
pattern 1:49 0123456789abcdef0011223344556677 1.380 0.173 A-SPEED
pattern all 0123456789abcdef0011223344556677 1.297 0.130 SPEED
....
The speed detection algorithm is not enabled by default because it is
relatively slow (total CPU time required) and needs a lot of memory. However,
the search is automatically run in parallel using many threads on systems with
many CPU cores. So on good hardware it makes sense to always enable this option
to be robust against replay speed attacks.
== Short Payload (experimental)
By default, the watermark will store a 128-bit message. In this mode, we
......
AC_INIT([audiowmark], [0.5.0])
AC_INIT([audiowmark], [0.6.0])
AC_CONFIG_SRCDIR([src/audiowmark.cc])
AC_CONFIG_AUX_DIR([build-aux])
AC_CONFIG_MACRO_DIR([m4])
......@@ -93,11 +93,15 @@ dnl -------------------------------------------------------------------------
# need c++14 mode
AX_CXX_COMPILE_STDCXX_14(ext)
# use -Wall
# use -Wall / -pthread if available
AC_LANG_PUSH([C++])
AX_CHECK_COMPILE_FLAG([-Wall], [CXXFLAGS="$CXXFLAGS -Wall"])
AX_CHECK_COMPILE_FLAG([-Wall], [CXXFLAGS="$CXXFLAGS -Wall"])
AX_CHECK_COMPILE_FLAG([-pthread], [CXXFLAGS="$CXXFLAGS -pthread"])
AC_LANG_POP([C++])
# Less cluttered build output
m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])])
AC_CONFIG_FILES([Makefile src/Makefile])
AC_OUTPUT
......
......@@ -10,3 +10,5 @@ teststream
testlimiter
testhls
testmpegts
testshortcode
testthreadpool
......@@ -6,7 +6,8 @@ COMMON_SRC = utils.hh utils.cc convcode.hh convcode.cc random.hh random.cc wavda
sfoutputstream.cc sfoutputstream.hh rawinputstream.cc rawinputstream.hh rawoutputstream.cc rawoutputstream.hh \
rawconverter.cc rawconverter.hh mp3inputstream.cc mp3inputstream.hh wmcommon.cc wmcommon.hh fft.cc fft.hh \
limiter.cc limiter.hh shortcode.cc shortcode.hh mpegts.cc mpegts.hh hls.cc hls.hh audiobuffer.hh \
wmget.cc wmadd.cc
wmget.cc wmadd.cc syncfinder.cc syncfinder.hh wmspeed.cc wmspeed.hh threadpool.cc threadpool.hh \
resample.cc resample.hh
COMMON_LIBS = $(SNDFILE_LIBS) $(FFTW_LIBS) $(LIBGCRYPT_LIBS) $(LIBMPG123_LIBS) $(FFMPEG_LIBS)
AM_CXXFLAGS = $(SNDFILE_CFLAGS) $(FFTW_CFLAGS) $(LIBGCRYPT_CFLAGS) $(LIBMPG123_CFLAGS) $(FFMPEG_CFLAGS)
......@@ -14,7 +15,7 @@ AM_CXXFLAGS = $(SNDFILE_CFLAGS) $(FFTW_CFLAGS) $(LIBGCRYPT_CFLAGS) $(LIBMPG123_C
audiowmark_SOURCES = audiowmark.cc $(COMMON_SRC)
audiowmark_LDFLAGS = $(COMMON_LIBS)
noinst_PROGRAMS = testconvcode testrandom testmp3 teststream testlimiter testshortcode testmpegts
noinst_PROGRAMS = testconvcode testrandom testmp3 teststream testlimiter testshortcode testmpegts testthreadpool
testconvcode_SOURCES = testconvcode.cc $(COMMON_SRC)
testconvcode_LDFLAGS = $(COMMON_LIBS)
......@@ -37,6 +38,9 @@ testshortcode_LDFLAGS = $(COMMON_LIBS)
testmpegts_SOURCES = testmpegts.cc $(COMMON_SRC)
testmpegts_LDFLAGS = $(COMMON_LIBS)
testthreadpool_SOURCES = testthreadpool.cc $(COMMON_SRC)
testthreadpool_LDFLAGS = $(COMMON_LIBS)
if COND_WITH_FFMPEG
COMMON_SRC += hlsoutputstream.cc hlsoutputstream.hh
......
speed detection possible improvements:
- refine with 128 sync step instead of 256?
- use bigger initial region (50s) --detect-speed-harder
- use window hann instead of hamming for sync bits (or all bits)?
- remove lrint and QInterpolator?
- detect silence instead of total volume?
- split big region into two smaller ones?
- connected search on even smaller regions?
- reorder memory access in compare to be more cache efficient
- n-best search?
- soft sync?
possible improvements:
- dynamic bit strength
......
......@@ -61,6 +61,7 @@ print_usage()
printf (" --linear disable non-linear bit storage\n");
printf (" --short <bits> enable short payload mode\n");
printf (" --key <file> load watermarking key from file\n");
printf (" --detect-speed detect/correct replay speed difference (get/cmp)\n");
printf (" -q, --quiet disable information messages\n");
printf ("\n");
printf (" --input-format raw use raw stream as input\n");
......@@ -294,7 +295,7 @@ test_clip (const string& in_file, const string& out_file, int seed, int time_sec
{
// this is unbiased only if 2 * block_size + time_seconds is smaller than overall file length
const size_t values_per_block = (mark_sync_frame_count() + mark_data_frame_count()) * Params::frame_size * in_data.n_channels();
start_point = 2 * values_per_block * (double(rng()) / UINT64_MAX);
start_point = 2 * values_per_block * rng.random_double();
start_point /= in_data.n_channels();
end_point = start_point + time_seconds * in_data.sample_rate();
......@@ -316,6 +317,16 @@ test_clip (const string& in_file, const string& out_file, int seed, int time_sec
return 0;
}
/*
 * Test helper: print a pseudo random replay speed factor for the given seed.
 *
 * One 64-bit value is drawn from the random generator and mapped linearly to
 * the interval [0.85, 1.15]; the result is printed with six decimals.
 */
int
test_speed (int seed)
{
  /* there is no dedicated random stream for this test, so data_up_down is borrowed */
  Random rng (seed, Random::Stream::data_up_down);

  const double low  = 0.85;
  const double high = 1.15;

  /* position inside the interval, derived from one raw 64-bit random value */
  const double unit = rng() / double (UINT64_MAX);

  printf ("%.6f\n", low + unit * (high - low));
  return 0;
}
int
gen_key (const string& outfile)
{
......@@ -565,6 +576,8 @@ parse_add_options (ArgParser& ap)
void
parse_get_options (ArgParser& ap)
{
float f;
ap.parse_opt ("--test-cut", Params::test_cut);
ap.parse_opt ("--test-truncate", Params::test_truncate);
......@@ -576,6 +589,14 @@ parse_get_options (ArgParser& ap)
{
Params::test_no_sync = true;
}
if (ap.parse_opt ("--detect-speed"))
{
Params::detect_speed = true;
}
if (ap.parse_opt ("--test-speed", f))
{
Params::test_speed = f;
}
}
int
......@@ -670,8 +691,17 @@ main (int argc, char **argv)
}
else if (ap.parse_cmd ("test-clip"))
{
parse_shared_options (ap);
if (ap.parse_args (4, args))
test_clip (args[0], args[1], atoi (args[2].c_str()), atoi (args[3].c_str()));
return test_clip (args[0], args[1], atoi (args[2].c_str()), atoi (args[3].c_str()));
}
else if (ap.parse_cmd ("test-speed"))
{
parse_shared_options (ap);
if (ap.parse_args (1, args))
return test_speed (atoi (args[0].c_str()));
}
error ("audiowmark: error parsing commandline args (use audiowmark -h)\n");
return 1;
......
......@@ -70,6 +70,26 @@ do
else
TEST_CUT_ARGS=""
fi
if [ "x$AWM_SPEED" != x ]; then
if [ "x$AWM_SPEED_PRE_MP3" != x ]; then
# first (optional) mp3 step: simulate quality loss before speed change
lame -b "$AWM_SPEED_PRE_MP3" ${AWM_FILE}.wav ${AWM_FILE}.mp3 --quiet
rm ${AWM_FILE}.wav
ffmpeg -i ${AWM_FILE}.mp3 ${AWM_FILE}.wav -v quiet -nostdin
fi
[ -z $SPEED_SEED ] && SPEED_SEED=0
SPEED=$(audiowmark test-speed $SPEED_SEED --test-key $SEED)
((SPEED_SEED++))
echo in_speed $SPEED
sox -D -V1 ${AWM_FILE}.wav ${AWM_FILE}.speed.wav speed $SPEED
mv ${AWM_FILE}.speed.wav ${AWM_FILE}.wav
TEST_SPEED_ARGS="--detect-speed --test-speed $SPEED"
else
TEST_SPEED_ARGS=""
fi
if [ "x$TRANSFORM" == "xmp3" ]; then
if [ "x$2" == "x" ]; then
echo "need mp3 bitrate" >&2
......@@ -108,18 +128,18 @@ do
for CLIP in $(seq $AWM_MULTI_CLIP)
do
audiowmark test-clip $OUT_FILE ${OUT_FILE}.clip.wav $((CLIP_SEED++)) $AWM_CLIP --test-key $SEED
audiowmark cmp ${OUT_FILE}.clip.wav $PATTERN $AWM_PARAMS --test-key $SEED $TEST_CUT_ARGS
audiowmark cmp ${OUT_FILE}.clip.wav $PATTERN $AWM_PARAMS --test-key $SEED $TEST_CUT_ARGS $TEST_SPEED_ARGS
rm ${OUT_FILE}.clip.wav
echo
done
elif [ "x$AWM_REPORT" == "xtruncv" ]; then
for TRUNC in $AWM_TRUNCATE
do
audiowmark cmp $OUT_FILE $PATTERN $AWM_PARAMS --test-key $SEED $TEST_CUT_ARGS --test-truncate $TRUNC | sed "s/^/$TRUNC /g"
audiowmark cmp $OUT_FILE $PATTERN $AWM_PARAMS --test-key $SEED $TEST_CUT_ARGS $TEST_SPEED_ARGS --test-truncate $TRUNC | sed "s/^/$TRUNC /g"
echo
done
else
audiowmark cmp $OUT_FILE $PATTERN $AWM_PARAMS --test-key $SEED $TEST_CUT_ARGS
audiowmark cmp $OUT_FILE $PATTERN $AWM_PARAMS --test-key $SEED $TEST_CUT_ARGS $TEST_SPEED_ARGS
echo
fi
rm -f ${AWM_FILE}.wav $OUT_FILE # cleanup temp files
......
......@@ -20,99 +20,81 @@
#include <fftw3.h>
#include <map>
#include <mutex>
using std::vector;
using std::complex;
using std::map;
float *
new_array_float (size_t N)
static std::mutex fft_planner_mutex;
static std::map<size_t, fftwf_plan> fft_plan_map;
static std::map<size_t, fftwf_plan> ifft_plan_map;
FFTProcessor::FFTProcessor (size_t N)
{
std::lock_guard<std::mutex> lg (fft_planner_mutex);
const size_t N_2 = N + 2; /* extra space for r2c extra complex output */
return (float *) fftwf_malloc (sizeof (float) * N_2);
m_in = static_cast<float *> (fftwf_malloc (sizeof (float) * N_2));
m_out = static_cast<float *> (fftwf_malloc (sizeof (float) * N_2));
/* plan if not done already */
fftwf_plan& pfft = fft_plan_map[N];
if (!pfft)
pfft = fftwf_plan_dft_r2c_1d (N, m_in, (fftwf_complex *) m_out, FFTW_ESTIMATE | FFTW_PRESERVE_INPUT);
fftwf_plan& pifft = ifft_plan_map[N];
if (!pifft)
pifft = fftwf_plan_dft_c2r_1d (N, (fftwf_complex *) m_in, m_out, FFTW_ESTIMATE | FFTW_PRESERVE_INPUT);
/* store plan for size N as member variables */
plan_fft = pfft;
plan_ifft = pifft;
// we could add code for saving plans here, and use patient planning
}
void
free_array_float (float *f)
FFTProcessor::~FFTProcessor()
{
fftwf_free (f);
fftwf_free (m_in);
fftwf_free (m_out);
}
void
fftar_float (size_t N, float *in, float *out)
FFTProcessor::fft()
{
static map<int, fftwf_plan> plan_for_size;
fftwf_plan& plan = plan_for_size[N];
if (!plan)
{
float *plan_in = new_array_float (N);
float *plan_out = new_array_float (N);
plan = fftwf_plan_dft_r2c_1d (N, plan_in, (fftwf_complex *) plan_out, FFTW_ESTIMATE | FFTW_PRESERVE_INPUT);
// we add code for saving plans here, and use patient planning
}
fftwf_execute_dft_r2c (plan, in, (fftwf_complex *) out);
fftwf_execute_dft_r2c (plan_fft, m_in, (fftwf_complex *) m_out);
}
void
fftsr_float (size_t N, float *in, float *out)
FFTProcessor::ifft()
{
static map<int, fftwf_plan> plan_for_size;
fftwf_plan& plan = plan_for_size[N];
if (!plan)
{
float *plan_in = new_array_float (N);
float *plan_out = new_array_float (N);
plan = fftwf_plan_dft_c2r_1d (N, (fftwf_complex *) plan_in, plan_out, FFTW_ESTIMATE | FFTW_PRESERVE_INPUT);
// we add code for saving plans here, and use patient planning
}
fftwf_execute_dft_c2r (plan, (fftwf_complex *)in, out);
fftwf_execute_dft_c2r (plan_ifft, (fftwf_complex *) m_in, m_out);
}
vector<complex<float>>
fft (const vector<float>& in)
vector<float>
FFTProcessor::ifft (const vector<complex<float>>& in)
{
vector<complex<float>> out (in.size() / 2 + 1);
/* ensure memory is SSE-aligned (or other vectorized stuff) */
float *fft_in = new_array_float (in.size());
float *fft_out = new_array_float (in.size());
std::copy (in.begin(), in.end(), fft_in);
fftar_float (in.size(), fft_in, fft_out);
/* complex<float> vector and fft_out have the same layout in memory */
std::copy (fft_out, fft_out + out.size() * 2, reinterpret_cast<float *> (&out[0]));
vector<float> out ((in.size() - 1) * 2);
free_array_float (fft_out);
free_array_float (fft_in);
/* complex<float> vector and m_out have the same layout in memory */
std::copy (in.begin(), in.end(), reinterpret_cast<complex<float> *> (m_in));
ifft();
std::copy (m_out, m_out + out.size(), &out[0]);
return out;
}
vector<float>
ifft (const vector<complex<float>>& in)
vector<complex<float>>
FFTProcessor::fft (const vector<float>& in)
{
vector<float> out ((in.size() - 1) * 2);
/* ensure memory is SSE-aligned (or other vectorized stuff) */
float *ifft_in = new_array_float (out.size());
float *ifft_out = new_array_float (out.size());
/* complex<float> vector and fft_out have the same layout in memory */
std::copy (in.begin(), in.end(), reinterpret_cast<complex<float> *> (ifft_in));
fftsr_float (out.size(), ifft_in, ifft_out);
std::copy (ifft_out, ifft_out + out.size(), &out[0]);
vector<complex<float>> out (in.size() / 2 + 1);
free_array_float (ifft_out);
free_array_float (ifft_in);
/* complex<float> vector and m_out have the same layout in memory */
std::copy (in.begin(), in.end(), m_in);
fft();
std::copy (m_out, m_out + out.size() * 2, reinterpret_cast<float *> (&out[0]));
return out;
}
......@@ -20,15 +20,27 @@
#include <complex>
#include <vector>
#include <fftw3.h>
/* high level api */
std::vector<std::complex<float>> fft (const std::vector<float>& in);
std::vector<float> ifft (const std::vector<std::complex<float>>& in);
class FFTProcessor
{
fftwf_plan plan_fft;
fftwf_plan plan_ifft;
float *m_in = nullptr;
float *m_out = nullptr;
public:
FFTProcessor (size_t N);
~FFTProcessor();
/* more efficient: low level api */
void fftar_float (size_t N, float *in, float *out);
float *new_array_float (size_t N);
void free_array_float (float *f);
/* low level (fast) */
void fft();
void ifft();
float *in() { return m_in; }
float *out() { return m_out; };
/* high level (convenient) */
std::vector<std::complex<float>> fft (const std::vector<float>& in);
std::vector<float> ifft (const std::vector<std::complex<float>>& in);
};
#endif /* AUDIOWMARK_FFT_HH */
#!/bin/bash
# Print a result table for the speed detection tests.
#
# Input: files named speed-<clip>-<strength>-<suffix> and
# speed-full-<strength>-<suffix> in the current directory. Each line of these
# files is read by awk as "<bad> <n>" (columns 1 and 2).
#
# Output: a table with one row per strength and one column per clip length,
# plus a final column labelled 2:45 which is filled from the speed-full-*
# files. Each cell is 100 * sum(bad) / sum(n), printed with two decimals and
# wrapped in '*' if it is greater than zero.
# NOTE(review): the table markup looks like asciidoc - confirm with the docs build.
STRENGTHS="10 15"
CLIPS="15 30"
# table title and header row
echo ".watermarking-speed"
echo '[frame="topbot",options="header",cols="<1,3*<"]'
echo '|=========================='
echo -n "| Strength "
for CLIP in $CLIPS
do
echo -n "| 0:$CLIP"
done
echo -n "| 2:45"
echo
# one table row per strength
for STRENGTH in $STRENGTHS
do
echo -n "| $STRENGTH "
for CLIP in $CLIPS
do
# aggregate all result files for this clip length / strength (n==0 is mapped to 1 to avoid division by zero)
cat speed-$CLIP-$STRENGTH-* | awk '{bad += $1; n += $2} END {if (n==0) n=1;fer=100.0*bad/n; bold=fer>0?"*":" ";printf ("| %s%.2f%s", bold, fer, bold)}'
done
for FULL in speed-full-$STRENGTH-*
do
# only accept names that consist of lowercase letters / digits and exactly three dashes
if [ "$(echo $FULL | tr -d a-z0-9)" == "---" ]; then
cat $FULL
fi
done | awk '{bad += $1; n += $2} END {if (n==0) n=1;fer=100.0*bad/n; bold=fer>0?"*":" ";printf ("| %s%.2f%s", bold, fer, bold)}'
echo
done
echo '|=========================='
#!/bin/bash
# Generate build rules for the speed detection test runs on stdout.
#
# The output starts with an "all:" line listing every target, followed by one
# rule per target. Each rule runs ber-test.sh from the parent directory with
# AWM_SPEED=1 and AWM_SPEED_PRE_MP3=128, writes the output to x<target> and
# renames it to <target> afterwards.
# NOTE(review): the target/tab-indented command layout suggests the output is
# meant to be used as a makefile - confirm how it is invoked.
SEEDS=$(seq 10)
STRENGTHS="10 15"
CLIPS="15 30"
# first line: "all:" followed by the names of all targets
echo -n "all:"
for SEED in $SEEDS
do
for STRENGTH in $STRENGTHS
do
# clips
for CLIP in $CLIPS
do
echo -n " speed-$CLIP-$STRENGTH-$SEED"
done
# full file
echo -n " speed-full-$STRENGTH-$SEED"
done
done
echo
echo
# one rule per target, same iteration order as the "all:" line
for SEED in $SEEDS
do
for STRENGTH in $STRENGTHS
do
# clips
for CLIP in $CLIPS
do
FILE="speed-$CLIP-$STRENGTH-$SEED"
echo "$FILE:"
# write to a temporary name first, so the target only exists once the run is complete
echo -e "\t( cd ..; AWM_RAND_PATTERN=1 AWM_SET=huge2 AWM_PARAMS='--strength $STRENGTH' AWM_SPEED=1 AWM_SPEED_PRE_MP3=128 AWM_CLIP='$CLIP' AWM_SEEDS=$SEED AWM_FILE='t-$FILE' ber-test.sh mp3 128 ) >x$FILE"
echo -e "\tmv x$FILE $FILE"
echo
done
# full file
FILE="speed-full-$STRENGTH-$SEED"
echo "$FILE:"
# same as the clip rule, but without AWM_CLIP
echo -e "\t( cd ..; AWM_RAND_PATTERN=1 AWM_SET=huge2 AWM_PARAMS='--strength $STRENGTH' AWM_SPEED=1 AWM_SPEED_PRE_MP3=128 AWM_SEEDS=$SEED AWM_FILE='t-$FILE' ber-test.sh mp3 128 ) >x$FILE"
echo -e "\tmv x$FILE $FILE"
echo
done
done
......@@ -245,3 +245,11 @@ Random::gen_key()
gcry_randomize (&key[0], 16, /* long term key material strength */ GCRY_VERY_STRONG_RANDOM);
return vec_to_hex_str (key);
}
/*
 * Derive a 64-bit seed from the contents of a float vector.
 *
 * The raw bytes of all floats are hashed with SHA1 and the digest is passed
 * to uint64_from_buffer() (presumably the leading 8 bytes of the digest are
 * used - see its definition).
 *
 * An empty vector is valid input and hashes zero bytes.
 */
uint64_t
Random::seed_from_hash (const vector<float>& floats)
{
  unsigned char hash[20]; /* buffer for the SHA1 digest */

  /* data() instead of &floats[0]: indexing an empty vector is undefined behaviour */
  gcry_md_hash_buffer (GCRY_MD_SHA1, hash, floats.data(), floats.size() * sizeof (float));
  return uint64_from_buffer (hash);
}
......@@ -23,6 +23,7 @@
#include <vector>
#include <string>
#include <random>
class Random
{
......@@ -30,7 +31,7 @@ public:
enum class Stream {
data_up_down = 1,
sync_up_down = 2,
pad_up_down = 3, /* unused */
speed_clip = 3,
mix = 4,
bit_order = 5,
frame_position = 6
......@@ -41,6 +42,8 @@ private:
std::vector<uint64_t> buffer;
size_t buffer_pos = 0;
std::uniform_real_distribution<double> double_dist;
void die_on_error (const char *func, gcry_error_t error);
public:
Random (uint64_t seed, Stream stream);
......@@ -54,6 +57,21 @@ public:
return buffer[buffer_pos++];
}
/* lower bound of the values produced by this generator; standard library
 * distributions (like double_dist) query min() on the generator they are given
 */
static constexpr uint64_t
min()
{
return 0;
}
/* upper bound of the values produced by this generator; standard library
 * distributions (like double_dist) query max() on the generator they are given
 */
static constexpr uint64_t
max()
{
return UINT64_MAX;
}
/* draw a double using double_dist with this object as the random source */
double
random_double() /* range [0,1) */
{
return double_dist (*this);
}
void refill_buffer();
void seed (uint64_t seed, Stream stream);
......@@ -73,6 +91,7 @@ public:
static void set_global_test_key (uint64_t seed);
static void load_global_key (const std::string& key_file);
static std::string gen_key();
static uint64_t seed_from_hash (const std::vector<float>& floats);
};
#endif /* AUDIOWMARK_RANDOM_HH */
/*
* Copyright (C) 2018-2020 Stefan Westerfeld
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "resample.hh"
#include <assert.h>
#include <math.h>
#include <zita-resampler/resampler.h>
#include <zita-resampler/vresampler.h>
using std::vector;
/*
 * Run a configured zita resampler (Resampler or VResampler) over a complete
 * interleaved input buffer.
 *
 * The output length is determined by the caller: out must already have the
 * desired size. Processing is done in three steps (padding before the input,
 * the input itself, padding after the input); the padding steps pass a null
 * input pointer.
 */
template<class R>
static void
process_resampler (R& resampler, const vector<float>& in, vector<float>& out)
{
  const auto n_channels  = resampler.nchan();
  const auto half_filter = resampler.inpsize() / 2;

  resampler.out_count = out.size() / n_channels;
  resampler.out_data  = &out[0];

  /* step 1: k/2 - 1 samples in front of the actual input, which avoids a timeshift */
  resampler.inp_count = half_filter - 1;
  resampler.inp_data  = nullptr;
  resampler.process();

  /* step 2: the actual input (the resampler interface takes a non-const pointer) */
  resampler.inp_count = in.size() / n_channels;
  resampler.inp_data  = const_cast<float *> (&in[0]);
  resampler.process();

  /* step 3: k/2 samples after the actual input */
  resampler.inp_count = half_filter;
  resampler.inp_data  = nullptr;
  resampler.process();
}
/*
 * Resample wav_data to the new sample rate and return the result as a new
 * WavData (same channel count and bit depth, sample rate set to rate).
 *
 * The program is terminated with an error message if neither of the two
 * zita resamplers can be set up for the requested conversion.
 */
WavData
resample (const WavData& wav_data, int rate)
{
  /* callers are expected to skip resampling if the rate already matches:
   * running the resampler with input rate == output rate would be slow
   */
  assert (rate != wav_data.sample_rate());

  const int    hlen       = 16;
  const auto   n_channels = wav_data.n_channels();
  const auto   old_rate   = wav_data.sample_rate();
  const double ratio      = double (rate) / old_rate;

  const vector<float>& in = wav_data.samples();
  vector<float> out (lrint (in.size() / n_channels * ratio) * n_channels);

  /* first choice: Resampler
   *
   * This is the fast, optimized algorithm of zita-resampler. It works for
   * many common cases, like resampling between 22050, 32000, 44100, 48000,
   * 96000 Hz.
   */
  Resampler resampler;
  if (resampler.setup (old_rate, rate, n_channels, hlen) == 0)
    {
      process_resampler (resampler, in, out);
      return WavData (out, n_channels, rate, wav_data.bit_depth());
    }

  /* fallback: VResampler
   *
   * This is slower, but works for arbitrary rates (like 33333 -> 44100).
   */
  VResampler vresampler;
  if (vresampler.setup (ratio, n_channels, hlen) == 0)
    {
      process_resampler (vresampler, in, out);
      return WavData (out, n_channels, rate, wav_data.bit_depth());
    }

  error ("audiowmark: resampling from rate %d to rate %d not supported.\n", old_rate, rate);
  exit (1);
}
/*
 * Resample wav_data by an arbitrary ratio (output length = input length * ratio)
 * using VResampler. The sample rate stored in the returned WavData is new_rate;
 * it is not derived from ratio.
 *
 * The program is terminated with an error message if the resampler cannot be
 * set up for this ratio.
 */
WavData
resample_ratio (const WavData& wav_data, double ratio, int new_rate)
{
  const int  hlen       = 16;
  const auto n_channels = wav_data.n_channels();

  VResampler vresampler;
  const bool setup_ok = vresampler.setup (ratio, n_channels, hlen) == 0;
  if (!setup_ok)
    {
      error ("audiowmark: failed to setup vresampler with ratio=%f\n", ratio);
      exit (1);
    }

  const vector<float>& in = wav_data.samples();
  vector<float> out (lrint (in.size() / n_channels * ratio) * n_channels);

  process_resampler (vresampler, in, out);
  return WavData (out, n_channels, new_rate, wav_data.bit_depth());
}
/*
* Copyright (C) 2018-2020 Stefan Westerfeld
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef AUDIOWMARK_RESAMPLE_HH
#define AUDIOWMARK_RESAMPLE_HH
#include "wavdata.hh"
WavData resample (const WavData& wav_data, int rate);
WavData resample_ratio (const WavData& wav_data, double ratio, int new_rate);
#endif /* AUDIOWMARK_RESAMPLE_HH */
This diff is collapsed.
/*
* Copyright (C) 2018-2020 Stefan Westerfeld
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef AUDIOWMARK_SYNC_FINDER_HH
#define AUDIOWMARK_SYNC_FINDER_HH
#include "convcode.hh"
#include "wavdata.hh"
/*
* The SyncFinder class searches for sync bits in an input WavData. It is used
* by both the BlockDecoder and the ClipDecoder to find a time index where
* decoding should start.
*
* The first step for finding sync bits is search_approx, which generates a
* list of approximate locations where sync bits match, using a stepping of
* sync_search_step=256 (for a frame size of 1024). The approximate candidate
* locations are later refined with search_refine using sync_search_fine=8 as
* stepping.
*
* BlockDecoder and ClipDecoder have similar but not identical needs, so
* both use this class, using either Mode::BLOCK or Mode::CLIP.
*
* BlockDecoder (Mode::BLOCK)
* - search for full A or full B blocks
* - select candidates by threshold(s) only
* - zero samples are not treated specially
*
* ClipDecoder (Mode::CLIP)
* - search for AB block (one A block followed by one B block) or BA block
* - select candidates by threshold, but only keep at most the 5 best matches
* - zero samples at beginning/end don't affect the score returned by sync_decode
* - zero samples at beginning/end don't cost much cpu time (no fft performed)
*
* The ClipDecoder will always use a big amount of zero padding at the beginning
* and end to be able to find "partial" AB blocks, where most of the data is
* matched with zeros.
*
* ORIG: |AAAAA|BBBBB|AAAAA|BBBBB|
* CLIP: |A|BB|
* ZEROPAD: 00000|A|BB|00000
* MATCH AAAAA|BBBBB
*
* In this example a clip (CLIP) is generated from an original file (ORIG). By
* zero padding we get a file that contains the clip (ZEROPAD). Finally we are
* able to match an AB block to the zeropadded file (MATCH). This gives us an
* index in the zeropadded file that can be used for decoding the available
* data.
*/
/* Searches for sync bits in a WavData and reports candidate positions (see search()). */
class SyncFinder
{
public:
// BLOCK is the mode used by the BlockDecoder, CLIP the mode used by the ClipDecoder
enum class Mode { BLOCK, CLIP };
// one candidate position found by the search
struct Score {
size_t index; // position in the input where decoding should start
double quality; // match quality of the sync bits at this position
ConvBlockType block_type; // type of the block found at this position
};
struct FrameBit
{
int frame;
std::vector<int> up;
std::vector<int> down;
};
private:
std::vector<std::vector<FrameBit>> sync_bits;
void init_up_down (const WavData& wav_data, Mode mode);
double normalize_sync_quality (double raw_quality);
double sync_decode (const WavData& wav_data, const size_t start_frame,
const std::vector<float>& fft_out_db,
const std::vector<char>& have_frames,
ConvBlockType *block_type);
void scan_silence (const WavData& wav_data);
// first search step: coarse list of locations where the sync bits match
std::vector<Score> search_approx (const WavData& wav_data, Mode mode);
void sync_select_by_threshold (std::vector<Score>& sync_scores);
void sync_select_n_best (std::vector<Score>& sync_scores, size_t n);
// second search step: refine the approximate candidate locations with a finer stepping
void search_refine (const WavData& wav_data, Mode mode, std::vector<Score>& sync_scores);
std::vector<Score> fake_sync (const WavData& wav_data, Mode mode);
// non-zero sample range: [wav_data_first, wav_data_last)
size_t wav_data_first = 0;
size_t wav_data_last = 0;
public:
// public entry point: returns the sync candidates for wav_data in the given mode
std::vector<Score> search (const WavData& wav_data, Mode mode);
std::vector<std::vector<FrameBit>> get_sync_bits (const WavData& wav_data, Mode mode);
private:
void sync_fft (const WavData& wav_data,
size_t index,
size_t frame_count,
std::vector<float>& fft_out_db,
std::vector<char>& have_frames,
const std::vector<char>& want_frames);
const char *find_closest_sync (size_t index);
};
#endif
......@@ -32,6 +32,8 @@ main (int argc, char **argv)
uint64_t x = rng();
printf ("%016" PRIx64 "\n", x);
}
for (size_t i = 0; i < 20; i++)
printf ("%f\n", rng.random_double());
uint64_t s = 0;
double t_start = get_time();
......
/*
* Copyright (C) 2020 Stefan Westerfeld
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <stdio.h>
#include <unistd.h>
#include "threadpool.hh"
/*
 * Manual test for ThreadPool: two jobs with different run times are queued,
 * then the main thread waits for both and prints the values they stored.
 */
int
main()
{
  ThreadPool tp;

  int result1 = 0;
  int result2 = 0;

  auto job_a = [&result1]()
    {
      printf ("A\n");
      sleep (2);
      printf ("A done\n");
      result1 = 123;
    };
  auto job_b = [&result2]()
    {
      printf ("B\n");
      sleep (3);
      printf ("B done\n");
      result2 = 456;
    };

  tp.add_job (job_a);
  tp.add_job (job_b);

  /* both results are only valid after all jobs are done */
  tp.wait_all();

  printf ("===\n");
  printf ("results: %d, %d\n", result1, result2);
}
/*
* Copyright (C) 2020 Stefan Westerfeld
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "threadpool.hh"
#include "utils.hh"
/*
 * Fetch the next job for a worker thread.
 *
 * Blocks until a job is available or the pool is being shut down.
 *
 * Returns true if a job was removed from the queue and stored in job,
 * false if no job was fetched (the pool is shutting down).
 */
bool
ThreadPool::worker_next_job (Job& job)
{
  std::unique_lock<std::mutex> lck (mutex);

  if (stop_workers)
    return false;

  /* The predicate is re-evaluated after every wakeup, so a spurious wakeup
   * puts the worker back to sleep instead of returning without a job.
   */
  cond.wait (lck, [this] { return stop_workers || !jobs.empty(); });

  if (jobs.empty()) /* woken up by the destructor */
    return false;

  job = jobs.front();
  jobs.erase (jobs.begin());
  return true;
}
/*
 * Main loop of each worker thread: fetch jobs and execute them until the
 * pool is shut down.
 *
 * After each job, jobs_done is incremented and the thread blocked in
 * wait_all() (if any) is notified.
 */
void
ThreadPool::worker_run()
{
  for (;;)
    {
      Job job;
      if (worker_next_job (job))
        {
          /* run the job without holding the lock */
          job.fun();

          std::lock_guard<std::mutex> lg (mutex);
          jobs_done++;
          main_cond.notify_one();
        }
      else
        {
          /* stop_workers is written by the destructor while holding the mutex,
           * so it must only be read with the mutex held (reading it in the
           * loop condition without the lock would be a data race)
           */
          std::lock_guard<std::mutex> lg (mutex);
          if (stop_workers)
            return;
        }
    }
}
/*
 * Start the worker threads: one per hardware thread, but at least one.
 */
ThreadPool::ThreadPool()
{
  /* hardware_concurrency() returns 0 if the value is not computable; without
   * any worker no job would ever run and wait_all() would block forever
   */
  unsigned int n_threads = std::thread::hardware_concurrency();
  if (n_threads == 0)
    n_threads = 1;

  threads.reserve (n_threads);
  for (unsigned int i = 0; i < n_threads; i++)
    threads.emplace_back (&ThreadPool::worker_run, this);
}
void
ThreadPool::add_job (std::function<void()> fun)
{
std::lock_guard<std::mutex> lg (mutex);
Job job;
job.fun = fun;
jobs.push_back (job);
jobs_added++;
cond.notify_one();
}
/*
 * Block the calling thread until every job added so far has been executed.
 */
void
ThreadPool::wait_all()
{
  std::unique_lock<std::mutex> lck (mutex);

  /* workers notify main_cond after each finished job */
  while (jobs_added != jobs_done)
    main_cond.wait (lck);
}
/*
 * Shut down the pool: tell all workers to stop, wake them up and join them.
 *
 * Jobs that are still queued at this point are not executed; this is
 * reported as an error, since the user is expected to call wait_all()
 * before destroying the pool.
 */
ThreadPool::~ThreadPool()
{
  {
    std::lock_guard<std::mutex> lg (mutex);
    stop_workers = true;
    cond.notify_all();
  }

  for (auto& t : threads)
    t.join();

  /* all workers are gone, so the counters can be read without locking */
  if (jobs_added != jobs_done)
    {
      // user must wait before deleting the ThreadPool
      // (the counters are size_t, so the matching conversion is %zu, not %zd)
      error ("audiowmark: open jobs in ThreadPool::~ThreadPool() [added=%zu, done=%zu] - this should not happen\n", jobs_added, jobs_done);
    }
}
/*
* Copyright (C) 2020 Stefan Westerfeld
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef AUDIOWMARK_THREAD_POOL_HH
#define AUDIOWMARK_THREAD_POOL_HH
#include <vector>
#include <thread>
#include <functional>
#include <set>
#include <mutex>
#include <condition_variable>
/*
 * Simple thread pool: jobs are added with add_job() and executed by worker
 * threads; wait_all() blocks until all added jobs are done. The user must
 * call wait_all() before the pool is destroyed.
 */
class ThreadPool
{
std::vector<std::thread> threads; // worker threads, joined in the destructor
struct Job
{
std::function<void()> fun;
};
std::mutex mutex; // held while accessing jobs, the job counters and stop_workers
std::condition_variable cond; // workers wait here; notified by add_job() and the destructor
std::condition_variable main_cond; // wait_all() waits here; notified by a worker after each job
std::vector<Job> jobs; // queued jobs, executed front to back
size_t jobs_added = 0; // number of add_job() calls
size_t jobs_done = 0; // number of jobs that have finished
bool stop_workers = false; // set by the destructor to terminate the workers
bool worker_next_job (Job& job);
void worker_run();
public:
ThreadPool();
~ThreadPool();
void add_job (std::function<void()> fun);
void wait_all();
};
#endif /* AUDIOWMARK_THREAD_POOL_HH */
......@@ -169,6 +169,7 @@ class WatermarkSynth
vector<float> window;
vector<float> synth_samples;
bool first_frame = true;
FFTProcessor fft_processor;
void
generate_window()
......@@ -202,7 +203,8 @@ class WatermarkSynth
}
public:
WatermarkSynth (int n_channels) :
n_channels (n_channels)
n_channels (n_channels),
fft_processor (Params::frame_size)
{
generate_window();
synth_samples.resize (window.size() * n_channels);
......@@ -218,7 +220,7 @@ public:
for (int ch = 0; ch < n_channels; ch++)
{
/* mix watermark signal to output frame */
vector<float> fft_delta_out = ifft (fft_delta_spect[ch]);
vector<float> fft_delta_out = fft_processor.ifft (fft_delta_spect[ch]);
for (int dframe = 0; dframe <= 2; dframe++)
{
......
......@@ -25,6 +25,8 @@ double Params::water_delta = 0.01;
/* initial values of the static Params members */
bool Params::mix = true;
bool Params::hard = false; // hard decode bits? (soft decoding is better)
bool Params::snr = false; // compute/show snr while adding watermark
// speed detection is off by default (presumably enabled by --detect-speed - confirm in option parsing)
bool Params::detect_speed = false;
// for debugging --detect-speed
double Params::test_speed = -1;
int Params::have_key = 0;
// number of payload bits for the watermark
size_t Params::payload_size = 128;
bool Params::payload_short = false;
......@@ -47,49 +49,9 @@ std::string Params::output_label;
using std::vector;
using std::complex;
/*
 * von Hann window: raised cosine which is 1 at x = 0 and falls to 0 at
 * |x| = 1; everything outside [-1, 1] evaluates to 0.
 */
inline double
window_cos (double x)
{
  const bool outside = fabs (x) > 1;

  return outside ? 0 : 0.5 + 0.5 * cos (M_PI * x);
}
/*
 * Hamming window on [-1, 1].  Unlike the von Hann window it does not reach
 * zero at the boundaries (value 0.54 - 0.46 at |x| = 1), so there are sharp
 * (rectangle) cutoffs where the window drops to 0 outside the interval.
 */
inline double
window_hamming (double x)
{
  const bool outside = fabs (x) > 1;

  return outside ? 0 : 0.46 * cos (M_PI * x) + 0.54;
}
/*
 * log10 replacement built on log2f, because glibc log2f is a lot faster
 * than glibc log10.  The argument is passed to the float function log2f,
 * so the result only has float precision.
 */
inline double
fast_log10 (double l)
{
  constexpr double log10_of_2 = 0.3010299956639811952; // 1 / log2 (10)

  const float log2_value = log2f (l);
  return log2_value * log10_of_2;
}
/*
 * Convert a linear factor to decibels: 20 * log10 (factor), computed with
 * fast_log10().  A factor that is not greater than zero has no logarithm;
 * min_dB is returned in that case.
 */
double
db_from_factor (double factor, double min_dB)
{
  return factor > 0 ? fast_log10 (factor) * 20 : min_dB;
}
FFTAnalyzer::FFTAnalyzer (int n_channels) :
m_n_channels (n_channels)
m_n_channels (n_channels),
m_fft_processor (Params::frame_size)
{
/* generate analysis window */
m_window.resize (Params::frame_size);
......@@ -111,15 +73,6 @@ FFTAnalyzer::FFTAnalyzer (int n_channels) :
m_window[i] *= 2.0 / window_weight;
}
/* allocate properly aligned buffers for SIMD */
m_frame = new_array_float (Params::frame_size);
m_frame_fft = new_array_float (Params::frame_size);
}
/* release the frame / FFT buffers that the constructor allocated with new_array_float() */
FFTAnalyzer::~FFTAnalyzer()
{
free_array_float (m_frame);
free_array_float (m_frame_fft);
}
vector<vector<complex<float>>>
......@@ -127,6 +80,9 @@ FFTAnalyzer::run_fft (const vector<float>& samples, size_t start_index)
{
assert (samples.size() >= (Params::frame_size + start_index) * m_n_channels);
float *frame = m_fft_processor.in();
float *frame_fft = m_fft_processor.out();
vector<vector<complex<float>>> fft_out;
for (int ch = 0; ch < m_n_channels; ch++)
{
......@@ -136,14 +92,14 @@ FFTAnalyzer::run_fft (const vector<float>& samples, size_t start_index)
/* deinterleave frame data and apply window */
for (size_t x = 0; x < Params::frame_size; x++)
{
m_frame[x] = samples[pos] * m_window[x];
frame[x] = samples[pos] * m_window[x];
pos += m_n_channels;
}
/* FFT transform */
fftar_float (Params::frame_size, m_frame, m_frame_fft);
m_fft_processor.fft();
/* complex<float> and frame_fft have the same layout in memory */
const complex<float> *first = (complex<float> *) m_frame_fft;
const complex<float> *first = (complex<float> *) frame_fft;
const complex<float> *last = first + Params::frame_size / 2 + 1;
fft_out.emplace_back (first, last);
}
......@@ -247,4 +203,8 @@ gen_mix_entries()
return mix_entries;
}
/*
 * Number of frames in wav_data: the number of values per channel divided
 * by Params::frame_size.
 */
int
frame_count (const WavData& wav_data)
{
  const auto values_per_channel = wav_data.n_values() / wav_data.n_channels();

  return values_per_channel / Params::frame_size;
}
......@@ -23,6 +23,8 @@
#include "random.hh"
#include "rawinputstream.hh"
#include "wavdata.hh"
#include "fft.hh"
#include <assert.h>
......@@ -43,6 +45,9 @@ public:
static bool snr; // compute/show snr while adding watermark
static int have_key;
static bool detect_speed;
static double test_speed; // for debugging --detect-speed
static size_t payload_size; // number of payload bits for the watermark
static bool payload_short;
......@@ -114,11 +119,9 @@ class FFTAnalyzer
{
int m_n_channels = 0;
std::vector<float> m_window;
float *m_frame = nullptr;
float *m_frame_fft = nullptr;
FFTProcessor m_fft_processor;
public:
FFTAnalyzer (int n_channels);
~FFTAnalyzer();
std::vector<std::vector<std::complex<float>>> run_fft (const std::vector<float>& samples, size_t start_index);
std::vector<std::vector<std::complex<float>>> fft_range (const std::vector<float>& samples, size_t start_index, size_t frame_count);
......@@ -133,11 +136,11 @@ struct MixEntry
std::vector<MixEntry> gen_mix_entries();
/* linear factor -> dB (20 * log10 (factor)); returns min_dB unless factor > 0 */
double db_from_factor (double factor, double min_dB);
size_t mark_data_frame_count();
size_t mark_sync_frame_count();
/* wav_data.n_values() / wav_data.n_channels() / Params::frame_size */
int frame_count (const WavData& wav_data);
int sync_frame_pos (int f);
int data_frame_pos (int f);
......@@ -163,6 +166,45 @@ randomize_bit_order (const std::vector<T>& bit_vec, bool encode)
return out_bits;
}
/*
 * von Hann window: 0.5 + 0.5 * cos (pi * x) for x in [-1, 1], 0 elsewhere.
 * The window is 1 at the center (x = 0) and reaches 0 at both boundaries.
 */
inline double
window_cos (double x)
{
  return fabs (x) > 1 ? 0 : 0.5 + 0.5 * cos (M_PI * x);
}
/*
 * Hamming window: 0.54 + 0.46 * cos (pi * x) for x in [-1, 1], 0 elsewhere.
 * The value at the boundaries is not zero, which gives sharp (rectangle)
 * cutoffs there.
 */
inline double
window_hamming (double x)
{
  return fabs (x) > 1 ? 0 : 0.46 * cos (M_PI * x) + 0.54;
}
/*
 * dB value of the complex number (re, im): 10 * log10 (re^2 + im^2), which
 * is the same as 20 * log10 of its magnitude.  If the squared magnitude is
 * not greater than zero the logarithm is undefined and min_dB is returned.
 */
static inline float
db_from_complex (float re, float im, float min_dB)
{
  // glibc log2f is a lot faster than glibc log10
  constexpr float db_per_log2 = 3.01029995663981; // 10 / log2 (10)

  const float power = re * re + im * im;
  return power > 0 ? log2f (power) * db_per_log2 : min_dB;
}
/*
 * Convenience overload for std::complex<float>: splits the value into real
 * and imaginary part and forwards to db_from_complex (re, im, min_dB).
 */
static inline float
db_from_complex (std::complex<float> f, float min_dB)
{
  const float re = f.real();
  const float im = f.imag();

  return db_from_complex (re, im, min_dB);
}
int add_stream_watermark (AudioInputStream *in_stream, AudioOutputStream *out_stream, const std::string& bits, size_t zero_frames);
int add_watermark (const std::string& infile, const std::string& outfile, const std::string& bits);
int get_watermark (const std::string& infile, const std::string& orig_pattern);
......
This diff is collapsed.
This diff is collapsed.
/*
* Copyright (C) 2018-2020 Stefan Westerfeld
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef AUDIOWMARK_WM_SPEED_HH
#define AUDIOWMARK_WM_SPEED_HH
#include "wavdata.hh"
/*
 * Speed detection for the audio data in in_data.
 *
 * NOTE(review): presumably returns the detected replay speed relative to
 * the original (e.g. about 1.05 for audio played 5% faster) and prints the
 * result if print_results is set - confirm against the implementation.
 */
double detect_speed (const WavData& in_data, bool print_results);
#endif /* AUDIOWMARK_WM_SPEED_HH */
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment