Merge branch 'short-clip'

82f184e4 · Stefan Westerfeld · d1276514 · 8cd04688 · 82f184e4 · 82f184e4
Commit 82f184e4 authored Mar 22, 2020 by Stefan Westerfeld
12 changed files
--- a/NEWS
+++ b/NEWS
+Overview of Changes in audiowmark-0.3.0:
+
+* replace padding at start with a partial B block
+* add algorithm for decoding the watermark from short clips
+
 Overview of Changes in audiowmark-0.2.1:

 * add limiter to avoid clipping during watermark generation

--- a/configure.ac
+++ b/configure.ac
-AC_INIT([audiowmark], [0.2.1])
+AC_INIT([audiowmark], [0.3.0])
 AC_CONFIG_SRCDIR([src/audiowmark.cc])
 AC_CONFIG_AUX_DIR([build-aux])
 AC_CONFIG_MACRO_DIR([m4])

--- a/src/.gitignore
+++ b/src/.gitignore
@@ -7,3 +7,4 @@ testconvcode
 testmp3
 testrandom
 teststream
+testlimiter
--- a/src/audiowmark.cc
+++ b/src/audiowmark.cc
@@ -441,6 +441,45 @@ test_snr (const string& orig_file, const string& wm_file)
  return 0;
 }

+int
+test_clip (const string& in_file, const string& out_file, int seed, int time_seconds)
+{
+  WavData in_data;
+  Error err = in_data.load (in_file);
+  if (err)
+    {
+      error ("audiowmark: error loading %s: %s\n", in_file.c_str(), err.message());
+      return 1;
+    }
+  bool done = false;
+  Random rng (seed, /* there is no stream for this test */ Random::Stream::data_up_down);
+  size_t start_point, end_point;
+  do
+    {
+      // this is unbiased only if 2 * block_size + time_seconds is smaller than overall file length
+      const size_t values_per_block = (mark_sync_frame_count() + mark_data_frame_count()) * Params::frame_size * in_data.n_channels();
+      start_point = 2 * values_per_block * (double(rng()) / UINT64_MAX);
+      start_point /= in_data.n_channels();
+
+      end_point = start_point + time_seconds * in_data.sample_rate();
+      if (end_point < in_data.n_values() / in_data.n_channels())
+        done = true;
+    }
+  while (!done);
+  //printf ("%.3f %.3f\n", start_point / double (in_data.sample_rate()), end_point / double (in_data.sample_rate()));
+
+  vector<float> out_signal (in_data.samples().begin() + start_point * in_data.n_channels(),
+                            in_data.samples().begin() + end_point * in_data.n_channels());
+  WavData out_wav_data (out_signal, in_data.n_channels(), in_data.sample_rate(), in_data.bit_depth());
+  err = out_wav_data.save (out_file);
+  if (err)
+    {
+      error ("audiowmark: error saving %s: %s\n", out_file.c_str(), err.message());
+      return 1;
+    }
+  return 0;
+}
+
 int
 gen_key (const string& outfile)
 {
@@ -495,6 +534,10 @@ main (int argc, char **argv)
    {
      test_snr (argv[2], argv[3]);
    }
+  else if (op == "test-clip" && argc == 6)
+    {
+      test_clip (argv[2], argv[3], atoi (argv[4]), atoi (argv[5]));
+    }
  else if (op == "gen-key" && argc == 3)
    {
      return gen_key (argv[2]);

--- a/src/ber-test.sh
+++ b/src/ber-test.sh
@@ -16,6 +16,9 @@ fi
 if [ "x$AWM_FILE" == "x" ]; then
  AWM_FILE=t
 fi
+if [ "x$AWM_MULTI_CLIP" == "x" ]; then
+  AWM_MULTI_CLIP=1
+fi

 {
  if [ "x$AWM_SET" == "xsmall" ]; then
@@ -47,8 +50,8 @@ do
      PATTERN=4e1243bd22c66e76c2ba9eddc1f91394
    fi
    echo in_pattern $PATTERN
-    echo in_flags $AWM_PARAMS --test-key $SEED
-    audiowmark add "$i" ${AWM_FILE}.wav $PATTERN $AWM_PARAMS --test-key $SEED >/dev/null
+    echo in_flags $AWM_PARAMS $AWM_PARAMS_ADD --test-key $SEED
+    audiowmark add "$i" ${AWM_FILE}.wav $PATTERN $AWM_PARAMS $AWM_PARAMS_ADD --test-key $SEED --quiet
    if [ "x$AWM_RAND_CUT" != x ]; then
      CUT=$RANDOM
      audiowmark cut-start "${AWM_FILE}.wav" "${AWM_FILE}.wav" $CUT
@@ -91,7 +94,15 @@ do
      exit 1
    fi
    echo
-    if [ "x$AWM_REPORT" == "xtruncv" ]; then
+    if [ "x${AWM_CLIP}" != "x" ]; then
+      for CLIP in $(seq $AWM_MULTI_CLIP)
+      do
+        audiowmark test-clip $OUT_FILE ${OUT_FILE}.clip.wav $((CLIP_SEED++)) $AWM_CLIP --test-key $SEED
+        audiowmark cmp ${OUT_FILE}.clip.wav $PATTERN $AWM_PARAMS --test-key $SEED $TEST_CUT_ARGS
+        rm ${OUT_FILE}.clip.wav
+        echo
+      done
+    elif [ "x$AWM_REPORT" == "xtruncv" ]; then
      for TRUNC in $AWM_TRUNCATE
      do
        audiowmark cmp $OUT_FILE $PATTERN $AWM_PARAMS --test-key $SEED $TEST_CUT_ARGS --test-truncate $TRUNC | sed "s/^/$TRUNC /g"

--- a/src/gen-short-clip-adoc.sh
+++ b/src/gen-short-clip-adoc.sh
+#!/bin/bash
+
+STRENGTHS="10 15 20 30"
+STR_CLIPS="5 10 15 20 25 30"
+MAIN_CLIPS="5 10 15 20 30 40 50 60"
+
+echo ".performance-by-clip-length"
+echo '[frame="topbot",options="header",cols="<2,8*>1"]'
+echo '|=========================='
+echo -n "| Quality "
+for CLIP in $MAIN_CLIPS
+do
+  echo -n "| $CLIP"
+done
+echo
+
+for TEST in mp3-256 mp3-128 double-mp3-128 ogg-128
+do
+  echo -n "| $TEST "
+  for CLIP in $MAIN_CLIPS
+  do
+    cat main-$TEST-*-$CLIP | awk '{bad += $1; n += $2} END {if (n==0) n=1;fer=100.0*bad/n; bold=fer>0?"*":" ";printf ("| %s%.2f%s", bold, fer, bold)}'
+  done
+  echo
+done
+echo
+echo '|=========================='
+
+echo ".effects-of-watermarking-strength"
+echo '[frame="topbot",options="header",cols="<1,7*>1"]'
+echo '|=========================='
+echo -n "| Strength | SNR "
+for CLIP in $STR_CLIPS
+do
+  echo -n "| $CLIP"
+done
+echo
+
+for STRENGTH in $STRENGTHS
+do
+  echo -n "| $STRENGTH "
+  cat snr-$STRENGTH | awk '{printf ("| %.2f ", $1);}'
+  for CLIP in $STR_CLIPS
+  do
+    cat str-$STRENGTH-mp3-*-$CLIP | awk '{bad += $1; n += $2} END {if (n==0) n=1;fer=100.0*bad/n; bold=fer>0?"*":" ";printf ("| %s%.2f%s", bold, fer, bold)}'
+  done
+  echo
+done
+echo
+echo '|=========================='
--- a/src/gen-short-clip-mk.sh
+++ b/src/gen-short-clip-mk.sh
+#!/bin/bash
+
+SEEDS=$(seq 5)
+STRENGTHS="10 15 20 30"
+STR_CLIPS="5 10 15 20 25 30"
+MAIN_CLIPS="5 10 15 20 30 40 50 60"
+MULTI_CLIP=4
+
+echo -n "all:"
+
+for STRENGTH in $STRENGTHS
+do
+  FILE="snr-$STRENGTH"
+  echo -n " $FILE"
+done
+
+for SEED in $SEEDS
+do
+  for STRENGTH in $STRENGTHS
+  do
+    for CLIP in $STR_CLIPS
+    do
+      FILE="str-$STRENGTH-mp3-$SEED-$CLIP"
+      echo -n " $FILE"
+    done
+  done
+
+  for CLIP in $MAIN_CLIPS
+  do
+    echo -n " main-mp3-256-$SEED-$CLIP main-mp3-128-$SEED-$CLIP main-ogg-128-$SEED-$CLIP main-double-mp3-128-$SEED-$CLIP"
+  done
+done
+
+echo
+echo
+
+for SEED in $SEEDS
+do
+  for STRENGTH in $STRENGTHS
+  do
+    for CLIP in $STR_CLIPS
+    do
+      FILE="str-$STRENGTH-mp3-$SEED-$CLIP"
+      echo "$FILE:"
+      echo -e "\t( cd ..; AWM_RAND_PATTERN=1 AWM_SET=huge2 AWM_PARAMS_ADD='--strength $STRENGTH' AWM_CLIP='$CLIP' AWM_MULTI_CLIP='$MULTI_CLIP' AWM_SEEDS=$SEED AWM_FILE='t-$FILE' ber-test.sh mp3 128 ) >x$FILE"
+      echo -e "\tmv x$FILE $FILE"
+      echo
+    done
+  done
+done
+
+for STRENGTH in $STRENGTHS
+do
+  FILE="snr-$STRENGTH"
+  echo "$FILE:"
+  echo -e "\t( cd ..; AWM_SET=huge2 AWM_PARAMS='--strength $STRENGTH' snr.sh ) >x$FILE"
+  echo -e "\tmv x$FILE $FILE"
+  echo
+done
+
+for SEED in $SEEDS
+do
+  for CLIP in $MAIN_CLIPS
+  do
+    FILE="main-mp3-256-$SEED-$CLIP"
+    echo "$FILE:"
+    echo -e "\t( cd ..; AWM_RAND_PATTERN=1 AWM_SET=huge2 AWM_CLIP='$CLIP' AWM_MULTI_CLIP='$MULTI_CLIP' AWM_SEEDS=$SEED AWM_FILE='t-$FILE' ber-test.sh mp3 256 ) >x$FILE"
+    echo -e "\tmv x$FILE $FILE"
+    echo
+
+    FILE="main-mp3-128-$SEED-$CLIP"
+    echo "$FILE:"
+    echo -e "\t( cd ..; AWM_RAND_PATTERN=1 AWM_SET=huge2 AWM_CLIP='$CLIP' AWM_MULTI_CLIP='$MULTI_CLIP' AWM_SEEDS=$SEED AWM_FILE='t-$FILE' ber-test.sh mp3 128 ) >x$FILE"
+    echo -e "\tmv x$FILE $FILE"
+    echo
+
+    FILE="main-ogg-128-$SEED-$CLIP"
+    echo "$FILE:"
+    echo -e "\t( cd ..; AWM_RAND_PATTERN=1 AWM_SET=huge2 AWM_CLIP='$CLIP' AWM_MULTI_CLIP='$MULTI_CLIP' AWM_SEEDS=$SEED AWM_FILE='t-$FILE' ber-test.sh ogg 128 ) >x$FILE"
+    echo -e "\tmv x$FILE $FILE"
+    echo
+
+    FILE="main-double-mp3-128-$SEED-$CLIP"
+    echo "$FILE:"
+    echo -e "\t( cd ..; AWM_RAND_PATTERN=1 AWM_SET=huge2 AWM_CLIP='$CLIP' AWM_MULTI_CLIP='$MULTI_CLIP' AWM_SEEDS=$SEED AWM_FILE='t-$FILE' ber-test.sh double-mp3 128 ) >x$FILE"
+    echo -e "\tmv x$FILE $FILE"
+    echo
+  done
+done
--- a/src/random.hh
+++ b/src/random.hh
@@ -13,7 +13,7 @@ public:
  enum class Stream {
    data_up_down = 1,
    sync_up_down = 2,
-    pad_up_down = 3,
+    pad_up_down = 3,   /* unused */
    mix = 4,
    bit_order = 5,
    frame_position = 6

--- a/src/short.mk
+++ b/src/short.mk
+all: short-60 short-60-mp3 \
+  short-50 short-50-mp3 \
+  short-40 short-40-mp3 \
+  short-30 short-30-mp3 \
+  short-20 short-20-mp3 \
+  short-10 short-10-mp3
+
+short-60:
+	AWM_FILE=t-short-60 AWM_CLIP=60 fer-test.sh 10 "" > tmp-short-60
+	mv tmp-short-60 short-60
+
+short-60-mp3:
+	AWM_FILE=t-short-60-mp3 AWM_CLIP=60 fer-test.sh 10 "" mp3 128 > tmp-short-60-mp3
+	mv tmp-short-60-mp3 short-60-mp3
+
+short-50:
+	AWM_FILE=t-short-50 AWM_CLIP=50 fer-test.sh 10 "" > tmp-short-50
+	mv tmp-short-50 short-50
+
+short-50-mp3:
+	AWM_FILE=t-short-50-mp3 AWM_CLIP=50 fer-test.sh 10 "" mp3 128 > tmp-short-50-mp3
+	mv tmp-short-50-mp3 short-50-mp3
+
+short-40:
+	AWM_FILE=t-short-40 AWM_CLIP=40 fer-test.sh 10 "" > tmp-short-40
+	mv tmp-short-40 short-40
+
+short-40-mp3:
+	AWM_FILE=t-short-40-mp3 AWM_CLIP=40 fer-test.sh 10 "" mp3 128 > tmp-short-40-mp3
+	mv tmp-short-40-mp3 short-40-mp3
+
+short-30:
+	AWM_FILE=t-short-30 AWM_CLIP=30 fer-test.sh 10 "" > tmp-short-30
+	mv tmp-short-30 short-30
+
+short-30-mp3:
+	AWM_FILE=t-short-30-mp3 AWM_CLIP=30 fer-test.sh 10 "" mp3 128 > tmp-short-30-mp3
+	mv tmp-short-30-mp3 short-30-mp3
+
+short-20:
+	AWM_FILE=t-short-20 AWM_CLIP=20 fer-test.sh 10 "" > tmp-short-20
+	mv tmp-short-20 short-20
+
+short-20-mp3:
+	AWM_FILE=t-short-20-mp3 AWM_CLIP=20 fer-test.sh 10 "" mp3 128 > tmp-short-20-mp3
+	mv tmp-short-20-mp3 short-20-mp3
+
+short-10:
+	AWM_FILE=t-short-10 AWM_CLIP=10 fer-test.sh 10 "" > tmp-short-10
+	mv tmp-short-10 short-10
+
+short-10-mp3:
+	AWM_FILE=t-short-10-mp3 AWM_CLIP=10 fer-test.sh 10 "" mp3 128 > tmp-short-10-mp3
+	mv tmp-short-10-mp3 short-10-mp3
--- a/src/snr.sh
+++ b/src/snr.sh
@@ -4,5 +4,5 @@ PATTERN=4e1243bd22c66e76c2ba9eddc1f91394

 for i in test/T*
 do
-  echo $i $(audiowmark add $i t.wav $PATTERN $AWM_PARAMS --snr | grep SNR)
+  echo $i $(audiowmark add $i t.wav $PATTERN $AWM_PARAMS --snr 2>&1 | grep SNR)
 done | awk '{s += $3; n++} END { print s/n; }'
--- a/src/wmadd.cc
+++ b/src/wmadd.cc
@@ -17,6 +17,7 @@
 using std::string;
 using std::vector;
 using std::complex;
+using std::max;

 enum class FrameMod : uint8_t {
  KEEP = 0,
@@ -121,20 +122,6 @@ mark_sync (vector<vector<FrameMod>>& frame_mod, int ab)
    }
 }

-static void
-init_pad_mod_vec (vector<vector<FrameMod>>& pad_mod_vec)
-{
-  UpDownGen up_down_gen (Random::Stream::pad_up_down);
-
-  for (size_t f = 0; f < Params::frames_pad_start; f++)
-    {
-      vector<FrameMod> mod (Params::max_band + 1);
-
-      prepare_frame_mod (up_down_gen, f, mod, 0);
-      pad_mod_vec.push_back (mod);
-    }
-}
-
 static void
 init_frame_mod_vec (vector<vector<FrameMod>>& frame_mod_vec, int ab, const vector<int>& bitvec)
 {
@@ -245,11 +232,8 @@ public:
 */
 class WatermarkGen
 {
-  enum State { PAD, WATERMARK } state = State::PAD;
-
  const int                 n_channels = 0;
-  int                       frame_number = 0;
-  int                       frame_bound = Params::frames_pad_start;
+  size_t                    frame_number = 0;
  int                       ab = 0;
  int                       m_data_blocks = 0;

@@ -257,7 +241,6 @@ class WatermarkGen
  WatermarkSynth            wm_synth;

  vector<int>               bitvec;
-  vector<vector<FrameMod>>  pad_mod_vec;
  vector<vector<FrameMod>>  frame_mod_vec_a;
  vector<vector<FrameMod>>  frame_mod_vec_b;
 public:
@@ -267,7 +250,12 @@ public:
    wm_synth (n_channels),
    bitvec (bitvec)
  {
-    init_pad_mod_vec (pad_mod_vec);
+    /* start writing a partial B-block as padding */
+    init_frame_mod_vec (frame_mod_vec_b, 1, bitvec);
+
+    assert (frame_mod_vec_b.size() > Params::frames_pad_start);
+    frame_number = frame_mod_vec_b.size() - Params::frames_pad_start;
+    ab = 1;
  }
  vector<float>
  run (const vector<float>& samples)
@@ -280,50 +268,29 @@ public:
    for (int ch = 0; ch < n_channels; ch++)
      fft_delta_spect.push_back (vector<complex<float>> (fft_out.back().size()));

-    if (state == State::PAD)
-      {
-        for (int ch = 0; ch < n_channels; ch++)
-          apply_frame_mod (pad_mod_vec[frame_number], fft_out[ch], fft_delta_spect[ch]);
-      }
-    else if (state == State::WATERMARK)
-      {
-        for (int ch = 0; ch < n_channels; ch++)
-          apply_frame_mod (ab ? frame_mod_vec_b[frame_number] : frame_mod_vec_a[frame_number], fft_out[ch], fft_delta_spect[ch]);
-      }
+    const vector<vector<FrameMod>>& frame_mod_vec = ab ? frame_mod_vec_b : frame_mod_vec_a;
+    for (int ch = 0; ch < n_channels; ch++)
+      apply_frame_mod (frame_mod_vec[frame_number], fft_out[ch], fft_delta_spect[ch]);

    frame_number++;
-    if (frame_number == frame_bound)
+    if (frame_number == frame_mod_vec.size())
      {
        frame_number = 0;

-        if (state == PAD)
-          {
-            state = WATERMARK;
-            frame_bound = mark_sync_frame_count() + mark_data_frame_count();
+        ab = (ab + 1) & 1; // write A|B|A|B|...
+        m_data_blocks++;

-            // initialize a-block frame mod vector here to minimize startup latency
-            assert (frame_mod_vec_a.empty());
-            init_frame_mod_vec (frame_mod_vec_a, 0, bitvec);
-          }
-        else if (state == WATERMARK)
-          {
-            ab = (ab + 1) & 1; // write A|B|A|B|...
-            frame_bound = mark_sync_frame_count() + mark_data_frame_count();
-            m_data_blocks++;
-
-            if (frame_mod_vec_b.empty())
-              {
-                // initialize b-block frame mod vector here to minimize startup latency
-                init_frame_mod_vec (frame_mod_vec_b, 1, bitvec);
-              }
-          }
+        // initialize A-block frame mod vector here to minimize startup latency
+        if (frame_mod_vec_a.empty())
+          init_frame_mod_vec (frame_mod_vec_a, 0, bitvec);
      }
    return wm_synth.run (fft_delta_spect);
  }
  int
  data_blocks() const
  {
-    return m_data_blocks;
+    // first block is padding - a partial B block
+    return max (m_data_blocks - 1, 0);
  }
 };


--- a/src/wmget.cc
+++ b/src/wmget.cc
@@ -182,75 +182,165 @@ linear_decode (vector<vector<complex<float>>>& fft_out, int n_channels)
  return raw_bit_vec;
 }

-static double
-normalize_sync_quality (double raw_quality)
-{
-  /* the quality for a good sync block depends on watermark strength
-   *
-   * this is just an approximation, but it should be good enough to be able to
-   * use one single threshold on the normalized value check if we have a sync
-   * block or not - typical output is 1.0 or more for sync blocks and close
-   * to 0.0 for non-sync blocks
-   */
-  return raw_quality / min (Params::water_delta, 0.080) / 2.9;
-}
-
+/*
+ * The SyncFinder class searches for sync bits in an input WavData. It is used
+ * by both, the BlockDecoder and ClipDecoder to find a time index where
+ * decoding should start.
+ *
+ * The first step for finding sync bits is search_approx, which generates a
+ * list of approximate locations where sync bits match, using a stepping of
+ * sync_search_step=256 (for a frame size of 1024). The approximate candidate
+ * locations are later refined with search_refine using sync_search_fine=8 as
+ * stepping.
+ *
+ * BlockDecoder and ClipDecoder have similar but not identical needs, so
+ * both use this class, using either Mode::BLOCK or Mode::CLIP.
+ *
+ * BlockDecoder (Mode::BLOCK)
+ *  - search for full A or full B blocks
+ *  - select candidates by threshold(s) only
+ *  - zero samples are not treated any special
+ *
+ * ClipDecoder (Mode::CLIP)
+ *  - search for AB block (one A block followed by one B block) or BA block
+ *  - select candidates by threshold, but only keep at most the 5 best matches
+ *  - zero samples at beginning/end don't affect the score returned by sync_decode
+ *  - zero samples at beginning/end don't cost much cpu time (no fft performed)
+ *
+ * The ClipDecoder will always use a big amount of zero padding at the beginning
+ * and end to be able to find "partial" AB blocks, where most of the data is
+ * matched with zeros.
+ *
+ * ORIG:   |AAAAA|BBBBB|AAAAA|BBBBB|
+ * CLIP:                   |A|BB|
+ * ZEROPAD:           00000|A|BB|00000
+ * MATCH                AAAAA|BBBBB
+ *
+ * In this example a clip (CLIP) is generated from an original file (ORIG).  By
+ * zero padding we get a file that contains the clip (ZEROPAD). Finally we are
+ * able to match an AB block to the zeropadded file (MATCH). This gives us an
+ * index in the zeropadded file that can be used for decoding the available
+ * data.
+ */
 class SyncFinder
 {
-  vector<vector<int>> up;
-  vector<vector<int>> down;
+public:
+  enum class Mode { BLOCK, CLIP };
+
+  struct Score {
+    size_t        index;
+    double        quality;
+    ConvBlockType block_type;
+  };
+private:
+  struct FrameBit
+  {
+    int frame;
+    vector<int> up;
+    vector<int> down;
+  };
+  vector<vector<FrameBit>> sync_bits;

  void
-  init_up_down (const WavData& wav_data)
+  init_up_down (const WavData& wav_data, Mode mode)
  {
-    up.clear();
-    down.clear();
+    sync_bits.clear();

-    up.resize (Params::sync_bits);
-    down.resize (Params::sync_bits);
+    // "long" blocks consist of two "normal" blocks, which means
+    //   the sync bits pattern is repeated after the end of the first block
+    const int first_block_end = mark_sync_frame_count() + mark_data_frame_count();
+    const int block_count = mode == Mode::CLIP ? 2 : 1;
+    size_t n_bands = Params::max_band - Params::min_band + 1;

    UpDownGen up_down_gen (Random::Stream::sync_up_down);
-    size_t n_bands = Params::max_band - Params::min_band + 1;
    for (int bit = 0; bit < Params::sync_bits; bit++)
      {
+        vector<FrameBit> frame_bits;
        for (int f = 0; f < Params::sync_frames_per_bit; f++)
          {
            UpDownArray frame_up, frame_down;
            up_down_gen.get (f + bit * Params::sync_frames_per_bit, frame_up, frame_down);

-            for (auto u : frame_up)
-              up[bit].push_back (u - Params::min_band + sync_frame_pos (f + bit * Params::sync_frames_per_bit) * n_bands * wav_data.n_channels());
-
-            for (auto d : frame_down)
-              down[bit].push_back (d - Params::min_band + sync_frame_pos (f + bit * Params::sync_frames_per_bit) * n_bands * wav_data.n_channels());
+            for (int block = 0; block < block_count; block++)
+              {
+                FrameBit frame_bit;
+                frame_bit.frame = sync_frame_pos (f + bit * Params::sync_frames_per_bit) + block * first_block_end;
+                for (int ch = 0; ch < wav_data.n_channels(); ch++)
+                  {
+                    if (block == 0)
+                      {
+                        for (auto u : frame_up)
+                          frame_bit.up.push_back (u - Params::min_band + n_bands * ch);
+                        for (auto d : frame_down)
+                          frame_bit.down.push_back (d - Params::min_band + n_bands * ch);
+                      }
+                    else
+                      {
+                        for (auto u : frame_up)
+                          frame_bit.down.push_back (u - Params::min_band + n_bands * ch);
+                        for (auto d : frame_down)
+                          frame_bit.up.push_back (d - Params::min_band + n_bands * ch);
+                      }
+                  }
+                std::sort (frame_bit.up.begin(), frame_bit.up.end());
+                std::sort (frame_bit.down.begin(), frame_bit.down.end());
+                frame_bits.push_back (frame_bit);
+              }
          }
-        sort (up[bit].begin(), up[bit].end());
-        sort (down[bit].begin(), down[bit].end());
+        std::sort (frame_bits.begin(), frame_bits.end(), [] (FrameBit& f1, FrameBit& f2) { return f1.frame < f2.frame; });
+        sync_bits.push_back (frame_bits);
      }
  }
+
+  double
+  normalize_sync_quality (double raw_quality)
+  {
+    /* the quality for a good sync block depends on watermark strength
+     *
+     * this is just an approximation, but it should be good enough to be able to
+     * use one single threshold on the normalized value check if we have a sync
+     * block or not - typical output is 1.0 or more for sync blocks and close
+     * to 0.0 for non-sync blocks
+     */
+    return raw_quality / min (Params::water_delta, 0.080) / 2.9;
+  }
+
  double
-  sync_decode (const WavData& wav_data, const size_t start_frame, const vector<float>& fft_out_db, ConvBlockType *block_type)
+  sync_decode (const WavData& wav_data, const size_t start_frame,
+               const vector<float>& fft_out_db,
+               const vector<char>&  have_frames,
+               ConvBlockType *block_type)
  {
    double sync_quality = 0;

    size_t n_bands = Params::max_band - Params::min_band + 1;
-    for (int bit = 0; bit < Params::sync_bits; bit++)
+    int bit_count = 0;
+    for (size_t bit = 0; bit < sync_bits.size(); bit++)
      {
+        const vector<FrameBit>& frame_bits = sync_bits[bit];
        float umag = 0, dmag = 0;

-        for (int ch = 0; ch < wav_data.n_channels(); ch++)
+        int frame_bit_count = 0;
+        for (const auto& frame_bit : frame_bits)
          {
-            const int index = (start_frame * wav_data.n_channels() + ch) * n_bands;
-
-            for (size_t i = 0; i < up[bit].size(); i++)
+            if (have_frames[start_frame + frame_bit.frame])
              {
-                umag += fft_out_db[index + up[bit][i]];
-                dmag += fft_out_db[index + down[bit][i]];
+                const int index = ((start_frame + frame_bit.frame) * wav_data.n_channels()) * n_bands;
+                for (size_t i = 0; i < frame_bit.up.size(); i++)
+                  {
+                    umag += fft_out_db[index + frame_bit.up[i]];
+                    dmag += fft_out_db[index + frame_bit.down[i]];
+                  }
+                frame_bit_count++;
              }
          }
        /* convert avoiding bias, raw_bit < 0 => 0 bit received; raw_bit > 0 => 1 bit received */
        double raw_bit;
-        if (umag < dmag)
+        if (umag == 0 || dmag == 0)
+          {
+            raw_bit = 0;
+          }
+        else if (umag < dmag)
          {
            raw_bit = 1 - umag / dmag;
          }
@@ -261,9 +351,11 @@ class SyncFinder

        const int expect_data_bit = bit & 1; /* expect 010101 */
        const double q = expect_data_bit ? raw_bit : -raw_bit;
-        sync_quality += q;
+        sync_quality += q * frame_bit_count;
+        bit_count += frame_bit_count;
      }
-    sync_quality /= Params::sync_bits;
+    if (bit_count)
+      sync_quality /= bit_count;
    sync_quality = normalize_sync_quality (sync_quality);

    if (sync_quality < 0)
@@ -277,57 +369,54 @@ class SyncFinder
        return sync_quality;
      }
  }
-public:
-  struct Score {
-    size_t        index;
-    double        quality;
-    ConvBlockType block_type;
-  };
-  vector<Score>
-  search (const WavData& wav_data)
+  void
+  scan_silence (const WavData& wav_data)
  {
-    vector<Score> result_scores;
-    vector<Score> sync_scores;
-
-    if (Params::test_no_sync)
-      {
-        const size_t expect0 = Params::frames_pad_start * Params::frame_size;
-        const size_t expect_step = (mark_sync_frame_count() + mark_data_frame_count()) * Params::frame_size;
-        const size_t expect_end = frame_count (wav_data) * Params::frame_size;
-
-        int ab = 0;
-        for (size_t expect_index = expect0; expect_index + expect_step < expect_end; expect_index += expect_step)
-          result_scores.push_back (Score { expect_index, 1.0, (ab++ & 1) ? ConvBlockType::b : ConvBlockType::a });
+    const vector<float>& samples = wav_data.samples();

-        return result_scores;
-      }
-    init_up_down (wav_data);
+    // find first non-zero sample
+    wav_data_first = 0;
+    while (wav_data_first < samples.size() && samples[wav_data_first] == 0)
+      wav_data_first++;

+    // search wav_data_last to get [wav_data_first, wav_data_last) range
+    wav_data_last = samples.size();
+    while (wav_data_last > wav_data_first && samples[wav_data_last - 1] == 0)
+      wav_data_last--;
+  }
+  vector<Score>
+  search_approx (const WavData& wav_data, Mode mode)
+  {
    vector<float> fft_db;
+    vector<char>  have_frames;
+    vector<Score> sync_scores;

    // compute multiple time-shifted fft vectors
    size_t n_bands = Params::max_band - Params::min_band + 1;
+    int total_frame_count = mark_sync_frame_count() + mark_data_frame_count();
+    if (mode == Mode::CLIP)
+      total_frame_count *= 2;
    for (size_t sync_shift = 0; sync_shift < Params::frame_size; sync_shift += Params::sync_search_step)
      {
-        sync_fft (wav_data, sync_shift, frame_count (wav_data) - 1, fft_db, /* want all frames */ {});
+        sync_fft (wav_data, sync_shift, frame_count (wav_data) - 1, fft_db, have_frames, /* want all frames */ {});
        for (int start_frame = 0; start_frame < frame_count (wav_data); start_frame++)
          {
            const size_t sync_index = start_frame * Params::frame_size + sync_shift;
-            if ((start_frame + mark_sync_frame_count() + mark_data_frame_count()) * wav_data.n_channels() * n_bands < fft_db.size())
+            if ((start_frame + total_frame_count) * wav_data.n_channels() * n_bands < fft_db.size())
              {
                ConvBlockType block_type;
-                double quality = sync_decode (wav_data, start_frame, fft_db, &block_type);
+                double quality = sync_decode (wav_data, start_frame, fft_db, have_frames, &block_type);
                // printf ("%zd %f\n", sync_index, quality);
                sync_scores.emplace_back (Score { sync_index, quality, block_type });
              }
          }
      }
    sort (sync_scores.begin(), sync_scores.end(), [] (const Score& a, const Score &b) { return a.index < b.index; });
-
-    vector<int> want_frames (mark_sync_frame_count() + mark_data_frame_count());
-    for (size_t f = 0; f < mark_sync_frame_count(); f++)
-      want_frames[sync_frame_pos (f)] = 1;
-
+    return sync_scores;
+  }
+  void
+  sync_select_by_threshold (vector<Score>& sync_scores)
+  {
    /* for strength 8 and above:
     *   -> more false positive candidates are rejected, so we can use a lower threshold
     *
@@ -337,14 +426,14 @@ public:
    const double strength = Params::water_delta * 1000;
    const double sync_threshold1 = strength > 7.5 ? 0.4 : 0.5;

+    vector<Score> selected_scores;
+
    for (size_t i = 0; i < sync_scores.size(); i++)
      {
-        // printf ("%zd %f\n", sync_scores[i].index, sync_scores[i].quality);
        if (sync_scores[i].quality > sync_threshold1)
          {
            double q_last = -1;
            double q_next = -1;
-
            if (i > 0)
              q_last = sync_scores[i - 1].quality;

@@ -353,43 +442,129 @@ public:

            if (sync_scores[i].quality > q_last && sync_scores[i].quality > q_next)
              {
-                //printf ("%zd %s %f", sync_scores[i].index, find_closest_sync (sync_scores[i].index), sync_scores[i].quality);
+                selected_scores.emplace_back (sync_scores[i]);
+              }
+          }
+      }
+    sync_scores = selected_scores;
+  }
+  void
+  sync_select_n_best (vector<Score>& sync_scores, size_t n)
+  {
+    std::sort (sync_scores.begin(), sync_scores.end(), [](Score& s1, Score& s2) { return s1.quality > s2.quality; });
+    if (sync_scores.size() > n)
+      sync_scores.resize (n);
+  }
+  void
+  search_refine (const WavData& wav_data, Mode mode, vector<Score>& sync_scores)
+  {
+    vector<float> fft_db;
+    vector<char>  have_frames;
+    vector<Score> result_scores;

-                // refine match
-                double best_quality       = sync_scores[i].quality;
-                size_t best_index         = sync_scores[i].index;
-                ConvBlockType best_block_type = sync_scores[i].block_type; /* doesn't really change during refinement */
+    int total_frame_count = mark_sync_frame_count() + mark_data_frame_count();
+    const int first_block_end = total_frame_count;
+    if (mode == Mode::CLIP)
+      total_frame_count *= 2;

-                int start = std::max (int (sync_scores[i].index) - Params::sync_search_step, 0);
-                int end   = sync_scores[i].index + Params::sync_search_step;
-                for (int fine_index = start; fine_index <= end; fine_index += Params::sync_search_fine)
+    vector<char> want_frames (total_frame_count);
+    for (size_t f = 0; f < mark_sync_frame_count(); f++)
+      {
+        want_frames[sync_frame_pos (f)] = 1;
+        if (mode == Mode::CLIP)
+          want_frames[first_block_end + sync_frame_pos (f)] = 1;
+      }
+
+    for (const auto& score : sync_scores)
+      {
+        //printf ("%zd %s %f", sync_scores[i].index, find_closest_sync (sync_scores[i].index), sync_scores[i].quality);
+
+        // refine match
+        double best_quality       = score.quality;
+        size_t best_index         = score.index;
+        ConvBlockType best_block_type = score.block_type; /* doesn't really change during refinement */
+
+        int start = std::max (int (score.index) - Params::sync_search_step, 0);
+        int end   = score.index + Params::sync_search_step;
+        for (int fine_index = start; fine_index <= end; fine_index += Params::sync_search_fine)
+          {
+            sync_fft (wav_data, fine_index, total_frame_count, fft_db, have_frames, want_frames);
+            if (fft_db.size())
+              {
+                ConvBlockType block_type;
+                double        q = sync_decode (wav_data, 0, fft_db, have_frames, &block_type);
+
+                if (q > best_quality)
                  {
-                    sync_fft (wav_data, fine_index, mark_sync_frame_count() + mark_data_frame_count(), fft_db, want_frames);
-                    if (fft_db.size())
-                      {
-                        ConvBlockType block_type;
-                        double        q = sync_decode (wav_data, 0, fft_db, &block_type);
-
-                        if (q > best_quality)
-                          {
-                            best_quality = q;
-                            best_index   = fine_index;
-                          }
-                      }
+                    best_quality = q;
+                    best_index   = fine_index;
                  }
-                //printf (" => refined: %zd %s %f\n", best_index, find_closest_sync (best_index), best_quality);
-                if (best_quality > Params::sync_threshold2)
-                  result_scores.push_back (Score { best_index, best_quality, best_block_type });
              }
          }
+        //printf (" => refined: %zd %s %f\n", best_index, find_closest_sync (best_index), best_quality);
+        if (best_quality > Params::sync_threshold2)
+          result_scores.push_back (Score { best_index, best_quality, best_block_type });
+      }
+    sync_scores = result_scores;
+  }
+  vector<Score>
+  fake_sync (const WavData& wav_data, Mode mode)
+  {
+    vector<Score> result_scores;
+
+    if (mode == Mode::BLOCK)
+      {
+        const size_t expect0 = Params::frames_pad_start * Params::frame_size;
+        const size_t expect_step = (mark_sync_frame_count() + mark_data_frame_count()) * Params::frame_size;
+        const size_t expect_end = frame_count (wav_data) * Params::frame_size;
+
+        int ab = 0;
+        for (size_t expect_index = expect0; expect_index + expect_step < expect_end; expect_index += expect_step)
+          result_scores.push_back (Score { expect_index, 1.0, (ab++ & 1) ? ConvBlockType::b : ConvBlockType::a });
      }
+
    return result_scores;
  }
+
+  // non-zero sample range: [wav_data_first, wav_data_last)
+  size_t wav_data_first = 0;
+  size_t wav_data_last = 0;
+public:
+  vector<Score>
+  search (const WavData& wav_data, Mode mode)
+  {
+    if (Params::test_no_sync)
+      return fake_sync (wav_data, mode);
+
+    init_up_down (wav_data, mode);
+
+    if (mode == Mode::CLIP)
+      {
+        /* in clip mode we optimize handling large areas of padding which is silent */
+        scan_silence (wav_data);
+      }
+    else
+      {
+        /* in block mode we don't do anything special for silence at beginning/end */
+        wav_data_first = 0;
+        wav_data_last  = wav_data.samples().size();
+      }
+    vector<Score> sync_scores = search_approx (wav_data, mode);
+
+    sync_select_by_threshold (sync_scores);
+    if (mode == Mode::CLIP)
+      sync_select_n_best (sync_scores, 5);
+
+    search_refine (wav_data, mode, sync_scores);
+
+    return sync_scores;
+  }
 private:
  void
-  sync_fft (const WavData& wav_data, size_t index, size_t frame_count, vector<float>& fft_out_db, const vector<int>& want_frames)
+  sync_fft (const WavData& wav_data, size_t index, size_t frame_count, vector<float>& fft_out_db, vector<char>& have_frames, const vector<char>& want_frames)
  {
    fft_out_db.clear();
+    have_frames.clear();

    /* read past end? -> fail */
    if (wav_data.n_values() < (index + frame_count * Params::frame_size) * wav_data.n_channels())
@@ -401,24 +576,31 @@ private:
    int out_pos = 0;

    fft_out_db.resize (wav_data.n_channels() * n_bands * frame_count);
+    have_frames.resize (frame_count);

    for (size_t f = 0; f < frame_count; f++)
      {
-        const double min_db = -96;
-        if (want_frames.size() && !want_frames[f])
+        const size_t f_first = (index + f * Params::frame_size) * wav_data.n_channels();
+        const size_t f_last  = (index + (f + 1) * Params::frame_size) * wav_data.n_channels();
+
+        if ((want_frames.size() && !want_frames[f])   // frame not wanted?
+        ||  (f_last < wav_data_first)                 // frame in silence before input?
+        ||  (f_first > wav_data_last))                // frame in silence after input?
          {
-            for (int ch = 0; ch < wav_data.n_channels(); ch++)
-              for (int i = Params::min_band; i <= Params::max_band; i++)
-                fft_out_db[out_pos++] = min_db;
+            out_pos += n_bands * wav_data.n_channels();
          }
        else
          {
+            constexpr double min_db = -96;
+
            vector<vector<complex<float>>> frame_result = fft_analyzer.run_fft (samples, index + f * Params::frame_size);

            /* computing db-magnitude is expensive, so we better do it here */
            for (int ch = 0; ch < wav_data.n_channels(); ch++)
              for (int i = Params::min_band; i <= Params::max_band; i++)
                fft_out_db[out_pos++] = db_from_factor (abs (frame_result[ch][i]), min_db);
+
+            have_frames[f] = 1;
          }
      }
  }
@@ -444,146 +626,218 @@ private:
  }
 };

-static int
-decode_and_report (const WavData& wav_data, const string& orig_pattern)
+class ResultSet
 {
-  int match_count = 0, total_count = 0, sync_match = 0;
+public:
+  enum class Type { BLOCK, CLIP, ALL };
+  struct Pattern
+  {
+    vector<int>       bit_vec;
+    float             decode_error = 0;
+    SyncFinder::Score sync_score;
+    Type              type;
+  };
+private:
+  vector<Pattern> patterns;

-  SyncFinder                sync_finder;
-  vector<SyncFinder::Score> sync_scores = sync_finder.search (wav_data);
+public:
+  void
+  add_pattern (SyncFinder::Score sync_score, const vector<int>& bit_vec, float decode_error, Type pattern_type)
+  {
+    Pattern p;
+    p.sync_score = sync_score;
+    p.bit_vec = bit_vec;
+    p.decode_error = decode_error;
+    p.type = pattern_type;

-  auto report_pattern = [&] (SyncFinder::Score sync_score, const vector<int>& bit_vec, float decode_error)
+    patterns.push_back (p);
+  }
+  void
+  print()
  {
-    if (sync_score.index)
+    std::stable_sort (patterns.begin(), patterns.end(), [](const Pattern& p1, const Pattern& p2) {
+      const int all1 = p1.type == Type::ALL;
+      const int all2 = p2.type == Type::ALL;
+      if (all1 != all2)
+        return all1 < all2;
+      else
+        return p1.sync_score.index < p2.sync_score.index;
+    });
+    for (const auto& pattern : patterns)
      {
-        const char *block_str = nullptr;
-
-        switch (sync_score.block_type)
+        if (pattern.type == Type::ALL) /* this is the combined pattern "all" */
          {
-            case ConvBlockType::a:  block_str = "A";
-                                    break;
-            case ConvBlockType::b:  block_str = "B";
-                                    break;
-            case ConvBlockType::ab: block_str = "AB";
-                                    break;
+            printf ("pattern   all %s %.3f %.3f\n", bit_vec_to_str (pattern.bit_vec).c_str(),
+                                                    pattern.sync_score.quality, pattern.decode_error);
+          }
+        else
+          {
+            string block_str;
+
+            switch (pattern.sync_score.block_type)
+              {
+                case ConvBlockType::a:  block_str = "A";
+                                        break;
+                case ConvBlockType::b:  block_str = "B";
+                                        break;
+                case ConvBlockType::ab: block_str = "AB";
+                                        break;
+              }
+            if (pattern.type == Type::CLIP)
+              block_str = "CLIP-" + block_str;
+
+            const int seconds = pattern.sync_score.index / Params::mark_sample_rate;
+            printf ("pattern %2d:%02d %s %.3f %.3f %s\n", seconds / 60, seconds % 60, bit_vec_to_str (pattern.bit_vec).c_str(),
+                      pattern.sync_score.quality, pattern.decode_error, block_str.c_str());
          }
-        const int seconds = sync_score.index / wav_data.sample_rate();
-        printf ("pattern %2d:%02d %s %.3f %.3f %s\n", seconds / 60, seconds % 60, bit_vec_to_str (bit_vec).c_str(),
-                sync_score.quality, decode_error, block_str);
-      }
-    else /* this is the combined pattern "all" */
-      {
-        printf ("pattern   all %s %.3f %.3f\n", bit_vec_to_str (bit_vec).c_str(), sync_score.quality, decode_error);
      }
-    if (!orig_pattern.empty())
+  }
+  void
+  print_match_count (const string& orig_pattern)
+  {
+    int match_count = 0;
+
+    vector<int> orig_vec = bit_str_to_vec (orig_pattern);
+    for (auto p : patterns)
      {
        bool        match = true;
-        vector<int> orig_vec = bit_str_to_vec (orig_pattern);

-        for (size_t i = 0; i < bit_vec.size(); i++)
-          match = match && (bit_vec[i] == orig_vec[i % orig_vec.size()]);
+        for (size_t i = 0; i < p.bit_vec.size(); i++)
+          match = match && (p.bit_vec[i] == orig_vec[i % orig_vec.size()]);

        if (match)
          match_count++;
-
      }
-    total_count++;
-  };
+    printf ("match_count %d %zd\n", match_count, patterns.size());
+  }
+};
+
+/*
+ * The block decoder is responsible for finding whole data blocks inside the
+ * input file and decoding them. This only works for files that are large
+ * enough to contain one data block. Incomplete blocks are ignored.
+ *
+ * INPUT:   AA|BBBBB|AAAAA|BBB
+ * MATCH       BBBBB
+ * MATCH             AAAAA
+ *
+ * The basic algorithm is this:
+ *
+ *  - use sync finder to find start index for the blocks
+ *  - decode the blocks
+ *  - try to combine A + B blocks for better error correction (AB)
+ *  - try to combine all available blocks for better error correction (all pattern)
+ */
+class BlockDecoder
+{
+  int debug_sync_frame_count = 0;
+  vector<SyncFinder::Score> sync_scores; // stored here for sync debugging
+public:
+  void
+  run (const WavData& wav_data, ResultSet& result_set)
+  {
+    int total_count = 0;

-  vector<float> raw_bit_vec_all (conv_code_size (ConvBlockType::ab, Params::payload_size));
-  vector<int>   raw_bit_vec_norm (2);
+    SyncFinder sync_finder;
+    sync_scores = sync_finder.search (wav_data, SyncFinder::Mode::BLOCK);

-  SyncFinder::Score score_all { 0, 0 };
-  SyncFinder::Score score_ab  { 0, 0, ConvBlockType::ab };
+    vector<float> raw_bit_vec_all (conv_code_size (ConvBlockType::ab, Params::payload_size));
+    vector<int>   raw_bit_vec_norm (2);

-  ConvBlockType last_block_type = ConvBlockType::b;
-  vector<vector<float>> ab_raw_bit_vec (2);
-  vector<float>         ab_quality (2);
-  FFTAnalyzer           fft_analyzer (wav_data.n_channels());
-  for (auto sync_score : sync_scores)
-    {
-      const size_t count = mark_sync_frame_count() + mark_data_frame_count();
-      const size_t index = sync_score.index;
-      const int    ab = (sync_score.block_type == ConvBlockType::b); /* A -> 0, B -> 1 */
+    SyncFinder::Score score_all { 0, 0 };
+    SyncFinder::Score score_ab  { 0, 0, ConvBlockType::ab };

-      auto fft_range_out = fft_analyzer.fft_range (wav_data.samples(), index, count);
-      if (fft_range_out.size())
-        {
-          /* ---- retrieve bits from watermark ---- */
-          vector<float> raw_bit_vec;
-          if (Params::mix)
-            {
-              raw_bit_vec = mix_decode (fft_range_out, wav_data.n_channels());
-            }
-          else
-            {
-              raw_bit_vec = linear_decode (fft_range_out, wav_data.n_channels());
-            }
-          assert (raw_bit_vec.size() == conv_code_size (ConvBlockType::a, Params::payload_size));
+    ConvBlockType last_block_type = ConvBlockType::b;
+    vector<vector<float>> ab_raw_bit_vec (2);
+    vector<float>         ab_quality (2);
+    FFTAnalyzer           fft_analyzer (wav_data.n_channels());
+    for (auto sync_score : sync_scores)
+      {
+        const size_t count = mark_sync_frame_count() + mark_data_frame_count();
+        const size_t index = sync_score.index;
+        const int    ab = (sync_score.block_type == ConvBlockType::b); /* A -> 0, B -> 1 */

-          raw_bit_vec = randomize_bit_order (raw_bit_vec, /* encode */ false);
+        auto fft_range_out = fft_analyzer.fft_range (wav_data.samples(), index, count);
+        if (fft_range_out.size())
+          {
+            /* ---- retrieve bits from watermark ---- */
+            vector<float> raw_bit_vec;
+            if (Params::mix)
+              {
+                raw_bit_vec = mix_decode (fft_range_out, wav_data.n_channels());
+              }
+            else
+              {
+                raw_bit_vec = linear_decode (fft_range_out, wav_data.n_channels());
+              }
+            assert (raw_bit_vec.size() == conv_code_size (ConvBlockType::a, Params::payload_size));

-          /* ---- deal with this pattern ---- */
-          float decode_error = 0;
-          vector<int> bit_vec = conv_decode_soft (sync_score.block_type, normalize_soft_bits (raw_bit_vec), &decode_error);
+            raw_bit_vec = randomize_bit_order (raw_bit_vec, /* encode */ false);

-          report_pattern (sync_score, bit_vec, decode_error);
+            /* ---- deal with this pattern ---- */
+            float decode_error = 0;
+            vector<int> bit_vec = conv_decode_soft (sync_score.block_type, normalize_soft_bits (raw_bit_vec), &decode_error);

-          /* ---- update "all" pattern ---- */
-          score_all.quality += sync_score.quality;
+            result_set.add_pattern (sync_score, bit_vec, decode_error, ResultSet::Type::BLOCK);
+            total_count += 1;

-          for (size_t i = 0; i < raw_bit_vec.size(); i++)
-            {
-              raw_bit_vec_all[i * 2 + ab] += raw_bit_vec[i];
-            }
-          raw_bit_vec_norm[ab]++;
+            /* ---- update "all" pattern ---- */
+            score_all.quality += sync_score.quality;

-          /* ---- if last block was A & this block is B => deal with combined AB block */
-          ab_raw_bit_vec[ab] = raw_bit_vec;
-          ab_quality[ab]     = sync_score.quality;
-          if (last_block_type == ConvBlockType::a && sync_score.block_type == ConvBlockType::b)
-            {
-              /* join A and B block -> AB block */
-              vector<float> ab_bits (raw_bit_vec.size() * 2);
-              for (size_t i = 0; i <  raw_bit_vec.size(); i++)
-                {
-                  ab_bits[i * 2] = ab_raw_bit_vec[0][i];
-                  ab_bits[i * 2 + 1] = ab_raw_bit_vec[1][i];
-                }
-              vector<int> bit_vec = conv_decode_soft (ConvBlockType::ab, normalize_soft_bits (ab_bits), &decode_error);
-              score_ab.index = sync_score.index;
-              score_ab.quality = (ab_quality[0] + ab_quality[1]) / 2;
-              report_pattern (score_ab, bit_vec, decode_error);
-            }
-          last_block_type = sync_score.block_type;
-        }
-    }
-  if (total_count > 1) /* all pattern: average soft bits of all watermarks and decode */
-    {
-      for (size_t i = 0; i < raw_bit_vec_all.size(); i += 2)
-        {
-          raw_bit_vec_all[i]     /= max (raw_bit_vec_norm[0], 1); /* normalize A soft bits with number of A blocks */
-          raw_bit_vec_all[i + 1] /= max (raw_bit_vec_norm[1], 1); /* normalize B soft bits with number of B blocks */
-        }
-      score_all.quality /= raw_bit_vec_norm[0] + raw_bit_vec_norm[1];
+            for (size_t i = 0; i < raw_bit_vec.size(); i++)
+              {
+                raw_bit_vec_all[i * 2 + ab] += raw_bit_vec[i];
+              }
+            raw_bit_vec_norm[ab]++;

-      vector<float> soft_bit_vec = normalize_soft_bits (raw_bit_vec_all);
+            /* ---- if last block was A & this block is B => deal with combined AB block */
+            ab_raw_bit_vec[ab] = raw_bit_vec;
+            ab_quality[ab]     = sync_score.quality;
+            if (last_block_type == ConvBlockType::a && sync_score.block_type == ConvBlockType::b)
+              {
+                /* join A and B block -> AB block */
+                vector<float> ab_bits (raw_bit_vec.size() * 2);
+                for (size_t i = 0; i <  raw_bit_vec.size(); i++)
+                  {
+                    ab_bits[i * 2] = ab_raw_bit_vec[0][i];
+                    ab_bits[i * 2 + 1] = ab_raw_bit_vec[1][i];
+                  }
+                vector<int> bit_vec = conv_decode_soft (ConvBlockType::ab, normalize_soft_bits (ab_bits), &decode_error);
+                score_ab.index = sync_score.index;
+                score_ab.quality = (ab_quality[0] + ab_quality[1]) / 2;
+                result_set.add_pattern (score_ab, bit_vec, decode_error, ResultSet::Type::BLOCK);
+              }
+            last_block_type = sync_score.block_type;
+          }
+      }
+    if (total_count > 1) /* all pattern: average soft bits of all watermarks and decode */
+      {
+        for (size_t i = 0; i < raw_bit_vec_all.size(); i += 2)
+          {
+            raw_bit_vec_all[i]     /= max (raw_bit_vec_norm[0], 1); /* normalize A soft bits with number of A blocks */
+            raw_bit_vec_all[i + 1] /= max (raw_bit_vec_norm[1], 1); /* normalize B soft bits with number of B blocks */
+          }
+        score_all.quality /= raw_bit_vec_norm[0] + raw_bit_vec_norm[1];

-      float decode_error = 0;
-      vector<int> bit_vec = conv_decode_soft (ConvBlockType::ab, soft_bit_vec, &decode_error);
+        vector<float> soft_bit_vec = normalize_soft_bits (raw_bit_vec_all);

-      report_pattern (score_all, bit_vec, decode_error);
-    }
+        float decode_error = 0;
+        vector<int> bit_vec = conv_decode_soft (ConvBlockType::ab, soft_bit_vec, &decode_error);

-  if (!orig_pattern.empty())
-    {
-      printf ("match_count %d %d\n", match_count, total_count);
+        result_set.add_pattern (score_all, bit_vec, decode_error, ResultSet::Type::ALL);
+      }

+    debug_sync_frame_count = frame_count (wav_data);
+  }
+  void
+  print_debug_sync()
+  {
      /* search sync markers at typical positions */
      const int expect0 = Params::frames_pad_start * Params::frame_size;
      const int expect_step = (mark_sync_frame_count() + mark_data_frame_count()) * Params::frame_size;
-      const int expect_end = frame_count (wav_data) * Params::frame_size;
+      const int expect_end = debug_sync_frame_count * Params::frame_size;

+      int sync_match = 0;
      for (int expect_index = expect0; expect_index + expect_step < expect_end; expect_index += expect_step)
        {
          for (auto sync_score : sync_scores)
@@ -596,6 +850,170 @@ decode_and_report (const WavData& wav_data, const string& orig_pattern)
            }
        }
      printf ("sync_match %d %zd\n", sync_match, sync_scores.size());
+  }
+};
+
+/*
+ * The clip decoder is responsible for decoding short clips. It is designed to
+ * handle input sizes that are smaller than one data block. One case is that
+ * the clip contains a partial A block (so the data could start after the start
+ * of the A block and end before the end of the A block).
+ *
+ * ORIG:   |AAAAA|BBBBB|AAAAA|BBBBB|
+ * CLIP:    |AAA|
+ *
+ * A clip could also contain the end of one block and the start of the next block,
+ * like this:
+ *
+ * ORIG:   |AAAAA|BBBBB|AAAAA|BBBBB|
+ * CLIP:                   |A|BB|
+ *
+ * The basic algorithm is this:
+ *
+ *  - zeropad   |AAA|  => 00000|AAA|00000
+ *  - use sync finder to find start index of one long block in the zeropadded data
+ *  - decode the bits
+ *
+ * For files larger than one data block, we decode twice, at the beginning and end
+ *
+ * INPUT   AAA|BBBBB|A
+ * CLIP #1 AAA|BB
+ * CLIP #2      BBBB|A
+ */
+class ClipDecoder
+{
+  const int frames_per_block = 0;
+
+  vector<float>
+  mix_or_linear_decode (vector<vector<complex<float>>>& fft_out, int n_channels)
+  {
+    if (Params::mix)
+      return mix_decode (fft_out, n_channels);
+    else
+      return linear_decode (fft_out, n_channels);
+  }
+  void
+  run_padded (const WavData& wav_data, ResultSet& result_set, double time_offset_sec)
+  {
+    SyncFinder                sync_finder;
+    vector<SyncFinder::Score> sync_scores = sync_finder.search (wav_data, SyncFinder::Mode::CLIP);
+    FFTAnalyzer               fft_analyzer (wav_data.n_channels());
+
+    for (auto sync_score : sync_scores)
+      {
+        const size_t count = mark_sync_frame_count() + mark_data_frame_count();
+        const size_t index = sync_score.index;
+        auto fft_range_out1 = fft_analyzer.fft_range (wav_data.samples(), index, count);
+        auto fft_range_out2 = fft_analyzer.fft_range (wav_data.samples(), index + count * Params::frame_size, count);
+        if (fft_range_out1.size() && fft_range_out2.size())
+          {
+            const auto raw_bit_vec1 = randomize_bit_order (mix_or_linear_decode (fft_range_out1, wav_data.n_channels()), /* encode */ false);
+            const auto raw_bit_vec2 = randomize_bit_order (mix_or_linear_decode (fft_range_out2, wav_data.n_channels()), /* encode */ false);
+            const size_t bits_per_block = raw_bit_vec1.size();
+            vector<float> raw_bit_vec;
+            for (size_t i = 0; i < bits_per_block; i++)
+              {
+                if (sync_score.block_type == ConvBlockType::a)
+                  {
+                    raw_bit_vec.push_back (raw_bit_vec1[i]);
+                    raw_bit_vec.push_back (raw_bit_vec2[i]);
+                  }
+                else
+                  {
+                    raw_bit_vec.push_back (raw_bit_vec2[i]);
+                    raw_bit_vec.push_back (raw_bit_vec1[i]);
+                  }
+              }
+
+            float decode_error = 0;
+            vector<int> bit_vec = conv_decode_soft (ConvBlockType::ab, normalize_soft_bits (raw_bit_vec), &decode_error);
+
+            SyncFinder::Score sync_score_nopad = sync_score;
+            sync_score_nopad.index = time_offset_sec * wav_data.sample_rate();
+            result_set.add_pattern (sync_score_nopad, bit_vec, decode_error, ResultSet::Type::CLIP);
+          }
+      }
+  }
+  enum class Pos { START, END };
+  void
+  run_block (const WavData& wav_data, ResultSet& result_set, Pos pos)
+  {
+    const size_t n = (frames_per_block + 5) * Params::frame_size * wav_data.n_channels();
+
+    // range of samples used by clip: [first_sample, last_sample)
+    size_t first_sample;
+    size_t last_sample;
+    size_t pad_samples_start = n;
+    size_t pad_samples_end   = n;
+
+    if (pos == Pos::START)
+      {
+        first_sample = 0;
+        last_sample  = min (n, wav_data.n_values());
+
+        // increase padding at start for small blocks
+        //   -> (available samples + padding) must always be one L-block
+        if (last_sample < n)
+          pad_samples_start += n - last_sample;
+      }
+    else // (pos == Pos::END)
+      {
+        if (wav_data.n_values() <= n)
+          return;
+
+        first_sample = wav_data.n_values() - n;
+        last_sample  = wav_data.n_values();
+      }
+    const double time_offset = double (first_sample) / wav_data.sample_rate() / wav_data.n_channels();
+    vector<float> ext_samples (wav_data.samples().begin() + first_sample, wav_data.samples().begin() + last_sample);
+
+    if (0)
+      {
+        printf ("%d: %f..%f\n", int (pos), time_offset, time_offset + double (ext_samples.size()) / wav_data.sample_rate() / wav_data.n_channels());
+        printf ("%f< >%f\n",
+          double (pad_samples_start) / wav_data.sample_rate() / wav_data.n_channels(),
+          double (pad_samples_end) / wav_data.sample_rate() / wav_data.n_channels());
+      }
+    ext_samples.insert (ext_samples.begin(), pad_samples_start, 0);
+    ext_samples.insert (ext_samples.end(),   pad_samples_end, 0);
+
+    WavData l_wav_data (ext_samples, wav_data.n_channels(), wav_data.sample_rate(), wav_data.bit_depth());
+    run_padded (l_wav_data, result_set, time_offset);
+   }
+public:
+  ClipDecoder() :
+    frames_per_block (mark_sync_frame_count() + mark_data_frame_count())
+  {
+  }
+  void
+  run (const WavData& wav_data, ResultSet& result_set)
+  {
+    const int wav_frames = wav_data.n_values() / (Params::frame_size * wav_data.n_channels());
+    if (wav_frames < frames_per_block * 3.1) /* clip decoder is only used for small wavs */
+      {
+        run_block (wav_data, result_set, Pos::START);
+        run_block (wav_data, result_set, Pos::END);
+      }
+  }
+};
+
+static int
+decode_and_report (const WavData& wav_data, const string& orig_pattern)
+{
+  ResultSet result_set;
+
+  BlockDecoder block_decoder;
+  block_decoder.run (wav_data, result_set);
+
+  ClipDecoder clip_decoder;
+  clip_decoder.run (wav_data, result_set);
+  result_set.print();
+
+  if (!orig_pattern.empty())
+    {
+      result_set.print_match_count (orig_pattern);
+
+      block_decoder.print_debug_sync();
    }
  return 0;
 }