lavfi: Add OpenCL overlay filter

Input and output formats must be the same; the overlay format must match the input format, except that it may carry an additional alpha component.

lavfi: Add OpenCL overlay filter
Input and output formats must be the same; the overlay format must match the input format, except that it may carry an additional alpha component.
9204b2de · Mark Thompson · 59d65293 · 9204b2de · 9204b2de · 9204b2de
Commit 9204b2de authored Sep 10, 2017 by Mark Thompson
8 changed files
--- a/Changelog
+++ b/Changelog
@@ -17,6 +17,7 @@ version <next>:
 - Intel QSV-accelerated overlay filter
 - mcompand audio filter
 - acontrast audio filter
+- OpenCL overlay filter


 version 3.4:

--- a/configure
+++ b/configure
@@ -3239,6 +3239,7 @@ negate_filter_deps="lut_filter"
 nnedi_filter_deps="gpl"
 ocr_filter_deps="libtesseract"
 ocv_filter_deps="libopencv"
+overlay_opencl_filter_deps="opencl"
 overlay_qsv_filter_deps="libmfx"
 overlay_qsv_filter_select="qsvvpp"
 owdenoise_filter_deps="gpl"

--- a/libavfilter/Makefile
+++ b/libavfilter/Makefile
@@ -253,6 +253,8 @@ OBJS-$(CONFIG_OCV_FILTER)                    += vf_libopencv.o
 OBJS-$(CONFIG_OPENCL)                        += deshake_opencl.o unsharp_opencl.o
 OBJS-$(CONFIG_OSCILLOSCOPE_FILTER)           += vf_datascope.o
 OBJS-$(CONFIG_OVERLAY_FILTER)                += vf_overlay.o framesync.o
+OBJS-$(CONFIG_OVERLAY_OPENCL_FILTER)         += vf_overlay_opencl.o opencl.o \
+                                                opencl/overlay.o framesync.o
 OBJS-$(CONFIG_OVERLAY_QSV_FILTER)            += vf_overlay_qsv.o
 OBJS-$(CONFIG_OWDENOISE_FILTER)              += vf_owdenoise.o
 OBJS-$(CONFIG_PAD_FILTER)                    += vf_pad.o

--- a/libavfilter/allfilters.c
+++ b/libavfilter/allfilters.c
@@ -262,6 +262,7 @@ static void register_all(void)
    REGISTER_FILTER(OCV,            ocv,            vf);
    REGISTER_FILTER(OSCILLOSCOPE,   oscilloscope,   vf);
    REGISTER_FILTER(OVERLAY,        overlay,        vf);
+    REGISTER_FILTER(OVERLAY_OPENCL, overlay_opencl, vf);
    REGISTER_FILTER(OVERLAY_QSV,    overlay_qsv,    vf);
    REGISTER_FILTER(OWDENOISE,      owdenoise,      vf);
    REGISTER_FILTER(PAD,            pad,            vf);

--- a/libavfilter/opencl/overlay.cl
+++ b/libavfilter/opencl/overlay.cl
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/*
+ * Opaque overlay.
+ *
+ * Each work-item produces the dst pixel addressed by its global id.
+ * Pixels inside the rectangle occupied by the overlay image are taken from
+ * the overlay; every other pixel is copied unchanged from main.
+ *
+ * x_position / y_position give the dst coordinates at which the overlay's
+ * top-left corner is placed.
+ */
+__kernel void overlay_no_alpha(__write_only image2d_t dst,
+                               __read_only  image2d_t main,
+                               __read_only  image2d_t overlay,
+                               int x_position,
+                               int y_position)
+{
+    const sampler_t sampler = (CLK_NORMALIZED_COORDS_FALSE |
+                               CLK_FILTER_NEAREST);
+
+    const int2 origin = (int2)(x_position, y_position);
+    const int2 extent = get_image_dim(overlay);
+    const int2 loc    = (int2)(get_global_id(0), get_global_id(1));
+
+    /* True when this pixel lies within the overlay's footprint. */
+    const bool covered = loc.x >= x_position &&
+                         loc.y >= y_position &&
+                         loc.x <  extent.x + x_position &&
+                         loc.y <  extent.y + y_position;
+
+    float4 val;
+    if (covered)
+        val = read_imagef(overlay, sampler, loc - origin);
+    else
+        val = read_imagef(main, sampler, loc);
+
+    write_imagef(dst, loc, val);
+}
+
+/*
+ * Overlay whose alpha is stored in the overlay image's own fourth (.w)
+ * component.
+ *
+ * Each work-item produces the dst pixel addressed by its global id.
+ * Outside the overlay's footprint the main pixel is copied through.
+ * Inside it, the result is
+ *     overlay * a + main * (1 - a),   a = overlay.w
+ * applied to all four components (including .w itself).
+ *
+ * x_position / y_position give the dst coordinates at which the overlay's
+ * top-left corner is placed.
+ */
+__kernel void overlay_internal_alpha(__write_only image2d_t dst,
+                                     __read_only  image2d_t main,
+                                     __read_only  image2d_t overlay,
+                                     int x_position,
+                                     int y_position)
+{
+    const sampler_t sampler = (CLK_NORMALIZED_COORDS_FALSE |
+                               CLK_FILTER_NEAREST);
+
+    const int2 origin = (int2)(x_position, y_position);
+    const int2 extent = get_image_dim(overlay);
+    const int2 loc    = (int2)(get_global_id(0), get_global_id(1));
+
+    /* True when this pixel lies within the overlay's footprint. */
+    const bool covered = loc.x >= x_position &&
+                         loc.y >= y_position &&
+                         loc.x <  extent.x + x_position &&
+                         loc.y <  extent.y + y_position;
+
+    /* The main pixel is needed on both paths, so fetch it once. */
+    const float4 in_main = read_imagef(main, sampler, loc);
+    float4 val = in_main;
+
+    if (covered) {
+        const float4 in_overlay = read_imagef(overlay, sampler, loc - origin);
+        val = in_overlay * in_overlay.w + in_main * (1.0f - in_overlay.w);
+    }
+
+    write_imagef(dst, loc, val);
+}
+
+/*
+ * Overlay whose alpha comes from a separate image.
+ *
+ * Each work-item produces the dst pixel addressed by its global id.
+ * Outside the overlay's footprint the main pixel is copied through.
+ * Inside it, the result is
+ *     overlay * a + main * (1 - a),   a = alpha.x
+ * where the alpha sample is taken at the overlay-relative position scaled
+ * by (alpha_adj_x, alpha_adj_y).
+ *
+ * x_position / y_position give the dst coordinates at which the overlay's
+ * top-left corner is placed.
+ * alpha_adj_x / alpha_adj_y are integer factors converting a coordinate in
+ * the overlay image to one in the alpha image.
+ * NOTE(review): presumably 1 for a full-resolution plane and the chroma
+ * subsampling factor for subsampled planes - confirm against the host code.
+ */
+__kernel void overlay_external_alpha(__write_only image2d_t dst,
+                                     __read_only  image2d_t main,
+                                     __read_only  image2d_t overlay,
+                                     __read_only  image2d_t alpha,
+                                     int x_position,
+                                     int y_position,
+                                     int alpha_adj_x,
+                                     int alpha_adj_y)
+{
+    const sampler_t sampler = (CLK_NORMALIZED_COORDS_FALSE |
+                               CLK_FILTER_NEAREST);
+
+    int2 overlay_size = get_image_dim(overlay);
+    int2 loc = (int2)(get_global_id(0), get_global_id(1));
+
+    if (loc.x <  x_position ||
+        loc.y <  y_position ||
+        loc.x >= overlay_size.x + x_position ||
+        loc.y >= overlay_size.y + y_position) {
+        float4 val = read_imagef(main, sampler, loc);
+        write_imagef(dst, loc, val);
+    } else {
+        int2 loc_overlay  = (int2)(x_position, y_position);
+        float4 in_main    = read_imagef(main,    sampler, loc);
+        float4 in_overlay = read_imagef(overlay, sampler, loc - loc_overlay);
+
+        /*
+         * The overlay offset is expressed in this plane's coordinates, so
+         * it must be removed before scaling to the alpha image.  Scaling
+         * loc first and then subtracting the unscaled offset mixes the two
+         * coordinate systems and samples the wrong (possibly out-of-range)
+         * alpha pixel whenever the scale is not 1 and the offset is not 0.
+         */
+        int2 loc_alpha    = (int2)((loc.x - x_position) * alpha_adj_x,
+                                   (loc.y - y_position) * alpha_adj_y);
+        float4 in_alpha   = read_imagef(alpha,   sampler, loc_alpha);
+
+        float4 val = in_overlay * in_alpha.x + in_main * (1.0f - in_alpha.x);
+        write_imagef(dst, loc, val);
+    }
+}
--- a/libavfilter/opencl_source.h
+++ b/libavfilter/opencl_source.h
@@ -19,4 +19,6 @@
 #ifndef AVFILTER_OPENCL_SOURCE_H
 #define AVFILTER_OPENCL_SOURCE_H

+extern const char *ff_opencl_source_overlay;
+
 #endif /* AVFILTER_OPENCL_SOURCE_H */
--- a/libavfilter/version.h
+++ b/libavfilter/version.h
@@ -31,7 +31,7 @@

 #define LIBAVFILTER_VERSION_MAJOR   7
 #define LIBAVFILTER_VERSION_MINOR   2
-#define LIBAVFILTER_VERSION_MICRO 100
+#define LIBAVFILTER_VERSION_MICRO 101

 #define LIBAVFILTER_VERSION_INT AV_VERSION_INT(LIBAVFILTER_VERSION_MAJOR, \
                                               LIBAVFILTER_VERSION_MINOR, \

--- a/libavfilter/vf_overlay_opencl.c
+++ b/libavfilter/vf_overlay_opencl.c