Commit 615479d5 authored by Ashish Singh's avatar Ashish Singh Committed by Ronald S. Bultje

avfilter: add LIBVMAF filter

This one changes the previous vmaf patch to libvmaf to keep it separate from the
native implementation of vmaf inside ffmpeg later.
Signed-off-by: Ashish Singh <ashk43712@gmail.com>
Signed-off-by: Ronald S. Bultje <rsbultje@gmail.com>
parent 4de4308d
......@@ -27,6 +27,7 @@ version <next>:
- additional frame format support for Interplay MVE movies
- support for decoding through D3D11VA in ffmpeg
- limiter video filter
- libvmaf video filter
version 3.3:
- CrystalHD decoder moved to new decode API
......
......@@ -256,6 +256,7 @@ External library support:
--enable-libtwolame enable MP2 encoding via libtwolame [no]
--enable-libv4l2 enable libv4l2/v4l-utils [no]
--enable-libvidstab enable video stabilization using vid.stab [no]
--enable-libvmaf enable vmaf filter via libvmaf [no]
--enable-libvo-amrwbenc enable AMR-WB encoding via libvo-amrwbenc [no]
--enable-libvorbis enable Vorbis en/decoding via libvorbis,
native implementation exists [no]
......@@ -1569,6 +1570,7 @@ EXTERNAL_LIBRARY_LIST="
libtheora
libtwolame
libv4l2
libvmaf
libvorbis
libvpx
libwavpack
......@@ -3186,6 +3188,7 @@ uspp_filter_deps="gpl avcodec"
vaguedenoiser_filter_deps="gpl"
vidstabdetect_filter_deps="libvidstab"
vidstabtransform_filter_deps="libvidstab"
vmaf_filter_deps="libvmaf"
zmq_filter_deps="libzmq"
zoompan_filter_deps="swscale"
zscale_filter_deps="libzimg"
......@@ -5902,6 +5905,8 @@ enabled libtwolame && require libtwolame twolame.h twolame_init -ltwolame
die "ERROR: libtwolame must be installed and version must be >= 0.3.10"; }
enabled libv4l2 && require_pkg_config libv4l2 libv4l2.h v4l2_ioctl
enabled libvidstab && require_pkg_config "vidstab >= 0.98" vid.stab/libvidstab.h vsMotionDetectInit
enabled libvmaf && { check_lib libvmaf "libvmaf.h" "compute_vmaf" -lvmaf -lstdc++ -lpthread -lm ||
die "ERROR: libvmaf must be installed"; }
enabled libvo_amrwbenc && require libvo_amrwbenc vo-amrwbenc/enc_if.h E_IF_init -lvo-amrwbenc
enabled libvorbis && require_pkg_config vorbis vorbis/codec.h vorbis_info_init &&
require_pkg_config vorbisenc vorbis/vorbisenc.h vorbis_encode_init
......
......@@ -9639,6 +9639,70 @@ The formula that generates the correction is:
where @var{r_0} is half of the image diagonal and @var{r_src} and @var{r_tgt} are the
distances from the focal point in the source and target images, respectively.
@section libvmaf
Obtain the average VMAF (Video Multi-Method Assessment Fusion)
score between two input videos.
This filter takes two input videos.
Both video inputs must have the same resolution and pixel format for
this filter to work correctly. Also it assumes that both inputs
have the same number of frames, which are compared one by one.
The obtained average VMAF score is printed through the logging system.
It requires Netflix's vmaf library (libvmaf) as a pre-requisite.
After installing the library it can be enabled using:
@code{./configure --enable-libvmaf}.
If no model path is specified it uses the default model: @code{vmaf_v0.6.1.pkl}.
On the below examples the input file @file{main.mpg} being processed is
compared with the reference file @file{ref.mpg}.
The filter has following options:
@table @option
@item model_path
Set the model path which is to be used for SVM.
Default value: @code{"/usr/local/share/model/vmaf_v0.6.1.pkl"}
@item log_path
Set the file path to be used to store logs.
@item log_fmt
Set the format of the log file (xml or json).
@item enable_transform
Enables transform for computing vmaf.
@item phone_model
Invokes the phone model which will generate VMAF scores higher than in the
regular model, which is more suitable for laptop, TV, etc. viewing conditions.
@item psnr
Enables computing psnr along with vmaf.
@item ssim
Enables computing ssim along with vmaf.
@item ms_ssim
Enables computing ms_ssim along with vmaf.
@item pool
Set the pool method to be used for computing vmaf.
@end table
For example:
@example
ffmpeg -i main.mpg -i ref.mpg -lavfi libvmaf -f null -
@end example
Example with options:
@example
ffmpeg -i main.mpg -i ref.mpg -lavfi libvmaf="psnr=1:enable_transform=1" -f null -
@end example
@section limiter
Limits the pixel components values to the specified range [min, max].
......
......@@ -216,6 +216,7 @@ OBJS-$(CONFIG_INTERLACE_FILTER) += vf_interlace.o
OBJS-$(CONFIG_INTERLEAVE_FILTER) += f_interleave.o
OBJS-$(CONFIG_KERNDEINT_FILTER) += vf_kerndeint.o
OBJS-$(CONFIG_LENSCORRECTION_FILTER) += vf_lenscorrection.o
OBJS-$(CONFIG_LIBVMAF_FILTER) += vf_libvmaf.o dualinput.o framesync.o
OBJS-$(CONFIG_LIMITER_FILTER) += vf_limiter.o
OBJS-$(CONFIG_LOOP_FILTER) += f_loop.o
OBJS-$(CONFIG_LUMAKEY_FILTER) += vf_lumakey.o
......
......@@ -228,6 +228,7 @@ static void register_all(void)
REGISTER_FILTER(INTERLEAVE, interleave, vf);
REGISTER_FILTER(KERNDEINT, kerndeint, vf);
REGISTER_FILTER(LENSCORRECTION, lenscorrection, vf);
REGISTER_FILTER(LIBVMAF, libvmaf, vf);
REGISTER_FILTER(LIMITER, limiter, vf);
REGISTER_FILTER(LOOP, loop, vf);
REGISTER_FILTER(LUMAKEY, lumakey, vf);
......
/*
* Copyright (c) 2017 Ronald S. Bultje <rsbultje@gmail.com>
* Copyright (c) 2017 Ashish Pratap Singh <ashk43712@gmail.com>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
/**
* @file
* Calculate the VMAF between two input videos.
*/
#include <inttypes.h>
#include <pthread.h>
#include <libvmaf.h>
#include "libavutil/avstring.h"
#include "libavutil/opt.h"
#include "libavutil/pixdesc.h"
#include "avfilter.h"
#include "dualinput.h"
#include "drawutils.h"
#include "formats.h"
#include "internal.h"
#include "video.h"
/*
 * Filter private context. A dedicated worker thread runs libvmaf's
 * compute_vmaf(); the filter thread hands it frame pairs through
 * gmain/gref under lock/cond (classic single-slot producer/consumer).
 */
typedef struct LIBVMAFContext {
    const AVClass *class;           /* must be first so the context works with av_log() */
    FFDualInputContext dinput;      /* pairs up frames from the two inputs */
    const AVPixFmtDescriptor *desc; /* descriptor of the (common) input pixel format */
    char *format;                   /* pixel format name handed to compute_vmaf() */
    int width;                      /* frame width, identical on both inputs */
    int height;                     /* frame height, identical on both inputs */
    double vmaf_score;              /* pooled score, written by the vmaf thread */
    pthread_t vmaf_thread;          /* runs call_vmaf(); created in config_input_ref() */
    pthread_mutex_t lock;           /* guards gmain, gref, frame_set, eof */
    pthread_cond_t cond;            /* signaled on both produce and consume */
    int eof;                        /* set at uninit() to release the vmaf thread */
    AVFrame *gmain;                 /* current distorted ("main") frame slot */
    AVFrame *gref;                  /* current reference frame slot */
    int frame_set;                  /* 1 while gmain/gref hold an unconsumed pair */
    /* user options — see libvmaf_options[] */
    char *model_path;
    char *log_path;
    char *log_fmt;
    int disable_clip;               /* NOTE(review): no AVOption maps here; always 0 */
    int disable_avx;                /* NOTE(review): no AVOption maps here; always 0 */
    int enable_transform;
    int phone_model;
    int psnr;
    int ssim;
    int ms_ssim;
    char *pool;
} LIBVMAFContext;
#define OFFSET(x) offsetof(LIBVMAFContext, x)
#define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM

/* User-visible options; each value is forwarded verbatim to compute_vmaf(). */
static const AVOption libvmaf_options[] = {
    {"model_path", "Set the model to be used for computing vmaf.", OFFSET(model_path), AV_OPT_TYPE_STRING, {.str="/usr/local/share/model/vmaf_v0.6.1.pkl"}, 0, 1, FLAGS},
    {"log_path", "Set the file path to be used to store logs.", OFFSET(log_path), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 1, FLAGS},
    {"log_fmt", "Set the format of the log (xml or json).", OFFSET(log_fmt), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 1, FLAGS},
    {"enable_transform", "Enables transform for computing vmaf.", OFFSET(enable_transform), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS},
    {"phone_model", "Invokes the phone model that will generate higher VMAF scores.", OFFSET(phone_model), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS},
    {"psnr", "Enables computing psnr along with vmaf.", OFFSET(psnr), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS},
    {"ssim", "Enables computing ssim along with vmaf.", OFFSET(ssim), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS},
    {"ms_ssim", "Enables computing ms-ssim along with vmaf.", OFFSET(ms_ssim), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS},
    {"pool", "Set the pool method to be used for computing vmaf.", OFFSET(pool), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 1, FLAGS},
    { NULL }
};

AVFILTER_DEFINE_CLASS(libvmaf);
/*
 * read_frame_fn(type, bits) expands to the frame-fetch callback that libvmaf
 * calls from the vmaf worker thread, once per frame pair:
 *
 *  - blocks on the condition variable until the filter thread has published a
 *    frame pair (frame_set) or signaled end of stream (eof);
 *  - copies the luma plane (plane 0 only) of reference and main frames into
 *    libvmaf's float buffers, honoring the differing line strides;
 *  - clears frame_set and signals the filter thread that the slot is free;
 *  - returns 0 while frames keep arriving, 2 once eof was seen with no
 *    pending frame, which tells libvmaf to stop pulling.
 *
 * Only block comments are used near the macro: a // comment would swallow
 * the line-continuation backslash. The macro body is intentionally left
 * untouched — the lock/cond handshake depends on exact statement order.
 */
#define read_frame_fn(type, bits) \
static int read_frame_##bits##bit(float *ref_data, float *main_data, \
float *temp_data, int stride, \
double *score, void *ctx) \
{ \
LIBVMAFContext *s = (LIBVMAFContext *) ctx; \
int ret; \
\
pthread_mutex_lock(&s->lock); \
\
while (!s->frame_set && !s->eof) { \
pthread_cond_wait(&s->cond, &s->lock); \
} \
\
if (s->frame_set) { \
int ref_stride = s->gref->linesize[0]; \
int main_stride = s->gmain->linesize[0]; \
\
const type *ref_ptr = (const type *) s->gref->data[0]; \
const type *main_ptr = (const type *) s->gmain->data[0]; \
\
float *ptr = ref_data; \
\
int h = s->height; \
int w = s->width; \
\
int i,j; \
\
for (i = 0; i < h; i++) { \
for ( j = 0; j < w; j++) { \
ptr[j] = (float)ref_ptr[j]; \
} \
ref_ptr += ref_stride / sizeof(*ref_ptr); \
ptr += stride / sizeof(*ptr); \
} \
\
ptr = main_data; \
\
for (i = 0; i < h; i++) { \
for (j = 0; j < w; j++) { \
ptr[j] = (float)main_ptr[j]; \
} \
main_ptr += main_stride / sizeof(*main_ptr); \
ptr += stride / sizeof(*ptr); \
} \
} \
\
ret = !s->frame_set; \
\
s->frame_set = 0; \
\
pthread_cond_signal(&s->cond); \
pthread_mutex_unlock(&s->lock); \
\
if (ret) { \
return 2; \
} \
\
return 0; \
}

read_frame_fn(uint8_t, 8);   /* 8-bit planar YUV inputs */
read_frame_fn(uint16_t, 10); /* 10-bit (16-bit container) planar YUV inputs */
/**
 * Run libvmaf's blocking scorer on this thread. Picks the 8- or 10-bit
 * frame-fetch callback from the input bit depth and stores the pooled
 * score in s->vmaf_score. Called from the vmaf worker thread only.
 */
static void compute_vmaf_score(LIBVMAFContext *s)
{
    int (*read_frame)(float *ref_data, float *main_data, float *temp_data,
                      int stride, double *score, void *ctx);

    /* depth <= 8 covers yuv4xxp; anything deeper uses the 16-bit reader */
    read_frame = s->desc->comp[0].depth <= 8 ? read_frame_8bit
                                             : read_frame_10bit;

    s->vmaf_score = compute_vmaf(s->format, s->width, s->height, read_frame, s,
                                 s->model_path, s->log_path, s->log_fmt, 0, 0,
                                 s->enable_transform, s->phone_model, s->psnr,
                                 s->ssim, s->ms_ssim, s->pool);
}
/**
 * pthread entry point for the vmaf worker thread.
 * ctx is the LIBVMAFContext; it doubles as the av_log() context because
 * its first member is an AVClass pointer.
 */
static void *call_vmaf(void *ctx)
{
    LIBVMAFContext *s = ctx;

    compute_vmaf_score(s);
    av_log(ctx, AV_LOG_INFO, "VMAF score: %f\n", s->vmaf_score);

    /* returning from the start routine is equivalent to pthread_exit(NULL) */
    return NULL;
}
/**
 * Dual-input process callback: publish one main/reference frame pair to the
 * vmaf worker thread and pass the main frame through unchanged.
 *
 * Blocks until the worker has consumed the previous pair (frame_set == 0).
 */
static AVFrame *do_vmaf(AVFilterContext *ctx, AVFrame *main, const AVFrame *ref)
{
    LIBVMAFContext *s = ctx->priv;

    pthread_mutex_lock(&s->lock);

    while (s->frame_set != 0) {
        pthread_cond_wait(&s->cond, &s->lock);
    }

    /* FIX: the reader never unreferences gref/gmain, so without these
     * av_frame_unref() calls av_frame_ref() is applied to frames that still
     * hold references, leaking one reference per processed frame (and
     * violating the av_frame_ref() contract that dst be unreferenced). */
    av_frame_unref(s->gref);
    av_frame_unref(s->gmain);

    /* FIX: check av_frame_ref() results; on failure skip this pair instead
     * of handing half-initialized frames to the worker thread. */
    if (av_frame_ref(s->gref, ref) < 0 || av_frame_ref(s->gmain, main) < 0)
        av_log(ctx, AV_LOG_ERROR, "Failed to reference input frames.\n");
    else
        s->frame_set = 1;

    pthread_cond_signal(&s->cond);
    pthread_mutex_unlock(&s->lock);

    return main;
}
/**
 * Filter init: set up the synchronization primitives, the two frame slots
 * used to hand data to the vmaf worker thread, and the dual-input callback.
 *
 * @return 0 on success, AVERROR(ENOMEM) if a frame slot cannot be allocated.
 */
static av_cold int init(AVFilterContext *ctx)
{
    LIBVMAFContext *s = ctx->priv;

    /* init lock/cond first so uninit() can always safely destroy them */
    pthread_mutex_init(&s->lock, NULL);
    pthread_cond_init (&s->cond, NULL);

    s->gref  = av_frame_alloc();
    s->gmain = av_frame_alloc();
    /* FIX: allocation results were unchecked; a NULL slot would crash later
     * in do_vmaf()/read_frame. av_frame_free() in uninit() is NULL-safe. */
    if (!s->gref || !s->gmain)
        return AVERROR(ENOMEM);

    s->dinput.process = do_vmaf;

    return 0;
}
/**
 * Advertise the pixel formats the filter accepts on all pads:
 * 8- and 10-bit little-endian planar YUV.
 */
static int query_formats(AVFilterContext *ctx)
{
    static const enum AVPixelFormat pix_fmts[] = {
        AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUV422P, AV_PIX_FMT_YUV420P,
        AV_PIX_FMT_YUV444P10LE, AV_PIX_FMT_YUV422P10LE, AV_PIX_FMT_YUV420P10LE,
        AV_PIX_FMT_NONE
    };
    AVFilterFormats *list = ff_make_format_list(pix_fmts);

    return list ? ff_set_common_formats(ctx, list) : AVERROR(ENOMEM);
}
/**
 * Configure the reference input pad: validate that both inputs match in
 * geometry and pixel format, record the frame properties the worker needs,
 * and start the vmaf worker thread.
 *
 * @return 0 on success, AVERROR(EINVAL) on mismatched inputs or thread
 *         creation failure.
 */
static int config_input_ref(AVFilterLink *inlink)
{
    AVFilterContext *ctx = inlink->dst;
    LIBVMAFContext *s = ctx->priv;
    int th;

    if (ctx->inputs[0]->w != ctx->inputs[1]->w ||
        ctx->inputs[0]->h != ctx->inputs[1]->h) {
        av_log(ctx, AV_LOG_ERROR, "Width and height of input videos must be same.\n");
        return AVERROR(EINVAL);
    }
    if (ctx->inputs[0]->format != ctx->inputs[1]->format) {
        av_log(ctx, AV_LOG_ERROR, "Inputs must be of same pixel format.\n");
        return AVERROR(EINVAL);
    }

    s->desc = av_pix_fmt_desc_get(inlink->format);
    s->width = ctx->inputs[0]->w;
    s->height = ctx->inputs[0]->h;
    /* FIX: s->format was never assigned anywhere, so compute_vmaf() received
     * NULL as the format string. Use the pixel format's canonical name
     * (e.g. "yuv420p", "yuv420p10le"), which libvmaf expects. */
    s->format = (char *) s->desc->name;

    th = pthread_create(&s->vmaf_thread, NULL, call_vmaf, (void *) s);
    if (th) {
        av_log(ctx, AV_LOG_ERROR, "Thread creation failed.\n");
        return AVERROR(EINVAL);
    }

    return 0;
}
/**
 * Configure the output pad: the output mirrors the first ("main") input's
 * geometry and timing, then the dual-input machinery is initialized.
 */
static int config_output(AVFilterLink *outlink)
{
    AVFilterContext *ctx = outlink->src;
    LIBVMAFContext *s = ctx->priv;
    const AVFilterLink *main_link = ctx->inputs[0];

    outlink->w                   = main_link->w;
    outlink->h                   = main_link->h;
    outlink->time_base           = main_link->time_base;
    outlink->sample_aspect_ratio = main_link->sample_aspect_ratio;
    outlink->frame_rate          = main_link->frame_rate;

    return ff_dualinput_init(ctx, &s->dinput);
}
/* Input pad callback: route each incoming frame through the dual-input
 * synchronizer, which pairs frames and invokes do_vmaf(). */
static int filter_frame(AVFilterLink *inlink, AVFrame *inpicref)
{
    AVFilterContext *ctx = inlink->dst;
    LIBVMAFContext *s = ctx->priv;

    return ff_dualinput_filter_frame(&s->dinput, inlink, inpicref);
}
/* Output pad callback: forward frame requests to the dual-input
 * synchronizer, which pulls from both inputs as needed. */
static int request_frame(AVFilterLink *outlink)
{
    AVFilterContext *ctx = outlink->src;
    LIBVMAFContext *s = ctx->priv;

    return ff_dualinput_request_frame(&s->dinput, outlink);
}
/**
 * Filter teardown: signal end-of-stream to the vmaf worker thread, wait for
 * it to finish (compute_vmaf() returns once its reader sees eof with no
 * pending frame), then release frames and synchronization primitives.
 */
static av_cold void uninit(AVFilterContext *ctx)
{
    LIBVMAFContext *s = ctx->priv;

    ff_dualinput_uninit(&s->dinput);

    /* wake the worker: read_frame_*bit() exits its wait loop on eof */
    pthread_mutex_lock(&s->lock);
    s->eof = 1;
    pthread_cond_signal(&s->cond);
    pthread_mutex_unlock(&s->lock);

    /* NOTE(review): if init() or config_input_ref() failed before
     * pthread_create() ran, vmaf_thread was never created and joining it
     * here is undefined behavior — confirm whether a "thread started" flag
     * is needed. */
    pthread_join(s->vmaf_thread, NULL);

    av_frame_free(&s->gref);
    av_frame_free(&s->gmain);

    pthread_mutex_destroy(&s->lock);
    pthread_cond_destroy(&s->cond);
}
/* Two video inputs: "main" (distorted, passed through) and "reference".
 * Geometry/format validation and worker-thread startup happen on the
 * reference pad's config_props. */
static const AVFilterPad libvmaf_inputs[] = {
    {
        .name         = "main",
        .type         = AVMEDIA_TYPE_VIDEO,
        .filter_frame = filter_frame,
    },{
        .name         = "reference",
        .type         = AVMEDIA_TYPE_VIDEO,
        .filter_frame = filter_frame,
        .config_props = config_input_ref,
    },
    { NULL }
};

/* Single video output: the unmodified "main" input stream. */
static const AVFilterPad libvmaf_outputs[] = {
    {
        .name          = "default",
        .type          = AVMEDIA_TYPE_VIDEO,
        .config_props  = config_output,
        .request_frame = request_frame,
    },
    { NULL }
};

AVFilter ff_vf_libvmaf = {
    .name          = "libvmaf",
    .description   = NULL_IF_CONFIG_SMALL("Calculate the VMAF between two video streams."),
    .init          = init,
    .uninit        = uninit,
    .query_formats = query_formats,
    .priv_size     = sizeof(LIBVMAFContext),
    .priv_class    = &libvmaf_class,
    .inputs        = libvmaf_inputs,
    .outputs       = libvmaf_outputs,
};
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment