Commit 8c21f1e3 authored by Zhao Zhili, committed by Guo Yejun

avfilter/dnn: Refactor DNN parameter configuration system

This patch tries to resolve multiple issues related to parameter
configuration:

First, every DNN filter duplicates DNN_COMMON_OPTIONS, which should
really be common options of the backends.

Second, backend options are hidden behind the scenes. To the user they
are a single AV_OPT_TYPE_STRING option, backend_configs, parsed by each
backend on its own, so the help message gives no hint of which options
a given backend supports.

Third, DNN backends duplicate DNN_BACKEND_COMMON_OPTIONS.

Last but not least, passing backend options via an AV_OPT_TYPE_STRING
makes it hard, if not impossible, to pass AV_OPT_TYPE_BINARY values to
a backend.

This patch puts the backend-common options and each backend's own
options inside DnnContext to reduce code duplication, make the options
user friendly, and keep them easy to extend for future use cases.

For example,

./ffmpeg -h filter=dnn_processing

dnn_processing AVOptions:
   dnn_backend       <int>        ..FV....... DNN backend (from INT_MIN to INT_MAX) (default tensorflow)
     tensorflow      1            ..FV....... tensorflow backend flag
     openvino        2            ..FV....... openvino backend flag
     torch           3            ..FV....... torch backend flag

dnn_base AVOptions:
   model             <string>     ..F........ path to model file
   input             <string>     ..F........ input name of the model
   output            <string>     ..F........ output name of the model
   backend_configs   <string>     ..F.......P backend configs (deprecated)
   options           <string>     ..F.......P backend configs (deprecated)
   nireq             <int>        ..F........ number of request (from 0 to INT_MAX) (default 0)
   async             <boolean>    ..F........ use DNN async inference (default true)
   device            <string>     ..F........ device to run model

dnn_tensorflow AVOptions:
   sess_config       <string>     ..F........ config for SessionOptions

dnn_openvino AVOptions:
   batch_size        <int>        ..F........ batch size per request (from 1 to 1000) (default 1)
   input_resizable   <boolean>    ..F........ can input be resizable or not (default false)
   layout            <int>        ..F........ input layout of model (from 0 to 2) (default none)
     none            0            ..F........ none
     nchw            1            ..F........ nchw
     nhwc            2            ..F........ nhwc
   scale             <float>      ..F........ Add scale preprocess operation. Divide each element of input by specified value. (from INT_MIN to INT_MAX) (default 0)
   mean              <float>      ..F........ Add mean preprocess operation. Subtract specified value from each element of input. (from INT_MIN to INT_MAX) (default 0)

dnn_th AVOptions:
   optimize          <int>        ..F........ turn on graph executor optimization (from 0 to 1) (default 0)
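
With the per-backend classes exposed like this, backend options can be
set directly in the filter argument string. An illustrative invocation
(assuming an OpenVINO-enabled build; file names and values are
placeholders):

./ffmpeg -i input.mp4 -vf dnn_processing=dnn_backend=openvino:model=model.xml:input=x:output=y:batch_size=4 output.mp4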
Signed-off-by: Zhao Zhili <zhilizhao@tencent.com>
Reviewed-by: Wenbin Chen <wenbin.chen@intel.com>
Reviewed-by: Guo Yejun <yejun.guo@intel.com>
parent 115c96b9
......@@ -28,9 +28,16 @@
#include "../dnn_interface.h"
#include "libavutil/thread.h"
#define DNN_BACKEND_COMMON_OPTIONS \
{ "nireq", "number of request", OFFSET(options.nireq), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, FLAGS }, \
{ "async", "use DNN async inference", OFFSET(options.async), AV_OPT_TYPE_BOOL, { .i64 = 1 }, 0, 1, FLAGS },
#define DNN_DEFINE_CLASS_EXT(name, desc, options) \
{ \
.class_name = desc, \
.item_name = av_default_item_name, \
.option = options, \
.version = LIBAVUTIL_VERSION_INT, \
.category = AV_CLASS_CATEGORY_FILTER, \
}
#define DNN_DEFINE_CLASS(fname) \
DNN_DEFINE_CLASS_EXT(fname, #fname, fname##_options)
// one task for one function call from dnn interface
typedef struct TaskItem {
......
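To illustrate the new DNN_DEFINE_CLASS macro above: a backend now
exposes its AVClass through the DNNModule itself rather than through a
private backend context. A minimal sketch for a hypothetical backend
"foo" (all names invented for illustration; a real backend would also
need an options member in DnnContext and an entry in
dnn_backend_info_list):

#include "dnn_backend_common.h"
#include "libavutil/opt.h"

typedef struct FooOptions {
    const AVClass *clazz; /* must be first so the struct is usable as an AVOptions child */
    int fast_mode;
} FooOptions;

#define OFFSET(x) offsetof(FooOptions, x)
#define FLAGS AV_OPT_FLAG_FILTERING_PARAM
static const AVOption dnn_foo_options[] = {
    { "fast_mode", "enable fast code path", OFFSET(fast_mode), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, FLAGS },
    { NULL }
};

const DNNModule ff_dnn_backend_foo = {
    .clazz = DNN_DEFINE_CLASS(dnn_foo),
    /* .load_model, .execute_model, etc. point at the backend's callbacks */
};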
......@@ -36,19 +36,8 @@
#include "safe_queue.h"
#include <tensorflow/c/c_api.h>
typedef struct TFOptions{
char *sess_config;
uint8_t async;
uint32_t nireq;
} TFOptions;
typedef struct TFContext {
const AVClass *class;
TFOptions options;
} TFContext;
typedef struct TFModel{
TFContext ctx;
typedef struct TFModel {
DnnContext *ctx;
DNNModel *model;
TF_Graph *graph;
TF_Session *session;
......@@ -76,15 +65,13 @@ typedef struct TFRequestItem {
DNNAsyncExecModule exec_module;
} TFRequestItem;
#define OFFSET(x) offsetof(TFContext, x)
#define OFFSET(x) offsetof(TFOptions, x)
#define FLAGS AV_OPT_FLAG_FILTERING_PARAM
static const AVOption dnn_tensorflow_options[] = {
{ "sess_config", "config for SessionOptions", OFFSET(options.sess_config), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, FLAGS },
DNN_BACKEND_COMMON_OPTIONS
{ "sess_config", "config for SessionOptions", OFFSET(sess_config), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, FLAGS },
{ NULL }
};
AVFILTER_DEFINE_CLASS(dnn_tensorflow);
static int execute_model_tf(TFRequestItem *request, Queue *lltask_queue);
static void infer_completion_callback(void *args);
......@@ -160,7 +147,7 @@ static int tf_start_inference(void *args)
TFModel *tf_model = task->model;
if (!request) {
av_log(&tf_model->ctx, AV_LOG_ERROR, "TFRequestItem is NULL\n");
av_log(tf_model->ctx, AV_LOG_ERROR, "TFRequestItem is NULL\n");
return AVERROR(EINVAL);
}
......@@ -170,7 +157,7 @@ static int tf_start_inference(void *args)
task->nb_output, NULL, 0, NULL,
request->status);
if (TF_GetCode(request->status) != TF_OK) {
av_log(&tf_model->ctx, AV_LOG_ERROR, "%s", TF_Message(request->status));
av_log(tf_model->ctx, AV_LOG_ERROR, "%s", TF_Message(request->status));
return DNN_GENERIC_ERROR;
}
return 0;
......@@ -198,7 +185,7 @@ static inline void destroy_request_item(TFRequestItem **arg) {
static int extract_lltask_from_task(TaskItem *task, Queue *lltask_queue)
{
TFModel *tf_model = task->model;
TFContext *ctx = &tf_model->ctx;
DnnContext *ctx = tf_model->ctx;
LastLevelTaskItem *lltask = av_malloc(sizeof(*lltask));
if (!lltask) {
av_log(ctx, AV_LOG_ERROR, "Unable to allocate space for LastLevelTaskItem\n");
......@@ -278,7 +265,7 @@ static TF_Tensor *allocate_input_tensor(const DNNData *input)
static int get_input_tf(void *model, DNNData *input, const char *input_name)
{
TFModel *tf_model = model;
TFContext *ctx = &tf_model->ctx;
DnnContext *ctx = tf_model->ctx;
TF_Status *status;
TF_DataType dt;
int64_t dims[4];
......@@ -328,7 +315,7 @@ static int get_output_tf(void *model, const char *input_name, int input_width, i
{
int ret;
TFModel *tf_model = model;
TFContext *ctx = &tf_model->ctx;
DnnContext *ctx = tf_model->ctx;
TaskItem task;
TFRequestItem *request;
DNNExecBaseParams exec_params = {
......@@ -399,7 +386,7 @@ static int hex_to_data(uint8_t *data, const char *p)
static int load_tf_model(TFModel *tf_model, const char *model_filename)
{
TFContext *ctx = &tf_model->ctx;
DnnContext *ctx = tf_model->ctx;
TF_Buffer *graph_def;
TF_ImportGraphDefOptions *graph_opts;
TF_SessionOptions *sess_opts;
......@@ -408,7 +395,7 @@ static int load_tf_model(TFModel *tf_model, const char *model_filename)
int sess_config_length = 0;
// prepare the sess config data
if (tf_model->ctx.options.sess_config != NULL) {
if (ctx->tf_option.sess_config != NULL) {
const char *config;
/*
tf_model->ctx.options.sess_config is hex to present the serialized proto
......@@ -416,11 +403,11 @@ static int load_tf_model(TFModel *tf_model, const char *model_filename)
proto in a python script, tools/python/tf_sess_config.py is a script example
to generate the configs of sess_config.
*/
if (strncmp(tf_model->ctx.options.sess_config, "0x", 2) != 0) {
if (strncmp(ctx->tf_option.sess_config, "0x", 2) != 0) {
av_log(ctx, AV_LOG_ERROR, "sess_config should start with '0x'\n");
return AVERROR(EINVAL);
}
config = tf_model->ctx.options.sess_config + 2;
config = ctx->tf_option.sess_config + 2;
sess_config_length = hex_to_data(NULL, config);
sess_config = av_mallocz(sess_config_length + AV_INPUT_BUFFER_PADDING_SIZE);
......@@ -461,7 +448,7 @@ static int load_tf_model(TFModel *tf_model, const char *model_filename)
if (TF_GetCode(tf_model->status) != TF_OK) {
TF_DeleteSessionOptions(sess_opts);
av_log(ctx, AV_LOG_ERROR, "Failed to set config for sess options with %s\n",
tf_model->ctx.options.sess_config);
ctx->tf_option.sess_config);
return DNN_GENERIC_ERROR;
}
}
......@@ -529,15 +516,14 @@ static void dnn_free_model_tf(DNNModel **model)
TF_DeleteStatus(tf_model->status);
}
av_freep(&tf_model);
av_freep(model);
av_freep(&model);
}
}
static DNNModel *dnn_load_model_tf(const char *model_filename, DNNFunctionType func_type, const char *options, AVFilterContext *filter_ctx)
static DNNModel *dnn_load_model_tf(DnnContext *ctx, DNNFunctionType func_type, AVFilterContext *filter_ctx)
{
DNNModel *model = NULL;
TFModel *tf_model = NULL;
TFContext *ctx = NULL;
model = av_mallocz(sizeof(DNNModel));
if (!model){
......@@ -551,23 +537,15 @@ static DNNModel *dnn_load_model_tf(const char *model_filename, DNNFunctionType f
}
model->model = tf_model;
tf_model->model = model;
ctx = &tf_model->ctx;
ctx->class = &dnn_tensorflow_class;
//parse options
av_opt_set_defaults(ctx);
if (av_opt_set_from_string(ctx, options, NULL, "=", "&") < 0) {
av_log(ctx, AV_LOG_ERROR, "Failed to parse options \"%s\"\n", options);
goto err;
}
tf_model->ctx = ctx;
if (load_tf_model(tf_model, model_filename) != 0){
av_log(ctx, AV_LOG_ERROR, "Failed to load TensorFlow model: \"%s\"\n", model_filename);
if (load_tf_model(tf_model, ctx->model_filename) != 0){
av_log(ctx, AV_LOG_ERROR, "Failed to load TensorFlow model: \"%s\"\n", ctx->model_filename);
goto err;
}
if (ctx->options.nireq <= 0) {
ctx->options.nireq = av_cpu_count() / 2 + 1;
if (ctx->nireq <= 0) {
ctx->nireq = av_cpu_count() / 2 + 1;
}
#if !HAVE_PTHREAD_CANCEL
......@@ -582,7 +560,7 @@ static DNNModel *dnn_load_model_tf(const char *model_filename, DNNFunctionType f
goto err;
}
for (int i = 0; i < ctx->options.nireq; i++) {
for (int i = 0; i < ctx->nireq; i++) {
TFRequestItem *item = av_mallocz(sizeof(*item));
if (!item) {
goto err;
......@@ -617,7 +595,6 @@ static DNNModel *dnn_load_model_tf(const char *model_filename, DNNFunctionType f
model->get_input = &get_input_tf;
model->get_output = &get_output_tf;
model->options = options;
model->filter_ctx = filter_ctx;
model->func_type = func_type;
......@@ -632,7 +609,7 @@ static int fill_model_input_tf(TFModel *tf_model, TFRequestItem *request) {
LastLevelTaskItem *lltask;
TaskItem *task;
TFInferRequest *infer_request = NULL;
TFContext *ctx = &tf_model->ctx;
DnnContext *ctx = tf_model->ctx;
int ret = 0;
lltask = ff_queue_pop_front(tf_model->lltask_queue);
......@@ -728,7 +705,7 @@ static void infer_completion_callback(void *args) {
DNNData *outputs;
TFInferRequest *infer_request = request->infer_request;
TFModel *tf_model = task->model;
TFContext *ctx = &tf_model->ctx;
DnnContext *ctx = tf_model->ctx;
outputs = av_calloc(task->nb_output, sizeof(*outputs));
if (!outputs) {
......@@ -787,7 +764,7 @@ err:
static int execute_model_tf(TFRequestItem *request, Queue *lltask_queue)
{
TFModel *tf_model;
TFContext *ctx;
DnnContext *ctx;
LastLevelTaskItem *lltask;
TaskItem *task;
int ret = 0;
......@@ -800,7 +777,7 @@ static int execute_model_tf(TFRequestItem *request, Queue *lltask_queue)
lltask = ff_queue_peek_front(lltask_queue);
task = lltask->task;
tf_model = task->model;
ctx = &tf_model->ctx;
ctx = tf_model->ctx;
ret = fill_model_input_tf(tf_model, request);
if (ret != 0) {
......@@ -833,7 +810,7 @@ err:
static int dnn_execute_model_tf(const DNNModel *model, DNNExecBaseParams *exec_params)
{
TFModel *tf_model = model->model;
TFContext *ctx = &tf_model->ctx;
DnnContext *ctx = tf_model->ctx;
TaskItem *task;
TFRequestItem *request;
int ret = 0;
......@@ -849,7 +826,7 @@ static int dnn_execute_model_tf(const DNNModel *model, DNNExecBaseParams *exec_p
return AVERROR(ENOMEM);
}
ret = ff_dnn_fill_task(task, exec_params, tf_model, ctx->options.async, 1);
ret = ff_dnn_fill_task(task, exec_params, tf_model, ctx->async, 1);
if (ret != 0) {
av_log(ctx, AV_LOG_ERROR, "Fill task with invalid parameter(s).\n");
av_freep(&task);
......@@ -887,7 +864,7 @@ static DNNAsyncStatusType dnn_get_result_tf(const DNNModel *model, AVFrame **in,
static int dnn_flush_tf(const DNNModel *model)
{
TFModel *tf_model = model->model;
TFContext *ctx = &tf_model->ctx;
DnnContext *ctx = tf_model->ctx;
TFRequestItem *request;
int ret;
......@@ -915,6 +892,7 @@ static int dnn_flush_tf(const DNNModel *model)
}
const DNNModule ff_dnn_backend_tf = {
.clazz = DNN_DEFINE_CLASS(dnn_tensorflow),
.load_model = dnn_load_model_tf,
.execute_model = dnn_execute_model_tf,
.get_result = dnn_get_result_tf,
......
......@@ -36,18 +36,8 @@ extern "C" {
#include "safe_queue.h"
}
typedef struct THOptions{
char *device_name;
int optimize;
} THOptions;
typedef struct THContext {
const AVClass *c_class;
THOptions options;
} THContext;
typedef struct THModel {
THContext ctx;
DnnContext *ctx;
DNNModel *model;
torch::jit::Module *jit_model;
SafeQueue *request_queue;
......@@ -67,20 +57,17 @@ typedef struct THRequestItem {
} THRequestItem;
#define OFFSET(x) offsetof(THContext, x)
#define OFFSET(x) offsetof(THOptions, x)
#define FLAGS AV_OPT_FLAG_FILTERING_PARAM
static const AVOption dnn_th_options[] = {
{ "device", "device to run model", OFFSET(options.device_name), AV_OPT_TYPE_STRING, { .str = "cpu" }, 0, 0, FLAGS },
{ "optimize", "turn on graph executor optimization", OFFSET(options.optimize), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, FLAGS},
{ "optimize", "turn on graph executor optimization", OFFSET(optimize), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, FLAGS},
{ NULL }
};
AVFILTER_DEFINE_CLASS(dnn_th);
static int extract_lltask_from_task(TaskItem *task, Queue *lltask_queue)
{
THModel *th_model = (THModel *)task->model;
THContext *ctx = &th_model->ctx;
DnnContext *ctx = th_model->ctx;
LastLevelTaskItem *lltask = (LastLevelTaskItem *)av_malloc(sizeof(*lltask));
if (!lltask) {
av_log(ctx, AV_LOG_ERROR, "Failed to allocate memory for LastLevelTaskItem\n");
......@@ -153,7 +140,6 @@ static void dnn_free_model_th(DNNModel **model)
}
ff_queue_destroy(th_model->task_queue);
delete th_model->jit_model;
av_opt_free(&th_model->ctx);
av_freep(&th_model);
av_freep(model);
}
......@@ -181,7 +167,7 @@ static int fill_model_input_th(THModel *th_model, THRequestItem *request)
TaskItem *task = NULL;
THInferRequest *infer_request = NULL;
DNNData input = { 0 };
THContext *ctx = &th_model->ctx;
DnnContext *ctx = th_model->ctx;
int ret, width_idx, height_idx, channel_idx;
lltask = (LastLevelTaskItem *)ff_queue_pop_front(th_model->lltask_queue);
......@@ -241,7 +227,7 @@ static int th_start_inference(void *args)
LastLevelTaskItem *lltask = NULL;
TaskItem *task = NULL;
THModel *th_model = NULL;
THContext *ctx = NULL;
DnnContext *ctx = NULL;
std::vector<torch::jit::IValue> inputs;
torch::NoGradGuard no_grad;
......@@ -253,9 +239,9 @@ static int th_start_inference(void *args)
lltask = request->lltask;
task = lltask->task;
th_model = (THModel *)task->model;
ctx = &th_model->ctx;
ctx = th_model->ctx;
if (ctx->options.optimize)
if (ctx->torch_option.optimize)
torch::jit::setGraphExecutorOptimize(true);
else
torch::jit::setGraphExecutorOptimize(false);
......@@ -292,7 +278,7 @@ static void infer_completion_callback(void *args) {
outputs.dims[2] = sizes.at(2); // H
outputs.dims[3] = sizes.at(3); // W
} else {
avpriv_report_missing_feature(&th_model->ctx, "Support of this kind of model");
avpriv_report_missing_feature(th_model->ctx, "Support of this kind of model");
goto err;
}
......@@ -304,7 +290,7 @@ static void infer_completion_callback(void *args) {
if (th_model->model->frame_post_proc != NULL) {
th_model->model->frame_post_proc(task->out_frame, &outputs, th_model->model->filter_ctx);
} else {
ff_proc_from_dnn_to_frame(task->out_frame, &outputs, &th_model->ctx);
ff_proc_from_dnn_to_frame(task->out_frame, &outputs, th_model->ctx);
}
} else {
task->out_frame->width = outputs.dims[dnn_get_width_idx_by_layout(outputs.layout)];
......@@ -312,7 +298,7 @@ static void infer_completion_callback(void *args) {
}
break;
default:
avpriv_report_missing_feature(&th_model->ctx, "model function type %d", th_model->model->func_type);
avpriv_report_missing_feature(th_model->ctx, "model function type %d", th_model->model->func_type);
goto err;
}
task->inference_done++;
......@@ -322,7 +308,7 @@ err:
if (ff_safe_queue_push_back(th_model->request_queue, request) < 0) {
destroy_request_item(&request);
av_log(&th_model->ctx, AV_LOG_ERROR, "Unable to push back request_queue when failed to start inference.\n");
av_log(th_model->ctx, AV_LOG_ERROR, "Unable to push back request_queue when failed to start inference.\n");
}
}
......@@ -352,7 +338,7 @@ static int execute_model_th(THRequestItem *request, Queue *lltask_queue)
goto err;
}
if (task->async) {
avpriv_report_missing_feature(&th_model->ctx, "LibTorch async");
avpriv_report_missing_feature(th_model->ctx, "LibTorch async");
} else {
ret = th_start_inference((void *)(request));
if (ret != 0) {
......@@ -375,7 +361,7 @@ static int get_output_th(void *model, const char *input_name, int input_width, i
{
int ret = 0;
THModel *th_model = (THModel*) model;
THContext *ctx = &th_model->ctx;
DnnContext *ctx = th_model->ctx;
TaskItem task = { 0 };
THRequestItem *request = NULL;
DNNExecBaseParams exec_params = {
......@@ -424,12 +410,12 @@ static THInferRequest *th_create_inference_request(void)
return request;
}
static DNNModel *dnn_load_model_th(const char *model_filename, DNNFunctionType func_type, const char *options, AVFilterContext *filter_ctx)
static DNNModel *dnn_load_model_th(DnnContext *ctx, DNNFunctionType func_type, AVFilterContext *filter_ctx)
{
DNNModel *model = NULL;
THModel *th_model = NULL;
THRequestItem *item = NULL;
THContext *ctx;
const char *device_name = ctx->device ? ctx->device : "cpu";
model = (DNNModel *)av_mallocz(sizeof(DNNModel));
if (!model) {
......@@ -443,24 +429,17 @@ static DNNModel *dnn_load_model_th(const char *model_filename, DNNFunctionType f
}
th_model->model = model;
model->model = th_model;
th_model->ctx.c_class = &dnn_th_class;
ctx = &th_model->ctx;
//parse options
av_opt_set_defaults(ctx);
if (av_opt_set_from_string(ctx, options, NULL, "=", "&") < 0) {
av_log(ctx, AV_LOG_ERROR, "Failed to parse options \"%s\"\n", options);
return NULL;
}
th_model->ctx = ctx;
c10::Device device = c10::Device(ctx->options.device_name);
c10::Device device = c10::Device(device_name);
if (!device.is_cpu()) {
av_log(ctx, AV_LOG_ERROR, "Not supported device:\"%s\"\n", ctx->options.device_name);
av_log(ctx, AV_LOG_ERROR, "Not supported device:\"%s\"\n", device_name);
goto fail;
}
try {
th_model->jit_model = new torch::jit::Module;
(*th_model->jit_model) = torch::jit::load(model_filename);
(*th_model->jit_model) = torch::jit::load(ctx->model_filename);
} catch (const c10::Error& e) {
av_log(ctx, AV_LOG_ERROR, "Failed to load torch model\n");
goto fail;
......@@ -502,7 +481,6 @@ static DNNModel *dnn_load_model_th(const char *model_filename, DNNFunctionType f
model->get_input = &get_input_th;
model->get_output = &get_output_th;
model->options = NULL;
model->filter_ctx = filter_ctx;
model->func_type = func_type;
return model;
......@@ -519,7 +497,7 @@ fail:
static int dnn_execute_model_th(const DNNModel *model, DNNExecBaseParams *exec_params)
{
THModel *th_model = (THModel *)model->model;
THContext *ctx = &th_model->ctx;
DnnContext *ctx = th_model->ctx;
TaskItem *task;
THRequestItem *request;
int ret = 0;
......@@ -582,7 +560,7 @@ static int dnn_flush_th(const DNNModel *model)
request = (THRequestItem *)ff_safe_queue_pop_front(th_model->request_queue);
if (!request) {
av_log(&th_model->ctx, AV_LOG_ERROR, "unable to get infer request.\n");
av_log(th_model->ctx, AV_LOG_ERROR, "unable to get infer request.\n");
return AVERROR(EINVAL);
}
......@@ -590,6 +568,7 @@ static int dnn_flush_th(const DNNModel *model)
}
extern const DNNModule ff_dnn_backend_torch = {
.clazz = DNN_DEFINE_CLASS(dnn_th),
.load_model = dnn_load_model_th,
.execute_model = dnn_execute_model_th,
.get_result = dnn_get_result_th,
......
......@@ -24,12 +24,61 @@
*/
#include "../dnn_interface.h"
#include "libavutil/avassert.h"
#include "libavutil/mem.h"
#include "libavutil/opt.h"
#include "libavfilter/internal.h"
extern const DNNModule ff_dnn_backend_openvino;
extern const DNNModule ff_dnn_backend_tf;
extern const DNNModule ff_dnn_backend_torch;
#define OFFSET(x) offsetof(DnnContext, x)
#define FLAGS AV_OPT_FLAG_FILTERING_PARAM
static const AVOption dnn_base_options[] = {
{"model", "path to model file",
OFFSET(model_filename), AV_OPT_TYPE_STRING, {.str = NULL}, 0, 0, FLAGS},
{"input", "input name of the model",
OFFSET(model_inputname), AV_OPT_TYPE_STRING, {.str = NULL}, 0, 0, FLAGS},
{"output", "output name of the model",
OFFSET(model_outputnames_string), AV_OPT_TYPE_STRING, {.str = NULL}, 0, 0, FLAGS},
{"backend_configs", "backend configs (deprecated)",
OFFSET(backend_options), AV_OPT_TYPE_STRING, {.str = NULL}, 0, 0, FLAGS | AV_OPT_FLAG_DEPRECATED},
{"options", "backend configs (deprecated)",
OFFSET(backend_options), AV_OPT_TYPE_STRING, {.str = NULL}, 0, 0, FLAGS | AV_OPT_FLAG_DEPRECATED},
{"nireq", "number of request",
OFFSET(nireq), AV_OPT_TYPE_INT, {.i64 = 0}, 0, INT_MAX, FLAGS},
{"async", "use DNN async inference",
OFFSET(async), AV_OPT_TYPE_BOOL, {.i64 = 1}, 0, 1, FLAGS},
{"device", "device to run model",
OFFSET(device), AV_OPT_TYPE_STRING, {.str = NULL}, 0, 0, FLAGS},
{NULL}
};
AVFILTER_DEFINE_CLASS(dnn_base);
typedef struct DnnBackendInfo {
const size_t offset;
union {
const AVClass *class;
const DNNModule *module;
};
} DnnBackendInfo;
static const DnnBackendInfo dnn_backend_info_list[] = {
{0, .class = &dnn_base_class},
// Must keep the same order as in DnnContext, so the offset values are in increasing order
#if CONFIG_LIBTENSORFLOW
{offsetof(DnnContext, tf_option), .module = &ff_dnn_backend_tf},
#endif
#if CONFIG_LIBOPENVINO
{offsetof(DnnContext, ov_option), .module = &ff_dnn_backend_openvino},
#endif
#if CONFIG_LIBTORCH
{offsetof(DnnContext, torch_option), .module = &ff_dnn_backend_torch},
#endif
};
const DNNModule *ff_get_dnn_module(DNNBackendType backend_type, void *log_ctx)
{
switch(backend_type){
......@@ -52,3 +101,44 @@ const DNNModule *ff_get_dnn_module(DNNBackendType backend_type, void *log_ctx)
return NULL;
}
}
void ff_dnn_init_child_class(DnnContext *ctx)
{
for (int i = 0; i < FF_ARRAY_ELEMS(dnn_backend_info_list); i++) {
const AVClass **ptr = (const AVClass **) ((char *) ctx + dnn_backend_info_list[i].offset);
*ptr = dnn_backend_info_list[i].class;
}
}
void *ff_dnn_child_next(DnnContext *obj, void *prev) {
size_t pre_offset;
if (!prev) {
av_assert0(obj->clazz);
return obj;
}
pre_offset = (char *)prev - (char *)obj;
for (int i = 0; i < FF_ARRAY_ELEMS(dnn_backend_info_list) - 1; i++) {
if (dnn_backend_info_list[i].offset == pre_offset) {
const AVClass **ptr = (const AVClass **) ((char *) obj + dnn_backend_info_list[i + 1].offset);
av_assert0(*ptr);
return ptr;
}
}
return NULL;
}
const AVClass *ff_dnn_child_class_iterate(void **iter)
{
uintptr_t i = (uintptr_t) *iter;
if (i < FF_ARRAY_ELEMS(dnn_backend_info_list)) {
*iter = (void *)(i + 1);
return dnn_backend_info_list[i].class;
}
return NULL;
}
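A note on how this machinery is consumed (a sketch, not part of the
patch): since the filter classes route child_next/child_class_iterate
through the functions above, the generic AVOption API can reach
backend options via AV_OPT_SEARCH_CHILDREN. Assuming ctx points at a
DnnContext whose child classes were set up by ff_dnn_init_child_class():

/* av_opt_set() walks ff_dnn_child_next() until it finds the child
 * class declaring "sess_config" (the TensorFlow backend), so the
 * caller does not need to know which backend owns the option. The
 * value is a "0x"-prefixed hex string, as checked by load_tf_model(). */
int ret = av_opt_set(ctx, "sess_config", sess_config_hex, AV_OPT_SEARCH_CHILDREN);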
......@@ -19,6 +19,7 @@
#include "dnn_filter_common.h"
#include "libavutil/avstring.h"
#include "libavutil/mem.h"
#include "libavutil/opt.h"
#define MAX_SUPPORTED_OUTPUTS_NB 4
......@@ -52,6 +53,23 @@ static char **separate_output_names(const char *expr, const char *val_sep, int *
return parsed_vals;
}
typedef struct DnnFilterBase {
const AVClass *class;
DnnContext dnnctx;
} DnnFilterBase;
int ff_dnn_filter_init_child_class(AVFilterContext *filter) {
DnnFilterBase *base = filter->priv;
ff_dnn_init_child_class(&base->dnnctx);
return 0;
}
void *ff_dnn_filter_child_next(void *obj, void *prev)
{
DnnFilterBase *base = obj;
return ff_dnn_child_next(&base->dnnctx, prev);
}
int ff_dnn_init(DnnContext *ctx, DNNFunctionType func_type, AVFilterContext *filter_ctx)
{
DNNBackendType backend = ctx->backend_type;
......@@ -91,7 +109,25 @@ int ff_dnn_init(DnnContext *ctx, DNNFunctionType func_type, AVFilterContext *fil
return AVERROR(EINVAL);
}
ctx->model = (ctx->dnn_module->load_model)(ctx->model_filename, func_type, ctx->backend_options, filter_ctx);
if (ctx->backend_options) {
void *child = NULL;
av_log(filter_ctx, AV_LOG_WARNING,
"backend_configs is deprecated, please set backend options directly\n");
while (child = ff_dnn_child_next(ctx, child)) {
if (*(const AVClass **)child == &ctx->dnn_module->clazz) {
int ret = av_opt_set_from_string(child, ctx->backend_options,
NULL, "=", "&");
if (ret < 0) {
av_log(filter_ctx, AV_LOG_ERROR, "failed to parse options \"%s\"\n",
ctx->backend_options);
return ret;
}
}
}
}
ctx->model = (ctx->dnn_module->load_model)(ctx, func_type, filter_ctx);
if (!ctx->model) {
av_log(filter_ctx, AV_LOG_ERROR, "could not load DNN model\n");
return AVERROR(EINVAL);
......
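For users, the compatibility path above means the old and the new
spellings coexist during the deprecation period; illustrative command
lines (model file and hex value are placeholders):

deprecated: ./ffmpeg -i in.mp4 -vf dnn_processing=dnn_backend=tensorflow:model=srcnn.pb:input=x:output=y:backend_configs=sess_config=0x... out.mp4
direct:     ./ffmpeg -i in.mp4 -vf dnn_processing=dnn_backend=tensorflow:model=srcnn.pb:input=x:output=y:sess_config=0x... out.mp4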
......@@ -26,28 +26,23 @@
#include "dnn_interface.h"
typedef struct DnnContext {
char *model_filename;
DNNBackendType backend_type;
char *model_inputname;
char *model_outputnames_string;
char *backend_options;
int async;
char **model_outputnames;
uint32_t nb_outputs;
const DNNModule *dnn_module;
DNNModel *model;
} DnnContext;
#define DNN_COMMON_OPTIONS \
{ "model", "path to model file", OFFSET(model_filename), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, FLAGS },\
{ "input", "input name of the model", OFFSET(model_inputname), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, FLAGS },\
{ "output", "output name of the model", OFFSET(model_outputnames_string), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, FLAGS },\
{ "backend_configs", "backend configs", OFFSET(backend_options), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, FLAGS },\
{ "options", "backend configs (deprecated, use backend_configs)", OFFSET(backend_options), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, FLAGS | AV_OPT_FLAG_DEPRECATED},\
{ "async", "use DNN async inference (ignored, use backend_configs='async=1')", OFFSET(async), AV_OPT_TYPE_BOOL, { .i64 = 1}, 0, 1, FLAGS},
#define AVFILTER_DNN_DEFINE_CLASS_EXT(name, desc, options) \
static const AVClass name##_class = { \
.class_name = desc, \
.item_name = av_default_item_name, \
.option = options, \
.version = LIBAVUTIL_VERSION_INT, \
.category = AV_CLASS_CATEGORY_FILTER, \
.child_next = ff_dnn_filter_child_next, \
.child_class_iterate = ff_dnn_child_class_iterate, \
}
#define AVFILTER_DNN_DEFINE_CLASS(fname) \
AVFILTER_DNN_DEFINE_CLASS_EXT(fname, #fname, fname##_options)
void *ff_dnn_filter_child_next(void *obj, void *prev);
int ff_dnn_filter_init_child_class(AVFilterContext *filter);
int ff_dnn_init(DnnContext *ctx, DNNFunctionType func_type, AVFilterContext *filter_ctx);
int ff_dnn_set_frame_proc(DnnContext *ctx, FramePrePostProc pre_proc, FramePrePostProc post_proc);
......
......@@ -93,8 +93,6 @@ typedef int (*ClassifyPostProc)(AVFrame *frame, DNNData *output, uint32_t bbox_i
typedef struct DNNModel{
// Stores model that can be different for different backends.
void *model;
// Stores options when the model is executed by the backend
const char *options;
// Stores FilterContext used for the interaction between AVFrame and DNNData
AVFilterContext *filter_ctx;
// Stores function type of the model
......@@ -117,10 +115,65 @@ typedef struct DNNModel{
ClassifyPostProc classify_post_proc;
} DNNModel;
typedef struct TFOptions{
const AVClass *clazz;
char *sess_config;
} TFOptions;
typedef struct OVOptions {
const AVClass *clazz;
int batch_size;
int input_resizable;
DNNLayout layout;
float scale;
float mean;
} OVOptions;
typedef struct THOptions {
const AVClass *clazz;
int optimize;
} THOptions;
typedef struct DNNModule DNNModule;
typedef struct DnnContext {
const AVClass *clazz;
DNNModel *model;
char *model_filename;
DNNBackendType backend_type;
char *model_inputname;
char *model_outputnames_string;
char *backend_options;
int async;
char **model_outputnames;
uint32_t nb_outputs;
const DNNModule *dnn_module;
int nireq;
char *device;
#if CONFIG_LIBTENSORFLOW
TFOptions tf_option;
#endif
#if CONFIG_LIBOPENVINO
OVOptions ov_option;
#endif
#if CONFIG_LIBTORCH
THOptions torch_option;
#endif
} DnnContext;
// Stores pointers to functions for loading, executing, freeing DNN models for one of the backends.
typedef struct DNNModule{
struct DNNModule {
const AVClass clazz;
// Loads model and parameters from given file. Returns NULL if it is not possible.
DNNModel *(*load_model)(const char *model_filename, DNNFunctionType func_type, const char *options, AVFilterContext *filter_ctx);
DNNModel *(*load_model)(DnnContext *ctx, DNNFunctionType func_type, AVFilterContext *filter_ctx);
// Executes model with specified input and output. Returns the error code otherwise.
int (*execute_model)(const DNNModel *model, DNNExecBaseParams *exec_params);
// Retrieve inference result.
......@@ -129,11 +182,15 @@ typedef struct DNNModule{
int (*flush)(const DNNModel *model);
// Frees memory allocated for model.
void (*free_model)(DNNModel **model);
} DNNModule;
};
// Initializes DNNModule depending on chosen backend.
const DNNModule *ff_get_dnn_module(DNNBackendType backend_type, void *log_ctx);
void ff_dnn_init_child_class(DnnContext *ctx);
void *ff_dnn_child_next(DnnContext *obj, void *prev);
const AVClass *ff_dnn_child_class_iterate(void **iter);
static inline int dnn_get_width_idx_by_layout(DNNLayout layout)
{
return layout == DL_NHWC ? 2 : 3;
......
......@@ -46,13 +46,10 @@ static const AVOption derain_options[] = {
#if (CONFIG_LIBTENSORFLOW == 1)
{ "tensorflow", "tensorflow backend flag", 0, AV_OPT_TYPE_CONST, { .i64 = 1 }, 0, 0, FLAGS, .unit = "backend" },
#endif
{ "model", "path to model file", OFFSET(dnnctx.model_filename), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, FLAGS },
{ "input", "input name of the model", OFFSET(dnnctx.model_inputname), AV_OPT_TYPE_STRING, { .str = "x" }, 0, 0, FLAGS },
{ "output", "output name of the model", OFFSET(dnnctx.model_outputnames_string), AV_OPT_TYPE_STRING, { .str = "y" }, 0, 0, FLAGS },
{ NULL }
};
AVFILTER_DEFINE_CLASS(derain);
AVFILTER_DNN_DEFINE_CLASS(derain);
static int filter_frame(AVFilterLink *inlink, AVFrame *in)
{
......@@ -113,6 +110,7 @@ const AVFilter ff_vf_derain = {
.name = "derain",
.description = NULL_IF_CONFIG_SMALL("Apply derain filter to the input."),
.priv_size = sizeof(DRContext),
.preinit = ff_dnn_filter_init_child_class,
.init = init,
.uninit = uninit,
FILTER_INPUTS(derain_inputs),
......
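The filter-side conversion above depends on a fixed layout of the
filter's private context: it must begin with the same members as
DnnFilterBase so the cast inside ff_dnn_filter_init_child_class() is
valid. A sketch of the assumed pattern (struct and trailing field
invented for illustration):

typedef struct ExampleFilterContext {
    const AVClass *class; /* must be first, mirroring DnnFilterBase */
    DnnContext dnnctx;    /* must follow immediately; filter->priv is cast to DnnFilterBase */
    int filter_specific;  /* remaining members are filter specific */
} ExampleFilterContext;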
......@@ -50,14 +50,13 @@ static const AVOption dnn_classify_options[] = {
#if (CONFIG_LIBOPENVINO == 1)
{ "openvino", "openvino backend flag", 0, AV_OPT_TYPE_CONST, { .i64 = DNN_OV }, 0, 0, FLAGS, .unit = "backend" },
#endif
DNN_COMMON_OPTIONS
{ "confidence", "threshold of confidence", OFFSET2(confidence), AV_OPT_TYPE_FLOAT, { .dbl = 0.5 }, 0, 1, FLAGS},
{ "labels", "path to labels file", OFFSET2(labels_filename), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, FLAGS },
{ "target", "which one to be classified", OFFSET2(target), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, FLAGS },
{ NULL }
};
AVFILTER_DEFINE_CLASS(dnn_classify);
AVFILTER_DNN_DEFINE_CLASS(dnn_classify);
static int dnn_classify_post_proc(AVFrame *frame, DNNData *output, uint32_t bbox_index, AVFilterContext *filter_ctx)
{
......@@ -299,6 +298,7 @@ const AVFilter ff_vf_dnn_classify = {
.name = "dnn_classify",
.description = NULL_IF_CONFIG_SMALL("Apply DNN classify filter to the input."),
.priv_size = sizeof(DnnClassifyContext),
.preinit = ff_dnn_filter_init_child_class,
.init = dnn_classify_init,
.uninit = dnn_classify_uninit,
FILTER_INPUTS(ff_video_default_filterpad),
......
......@@ -70,7 +70,6 @@ static const AVOption dnn_detect_options[] = {
#if (CONFIG_LIBOPENVINO == 1)
{ "openvino", "openvino backend flag", 0, AV_OPT_TYPE_CONST, { .i64 = DNN_OV }, 0, 0, FLAGS, .unit = "backend" },
#endif
DNN_COMMON_OPTIONS
{ "confidence", "threshold of confidence", OFFSET2(confidence), AV_OPT_TYPE_FLOAT, { .dbl = 0.5 }, 0, 1, FLAGS},
{ "labels", "path to labels file", OFFSET2(labels_filename), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, FLAGS },
{ "model_type", "DNN detection model type", OFFSET2(model_type), AV_OPT_TYPE_INT, { .i64 = DDMT_SSD }, INT_MIN, INT_MAX, FLAGS, .unit = "model_type" },
......@@ -85,7 +84,7 @@ static const AVOption dnn_detect_options[] = {
{ NULL }
};
AVFILTER_DEFINE_CLASS(dnn_detect);
AVFILTER_DNN_DEFINE_CLASS(dnn_detect);
static inline float sigmoid(float x) {
return 1.f / (1.f + exp(-x));
......@@ -851,6 +850,7 @@ const AVFilter ff_vf_dnn_detect = {
.name = "dnn_detect",
.description = NULL_IF_CONFIG_SMALL("Apply DNN detect filter to the input."),
.priv_size = sizeof(DnnDetectContext),
.preinit = ff_dnn_filter_init_child_class,
.init = dnn_detect_init,
.uninit = dnn_detect_uninit,
FILTER_INPUTS(dnn_detect_inputs),
......
......@@ -54,11 +54,10 @@ static const AVOption dnn_processing_options[] = {
#if (CONFIG_LIBTORCH == 1)
{ "torch", "torch backend flag", 0, AV_OPT_TYPE_CONST, { .i64 = DNN_TH }, 0, 0, FLAGS, "backend" },
#endif
DNN_COMMON_OPTIONS
{ NULL }
};
AVFILTER_DEFINE_CLASS(dnn_processing);
AVFILTER_DNN_DEFINE_CLASS(dnn_processing);
static av_cold int init(AVFilterContext *context)
{
......@@ -373,6 +372,7 @@ const AVFilter ff_vf_dnn_processing = {
.name = "dnn_processing",
.description = NULL_IF_CONFIG_SMALL("Apply DNN processing filter to the input."),
.priv_size = sizeof(DnnProcessingContext),
.preinit = ff_dnn_filter_init_child_class,
.init = init,
.uninit = uninit,
FILTER_INPUTS(dnn_processing_inputs),
......
......@@ -50,13 +50,10 @@ static const AVOption sr_options[] = {
{ "tensorflow", "tensorflow backend flag", 0, AV_OPT_TYPE_CONST, { .i64 = 1 }, 0, 0, FLAGS, .unit = "backend" },
#endif
{ "scale_factor", "scale factor for SRCNN model", OFFSET(scale_factor), AV_OPT_TYPE_INT, { .i64 = 2 }, 2, 4, FLAGS },
{ "model", "path to model file specifying network architecture and its parameters", OFFSET(dnnctx.model_filename), AV_OPT_TYPE_STRING, {.str=NULL}, 0, 0, FLAGS },
{ "input", "input name of the model", OFFSET(dnnctx.model_inputname), AV_OPT_TYPE_STRING, { .str = "x" }, 0, 0, FLAGS },
{ "output", "output name of the model", OFFSET(dnnctx.model_outputnames_string), AV_OPT_TYPE_STRING, { .str = "y" }, 0, 0, FLAGS },
{ NULL }
};
AVFILTER_DEFINE_CLASS(sr);
AVFILTER_DNN_DEFINE_CLASS(sr);
static av_cold int init(AVFilterContext *context)
{
......@@ -192,6 +189,7 @@ const AVFilter ff_vf_sr = {
.name = "sr",
.description = NULL_IF_CONFIG_SMALL("Apply DNN-based image super resolution to the input."),
.priv_size = sizeof(SRContext),
.preinit = ff_dnn_filter_init_child_class,
.init = init,
.uninit = uninit,
FILTER_INPUTS(sr_inputs),
......