Commit d4d09c8e authored by Ramiro Polla's avatar Ramiro Polla

lavc/aarch64/fdct: add neon-optimized fdct for aarch64

The code is imported from libjpeg-turbo-3.0.1. The neon registers used
have been changed to avoid modifying v8-v15.
Reviewed-by: 's avatarMartin Storsjö <martin@martin.st>
parent 27f6211c
# subsystems
OBJS-$(CONFIG_AC3DSP) += aarch64/ac3dsp_init_aarch64.o
OBJS-$(CONFIG_FDCTDSP) += aarch64/fdctdsp_init_aarch64.o
OBJS-$(CONFIG_FMTCONVERT) += aarch64/fmtconvert_init.o
OBJS-$(CONFIG_H264CHROMA) += aarch64/h264chroma_init_aarch64.o
OBJS-$(CONFIG_H264DSP) += aarch64/h264dsp_init_aarch64.o
......@@ -37,6 +38,7 @@ ARMV8-OBJS-$(CONFIG_VIDEODSP) += aarch64/videodsp.o
# subsystems
NEON-OBJS-$(CONFIG_AAC_DECODER) += aarch64/sbrdsp_neon.o
NEON-OBJS-$(CONFIG_AC3DSP) += aarch64/ac3dsp_neon.o
NEON-OBJS-$(CONFIG_FDCTDSP) += aarch64/fdctdsp_neon.o
NEON-OBJS-$(CONFIG_FMTCONVERT) += aarch64/fmtconvert_neon.o
NEON-OBJS-$(CONFIG_H264CHROMA) += aarch64/h264cmc_neon.o
NEON-OBJS-$(CONFIG_H264DSP) += aarch64/h264dsp_neon.o \
......
/*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef AVCODEC_AARCH64_FDCT_H
#define AVCODEC_AARCH64_FDCT_H
#include <stdint.h>
void ff_fdct_neon(int16_t *block);
#endif /* AVCODEC_AARCH64_FDCT_H */
/*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "libavutil/attributes.h"
#include "libavutil/cpu.h"
#include "libavutil/aarch64/cpu.h"
#include "libavcodec/avcodec.h"
#include "libavcodec/fdctdsp.h"
#include "fdct.h"
av_cold void ff_fdctdsp_init_aarch64(FDCTDSPContext *c, AVCodecContext *avctx,
unsigned high_bit_depth)
{
int cpu_flags = av_get_cpu_flags();
if (have_neon(cpu_flags)) {
if (!high_bit_depth) {
if (avctx->dct_algo == FF_DCT_AUTO ||
avctx->dct_algo == FF_DCT_NEON) {
c->fdct = ff_fdct_neon;
}
}
}
}
This diff is collapsed.
......@@ -1538,6 +1538,7 @@ typedef struct AVCodecContext {
#define FF_DCT_MMX 3
#define FF_DCT_ALTIVEC 5
#define FF_DCT_FAAN 6
#define FF_DCT_NEON 7
/**
* IDCT algorithm, see FF_IDCT_* below.
......
......@@ -42,7 +42,9 @@ av_cold void ff_fdctdsp_init(FDCTDSPContext *c, AVCodecContext *avctx)
c->fdct248 = ff_fdct248_islow_8;
}
#if ARCH_PPC
#if ARCH_AARCH64
ff_fdctdsp_init_aarch64(c, avctx, high_bit_depth);
#elif ARCH_PPC
ff_fdctdsp_init_ppc(c, avctx, high_bit_depth);
#elif ARCH_X86
ff_fdctdsp_init_x86(c, avctx, high_bit_depth);
......
......@@ -32,6 +32,8 @@ typedef struct FDCTDSPContext {
FF_VISIBILITY_PUSH_HIDDEN
void ff_fdctdsp_init(FDCTDSPContext *c, struct AVCodecContext *avctx);
void ff_fdctdsp_init_aarch64(FDCTDSPContext *c, struct AVCodecContext *avctx,
unsigned high_bit_depth);
void ff_fdctdsp_init_ppc(FDCTDSPContext *c, struct AVCodecContext *avctx,
unsigned high_bit_depth);
void ff_fdctdsp_init_x86(FDCTDSPContext *c, struct AVCodecContext *avctx,
......
......@@ -158,6 +158,7 @@ static const AVOption avcodec_options[] = {
{"mmx", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_DCT_MMX }, INT_MIN, INT_MAX, V|E, .unit = "dct"},
{"altivec", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_DCT_ALTIVEC }, INT_MIN, INT_MAX, V|E, .unit = "dct"},
{"faan", "floating point AAN DCT", 0, AV_OPT_TYPE_CONST, {.i64 = FF_DCT_FAAN }, INT_MIN, INT_MAX, V|E, .unit = "dct"},
{"neon", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_DCT_NEON }, INT_MIN, INT_MAX, V|E, .unit = "dct"},
{"lumi_mask", "compresses bright areas stronger than medium ones", OFFSET(lumi_masking), AV_OPT_TYPE_FLOAT, {.dbl = 0 }, -FLT_MAX, FLT_MAX, V|E},
{"tcplx_mask", "temporal complexity masking", OFFSET(temporal_cplx_masking), AV_OPT_TYPE_FLOAT, {.dbl = 0 }, -FLT_MAX, FLT_MAX, V|E},
{"scplx_mask", "spatial complexity masking", OFFSET(spatial_cplx_masking), AV_OPT_TYPE_FLOAT, {.dbl = 0 }, -FLT_MAX, FLT_MAX, V|E},
......
......@@ -19,9 +19,11 @@
#include "config.h"
#include "libavutil/cpu.h"
#include "libavcodec/aarch64/fdct.h"
#include "libavcodec/aarch64/idct.h"
static const struct algo fdct_tab_arch[] = {
{ "neon", ff_fdct_neon, FF_IDCT_PERM_NONE, AV_CPU_FLAG_NEON },
{ 0 }
};
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment