Commit 636ae0e0 authored by Rémi Denis-Courmont

lavc/flacdsp: R-V V packed decorrelate_{l,r}s

flac_decorrelate_ls_16_c:       457.2
flac_decorrelate_ls_16_rvv_i32: 203.0
flac_decorrelate_ls_32_c:       457.2
flac_decorrelate_ls_32_rvv_i32: 203.5
flac_decorrelate_rs_16_c:       456.2
flac_decorrelate_rs_16_rvv_i32: 207.0
flac_decorrelate_rs_32_c:       456.2
flac_decorrelate_rs_32_rvv_i32: 210.5
parent be167503
...@@ -121,6 +121,8 @@ av_cold void ff_flacdsp_init(FLACDSPContext *c, enum AVSampleFormat fmt, int cha ...@@ -121,6 +121,8 @@ av_cold void ff_flacdsp_init(FLACDSPContext *c, enum AVSampleFormat fmt, int cha
#if ARCH_ARM #if ARCH_ARM
ff_flacdsp_init_arm(c, fmt, channels); ff_flacdsp_init_arm(c, fmt, channels);
#elif ARCH_RISCV
ff_flacdsp_init_riscv(c, fmt, channels);
#elif ARCH_X86 #elif ARCH_X86
ff_flacdsp_init_x86(c, fmt, channels); ff_flacdsp_init_x86(c, fmt, channels);
#endif #endif
......
...@@ -38,6 +38,7 @@ typedef struct FLACDSPContext { ...@@ -38,6 +38,7 @@ typedef struct FLACDSPContext {
void ff_flacdsp_init(FLACDSPContext *c, enum AVSampleFormat fmt, int channels); void ff_flacdsp_init(FLACDSPContext *c, enum AVSampleFormat fmt, int channels);
void ff_flacdsp_init_arm(FLACDSPContext *c, enum AVSampleFormat fmt, int channels); void ff_flacdsp_init_arm(FLACDSPContext *c, enum AVSampleFormat fmt, int channels);
void ff_flacdsp_init_riscv(FLACDSPContext *c, enum AVSampleFormat fmt, int channels);
void ff_flacdsp_init_x86(FLACDSPContext *c, enum AVSampleFormat fmt, int channels); void ff_flacdsp_init_x86(FLACDSPContext *c, enum AVSampleFormat fmt, int channels);
#endif /* AVCODEC_FLACDSP_H */ #endif /* AVCODEC_FLACDSP_H */
...@@ -12,6 +12,8 @@ OBJS-$(CONFIG_BSWAPDSP) += riscv/bswapdsp_init.o \ ...@@ -12,6 +12,8 @@ OBJS-$(CONFIG_BSWAPDSP) += riscv/bswapdsp_init.o \
RVV-OBJS-$(CONFIG_BSWAPDSP) += riscv/bswapdsp_rvv.o RVV-OBJS-$(CONFIG_BSWAPDSP) += riscv/bswapdsp_rvv.o
OBJS-$(CONFIG_EXR_DECODER) += riscv/exrdsp_init.o OBJS-$(CONFIG_EXR_DECODER) += riscv/exrdsp_init.o
RVV-OBJS-$(CONFIG_EXR_DECODER) += riscv/exrdsp_rvv.o RVV-OBJS-$(CONFIG_EXR_DECODER) += riscv/exrdsp_rvv.o
OBJS-$(CONFIG_FLAC_DECODER) += riscv/flacdsp_init.o
RVV-OBJS-$(CONFIG_FLAC_DECODER) += riscv/flacdsp_rvv.o
OBJS-$(CONFIG_FMTCONVERT) += riscv/fmtconvert_init.o OBJS-$(CONFIG_FMTCONVERT) += riscv/fmtconvert_init.o
RVV-OBJS-$(CONFIG_FMTCONVERT) += riscv/fmtconvert_rvv.o RVV-OBJS-$(CONFIG_FMTCONVERT) += riscv/fmtconvert_rvv.o
OBJS-$(CONFIG_G722DSP) += riscv/g722dsp_init.o OBJS-$(CONFIG_G722DSP) += riscv/g722dsp_init.o
......
/*
* Copyright © 2023 Rémi Denis-Courmont.
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "config.h"
#include "libavutil/attributes.h"
#include "libavutil/cpu.h"
#include "libavcodec/flacdsp.h"
/*
 * Stereo decorrelation routines written in RISC-V Vector assembly
 * (built from riscv/flacdsp_rvv.o).
 *
 * "ls" is the left/side variant and "rs" the right/side variant; the
 * _16 routines write interleaved 16-bit samples and the _32 routines write
 * interleaved 32-bit samples. All of them read in[0] and in[1], write
 * through out[0] only, and do not use the channels argument.
 */
void ff_flac_decorrelate_ls_16_rvv(uint8_t **out, int32_t **in,
int channels, int len, int shift);
void ff_flac_decorrelate_rs_16_rvv(uint8_t **out, int32_t **in,
int channels, int len, int shift);
void ff_flac_decorrelate_ls_32_rvv(uint8_t **out, int32_t **in,
int channels, int len, int shift);
void ff_flac_decorrelate_rs_32_rvv(uint8_t **out, int32_t **in,
int channels, int len, int shift);
/**
 * Install the RISC-V Vector implementations of the FLAC stereo
 * decorrelation routines into a DSP context.
 *
 * Only decorrelate[1] (left/side) and decorrelate[2] (right/side) are
 * replaced, and only when the output format is AV_SAMPLE_FMT_S16 or
 * AV_SAMPLE_FMT_S32. Every other entry, and every other sample format,
 * keeps whatever the caller had already set up.
 *
 * @param c        DSP context whose decorrelate[] table is updated in place
 * @param fmt      output sample format selecting the 16-bit or 32-bit routines
 * @param channels channel count; not used by this function
 */
av_cold void ff_flacdsp_init_riscv(FLACDSPContext *c, enum AVSampleFormat fmt,
                                   int channels)
{
    /*
     * The assembly routines are only assembled for __riscv_xlen == 64: they
     * fetch the channel pointers with 64-bit loads at 8-byte strides. Use the
     * very same condition here so that these symbols are never referenced in
     * a configuration where they are not defined.
     */
#if HAVE_RVV && (__riscv_xlen == 64)
    int flags = av_get_cpu_flags();

    /* The routines need 32-bit integer vectors and the address-generation
     * bit-manipulation instructions (sh2add/sh3add). */
    if ((flags & AV_CPU_FLAG_RVV_I32) && (flags & AV_CPU_FLAG_RVB_ADDR)) {
        switch (fmt) {
        case AV_SAMPLE_FMT_S16:
            c->decorrelate[1] = ff_flac_decorrelate_ls_16_rvv;
            c->decorrelate[2] = ff_flac_decorrelate_rs_16_rvv;
            break;
        case AV_SAMPLE_FMT_S32:
            c->decorrelate[1] = ff_flac_decorrelate_ls_32_rvv;
            c->decorrelate[2] = ff_flac_decorrelate_rs_32_rvv;
            break;
        default:
            /* No vector routine for this format: leave the table untouched. */
            break;
        }
    }
#endif
}
/*
* Copyright © 2023 Rémi Denis-Courmont.
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "libavutil/riscv/asm.S"
#if (__riscv_xlen == 64)
/*
 * void ff_flac_decorrelate_ls_16_rvv(uint8_t **out, int32_t **in,
 *                                    int channels, int len, int shift)
 *
 * Left/side decorrelation with interleaved 16-bit output. For each sample i:
 *   first  output value = low 16 bits of (in[0][i] << shift)
 *   second output value = low 16 bits of
 *                         ((in[0][i] << shift) - (in[1][i] << shift))
 * Both values are stored back to back through out[0].
 *
 * On entry: a0 = out, a1 = in, a3 = len, a4 = shift. The channels argument
 * (a2) is not read; a2 is reused as the second input pointer.
 */
func ff_flac_decorrelate_ls_16_rvv, zve32x
/* a0 = out[0], a2 = in[1], a1 = in[0] (a1 is loaded last because it is
 * the base register of the other load). */
ld a0, (a0)
ld a2, 8(a1)
ld a1, (a1)
1:
/* t0 = number of samples handled in this pass, as 32-bit elements with
 * register groups of 8 (v0-v7 and v8-v15). */
vsetvli t0, a3, e32, m8, ta, ma
/* Load the next t0 samples of each channel, decrement the remaining
 * count and advance both input pointers by t0 * 4 bytes. */
vle32.v v0, (a1)
sub a3, a3, t0
vle32.v v8, (a2)
sh2add a1, t0, a1
/* Apply the left shift to both channels. */
vsll.vx v0, v0, a4
sh2add a2, t0, a2
vsll.vx v8, v8, a4
/* v8 = shifted in[0] - shifted in[1]; v0 is passed through unchanged. */
vsub.vv v8, v0, v8
/* Switch to 16-bit elements; halving both element width and group size
 * leaves the vector length unchanged. */
vsetvli zero, zero, e16, m4, ta, ma
/* Narrow both results to 16 bits into the adjacent groups v16 and v20. */
vncvt.x.x.w v16, v0
vncvt.x.x.w v20, v8
/* Two-field segment store: writes (v16[i], v20[i]) pairs contiguously. */
vsseg2e16.v v16, (a0)
/* Each sample produced 2 * 2 = 4 output bytes. */
sh2add a0, t0, a0
bnez a3, 1b
ret
endfunc
/*
 * void ff_flac_decorrelate_rs_16_rvv(uint8_t **out, int32_t **in,
 *                                    int channels, int len, int shift)
 *
 * Right/side decorrelation with interleaved 16-bit output. For each sample i:
 *   first  output value = low 16 bits of
 *                         ((in[0][i] << shift) + (in[1][i] << shift))
 *   second output value = low 16 bits of (in[1][i] << shift)
 * Both values are stored back to back through out[0].
 *
 * On entry: a0 = out, a1 = in, a3 = len, a4 = shift. The channels argument
 * (a2) is not read; a2 is reused as the second input pointer.
 */
func ff_flac_decorrelate_rs_16_rvv, zve32x
/* a0 = out[0], a2 = in[1], a1 = in[0] (a1 is loaded last because it is
 * the base register of the other load). */
ld a0, (a0)
ld a2, 8(a1)
ld a1, (a1)
1:
/* t0 = number of samples handled in this pass, as 32-bit elements with
 * register groups of 8 (v0-v7 and v8-v15). */
vsetvli t0, a3, e32, m8, ta, ma
/* Load the next t0 samples of each channel, decrement the remaining
 * count and advance both input pointers by t0 * 4 bytes. */
vle32.v v0, (a1)
sub a3, a3, t0
vle32.v v8, (a2)
sh2add a1, t0, a1
/* Apply the left shift to both channels. */
vsll.vx v0, v0, a4
sh2add a2, t0, a2
vsll.vx v8, v8, a4
/* v0 = shifted in[0] + shifted in[1]; v8 is passed through unchanged. */
vadd.vv v0, v0, v8
/* Switch to 16-bit elements; halving both element width and group size
 * leaves the vector length unchanged. */
vsetvli zero, zero, e16, m4, ta, ma
/* Narrow both results to 16 bits into the adjacent groups v16 and v20. */
vncvt.x.x.w v16, v0
vncvt.x.x.w v20, v8
/* Two-field segment store: writes (v16[i], v20[i]) pairs contiguously. */
vsseg2e16.v v16, (a0)
/* Each sample produced 2 * 2 = 4 output bytes. */
sh2add a0, t0, a0
bnez a3, 1b
ret
endfunc
/*
 * void ff_flac_decorrelate_ls_32_rvv(uint8_t **out, int32_t **in,
 *                                    int channels, int len, int shift)
 *
 * Left/side decorrelation with interleaved 32-bit output. For each sample i:
 *   first  output value = in[0][i] << shift
 *   second output value = (in[0][i] << shift) - (in[1][i] << shift)
 * Both values are stored back to back through out[0].
 *
 * On entry: a0 = out, a1 = in, a3 = len, a4 = shift. The channels argument
 * (a2) is not read; a2 is reused as the second input pointer.
 */
func ff_flac_decorrelate_ls_32_rvv, zve32x
/* a0 = out[0], a2 = in[1], a1 = in[0] (a1 is loaded last because it is
 * the base register of the other load). */
ld a0, (a0)
ld a2, 8(a1)
ld a1, (a1)
1:
/* t0 = number of samples handled in this pass, as 32-bit elements with
 * register groups of 4, so that v0-v3 and v4-v7 form the two adjacent
 * fields the segment store below expects. */
vsetvli t0, a3, e32, m4, ta, ma
/* Load the next t0 samples of each channel, decrement the remaining
 * count and advance both input pointers by t0 * 4 bytes. */
vle32.v v0, (a1)
sub a3, a3, t0
vle32.v v4, (a2)
sh2add a1, t0, a1
/* Apply the left shift to both channels. */
vsll.vx v0, v0, a4
sh2add a2, t0, a2
vsll.vx v4, v4, a4
/* v4 = shifted in[0] - shifted in[1]; v0 is passed through unchanged. */
vsub.vv v4, v0, v4
/* Two-field segment store: writes (v0[i], v4[i]) pairs contiguously. */
vsseg2e32.v v0, (a0)
/* Each sample produced 2 * 4 = 8 output bytes. */
sh3add a0, t0, a0
bnez a3, 1b
ret
endfunc
/*
 * void ff_flac_decorrelate_rs_32_rvv(uint8_t **out, int32_t **in,
 *                                    int channels, int len, int shift)
 *
 * Right/side decorrelation with interleaved 32-bit output. For each sample i:
 *   first  output value = (in[0][i] << shift) + (in[1][i] << shift)
 *   second output value = in[1][i] << shift
 * Both values are stored back to back through out[0].
 *
 * On entry: a0 = out, a1 = in, a3 = len, a4 = shift. The channels argument
 * (a2) is not read; a2 is reused as the second input pointer.
 */
func ff_flac_decorrelate_rs_32_rvv, zve32x
/* a0 = out[0], a2 = in[1], a1 = in[0] (a1 is loaded last because it is
 * the base register of the other load). */
ld a0, (a0)
ld a2, 8(a1)
ld a1, (a1)
1:
/* t0 = number of samples handled in this pass, as 32-bit elements with
 * register groups of 4, so that v0-v3 and v4-v7 form the two adjacent
 * fields the segment store below expects. */
vsetvli t0, a3, e32, m4, ta, ma
/* Load the next t0 samples of each channel, decrement the remaining
 * count and advance both input pointers by t0 * 4 bytes. */
vle32.v v0, (a1)
sub a3, a3, t0
vle32.v v4, (a2)
sh2add a1, t0, a1
/* Apply the left shift to both channels. */
vsll.vx v0, v0, a4
sh2add a2, t0, a2
vsll.vx v4, v4, a4
/* v0 = shifted in[0] + shifted in[1]; v4 is passed through unchanged. */
vadd.vv v0, v0, v4
/* Two-field segment store: writes (v0[i], v4[i]) pairs contiguously. */
vsseg2e32.v v0, (a0)
/* Each sample produced 2 * 4 = 8 output bytes. */
sh3add a0, t0, a0
bnez a3, 1b
ret
endfunc
#endif
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment