Commit 9238f6cb authored by Rémi Denis-Courmont

lavu/float_dsp: R-V V scalarproduct_double

C908:
scalarproduct_double_c:       39.2
scalarproduct_double_rvv_f64: 10.5

X60:
scalarproduct_double_c:       35.0
scalarproduct_double_rvv_f64:  5.2
parent b1149520
......@@ -46,6 +46,8 @@ void ff_vector_dmac_scalar_rvv(double *dst, const double *src, double mul,
int len);
/* Routine installed as fdsp->vector_dmul_scalar by ff_float_dsp_init_riscv(). */
void ff_vector_dmul_scalar_rvv(double *dst, const double *src, double mul,
int len);
/*
 * Dot product of two arrays of doubles: returns the sum of v1[i] * v2[i]
 * over the first len elements. Implemented in assembly and installed as
 * fdsp->scalarproduct_double by ff_float_dsp_init_riscv().
 */
double ff_scalarproduct_double_rvv(const double *v1, const double *v2,
size_t len);
av_cold void ff_float_dsp_init_riscv(AVFloatDSPContext *fdsp)
{
......@@ -68,6 +70,7 @@ av_cold void ff_float_dsp_init_riscv(AVFloatDSPContext *fdsp)
fdsp->vector_dmul = ff_vector_dmul_rvv;
fdsp->vector_dmac_scalar = ff_vector_dmac_scalar_rvv;
fdsp->vector_dmul_scalar = ff_vector_dmul_scalar_rvv;
fdsp->scalarproduct_double = ff_scalarproduct_double_rvv;
}
}
#endif
......
......@@ -249,3 +249,24 @@ NOHWD mv a2, a3
ret
endfunc
// double ff_scalarproduct_double_rvv(const double *v1, const double *v2,
//                                    size_t len)
//
// Dot product of two arrays of doubles.
//   a0 = v1, a1 = v2, a2 = len (number of elements)
// The result is produced in fa0. The NOHWD-prefixed line additionally copies
// it to a0 (presumably for ABIs where doubles are not returned in FP
// registers; verify against the NOHWD definition).
//
// NOTE(review): floating-point vector arithmetic at e64 is specified under
// Zve64d rather than Zve64f, and sh3add belongs to Zba; confirm that the
// extension list on the func line below is sufficient.
func ff_scalarproduct_double_rvv, zve64f
        vsetvli      t0, zero, e64, m8, ta, ma
        // v8 (m8 group): per-lane partial sums, cleared across all lanes.
        vmv.v.x      v8, zero
        // v0[0]: scalar seed of the final reduction.
        vmv.s.x      v0, zero
1:
        // Tail-undisturbed so that, when the last pass is shorter than the
        // previous ones, partial sums in lanes past vl are preserved.
        vsetvli      t0, a2, e64, m8, tu, ma
        vle64.v      v16, (a0)
        sub          a2, a2, t0
        vle64.v      v24, (a1)
        // Advance v1 by t0 elements of 8 bytes each.
        sh3add       a0, t0, a0
        // v8 += v16 * v24
        vfmacc.vv    v8, v16, v24
        // Advance v2 likewise.
        sh3add       a1, t0, a1
        bnez         a2, 1b

        // Back to the maximum vector length so every lane is reduced.
        vsetvli      t0, zero, e64, m8, ta, ma
        vfredusum.vs v0, v8, v0
        vfmv.f.s     fa0, v0
        // The result is a double: move all 64 bits to a0. fmv.x.w would copy
        // only the low 32 bits of fa0. fmv.x.d is an RV64 instruction.
NOHWD   fmv.x.d      a0, fa0
        ret
endfunc
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment