Commit 5402c188 authored by James Almer

x86/af_afir: add ff_fcmul_add_avx()

fcmul_add_c: 1228.8
fcmul_add_sse3: 334.3
fcmul_add_avx: 186.3

Tested on a Core i5 4460 @ 3.2GHz
Reviewed-by: Paul B Mahol <onemda@gmail.com>
Signed-off-by: James Almer <jamrial@gmail.com>
parent ba89dc27
......@@ -27,7 +27,7 @@ SECTION .text
; void ff_fcmul_add(float *sum, const float *t, const float *c, ptrdiff_t len)
;------------------------------------------------------------------------------
INIT_XMM sse3
%macro FCMUL_ADD 0
cglobal fcmul_add, 4,4,6, sum, t, c, len
shl lend, 3
add tq, lenq
......@@ -61,3 +61,9 @@ ALIGN 16
addss xm0, [sumq + lenq]
movss [sumq + lenq], xm0
RET
%endmacro
INIT_XMM sse3
FCMUL_ADD
INIT_YMM avx
FCMUL_ADD
......@@ -24,6 +24,8 @@
/*
 * Prototypes for the x86 SIMD versions of fcmul_add. Both symbols are
 * emitted from the same FCMUL_ADD assembly macro, instantiated once under
 * INIT_XMM sse3 and once under INIT_YMM avx.
 * NOTE(review): presumably a complex multiply-accumulate of t and c into
 * sum, with len counted in complex pairs (the asm scales len by 8 bytes) --
 * confirm against the C reference implementation.
 */
void ff_fcmul_add_sse3(float *sum, const float *t, const float *c,
ptrdiff_t len);
void ff_fcmul_add_avx(float *sum, const float *t, const float *c,
ptrdiff_t len);
av_cold void ff_afir_init_x86(AudioFIRDSPContext *s)
{
......@@ -32,4 +34,7 @@ av_cold void ff_afir_init_x86(AudioFIRDSPContext *s)
if (EXTERNAL_SSE3(cpu_flags)) {
s->fcmul_add = ff_fcmul_add_sse3;
}
if (EXTERNAL_AVX_FAST(cpu_flags)) {
s->fcmul_add = ff_fcmul_add_avx;
}
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment