Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
F
ffmpeg
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Stefan Westerfeld
ffmpeg
Commits
047c362d
Commit
047c362d
authored
Oct 24, 2021
by
Paul B Mahol
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
avfilter/vf_nlmeans: add x86 SIMD
parent
aebdffb9
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
146 additions
and
3 deletions
+146
-3
vf_nlmeans.c
libavfilter/vf_nlmeans.c
+6
-3
vf_nlmeans.h
libavfilter/vf_nlmeans.h
+1
-0
Makefile
libavfilter/x86/Makefile
+2
-0
vf_nlmeans.asm
libavfilter/x86/vf_nlmeans.asm
+97
-0
vf_nlmeans_init.c
libavfilter/x86/vf_nlmeans_init.c
+40
-0
No files found.
libavfilter/vf_nlmeans.c
View file @
047c362d
...
@@ -308,9 +308,9 @@ static int config_input(AVFilterLink *inlink)
...
@@ -308,9 +308,9 @@ static int config_input(AVFilterLink *inlink)
s
->
ii
=
s
->
ii_orig
+
s
->
ii_lz_32
+
1
;
s
->
ii
=
s
->
ii_orig
+
s
->
ii_lz_32
+
1
;
// allocate weighted average for every pixel
// allocate weighted average for every pixel
s
->
linesize
=
inlink
->
w
;
s
->
linesize
=
inlink
->
w
+
100
;
s
->
total_weight
=
av_malloc_array
(
inlink
->
w
,
inlink
->
h
*
sizeof
(
*
s
->
total_weight
));
s
->
total_weight
=
av_malloc_array
(
s
->
linesize
,
inlink
->
h
*
sizeof
(
*
s
->
total_weight
));
s
->
sum
=
av_malloc_array
(
inlink
->
w
,
inlink
->
h
*
sizeof
(
*
s
->
sum
));
s
->
sum
=
av_malloc_array
(
s
->
linesize
,
inlink
->
h
*
sizeof
(
*
s
->
sum
));
if
(
!
s
->
total_weight
||
!
s
->
sum
)
if
(
!
s
->
total_weight
||
!
s
->
sum
)
return
AVERROR
(
ENOMEM
);
return
AVERROR
(
ENOMEM
);
...
@@ -519,6 +519,9 @@ void ff_nlmeans_init(NLMeansDSPContext *dsp)
...
@@ -519,6 +519,9 @@ void ff_nlmeans_init(NLMeansDSPContext *dsp)
if
(
ARCH_AARCH64
)
if
(
ARCH_AARCH64
)
ff_nlmeans_init_aarch64
(
dsp
);
ff_nlmeans_init_aarch64
(
dsp
);
if
(
ARCH_X86
)
ff_nlmeans_init_x86
(
dsp
);
}
}
static
av_cold
int
init
(
AVFilterContext
*
ctx
)
static
av_cold
int
init
(
AVFilterContext
*
ctx
)
...
...
libavfilter/vf_nlmeans.h
View file @
047c362d
...
@@ -41,5 +41,6 @@ typedef struct NLMeansDSPContext {
...
@@ -41,5 +41,6 @@ typedef struct NLMeansDSPContext {
void
ff_nlmeans_init
(
NLMeansDSPContext
*
dsp
);
void
ff_nlmeans_init
(
NLMeansDSPContext
*
dsp
);
void
ff_nlmeans_init_aarch64
(
NLMeansDSPContext
*
dsp
);
void
ff_nlmeans_init_aarch64
(
NLMeansDSPContext
*
dsp
);
void
ff_nlmeans_init_x86
(
NLMeansDSPContext
*
dsp
);
#endif
/* AVFILTER_NLMEANS_H */
#endif
/* AVFILTER_NLMEANS_H */
libavfilter/x86/Makefile
View file @
047c362d
...
@@ -20,6 +20,7 @@ OBJS-$(CONFIG_LIMITER_FILTER) += x86/vf_limiter_init.o
...
@@ -20,6 +20,7 @@ OBJS-$(CONFIG_LIMITER_FILTER) += x86/vf_limiter_init.o
OBJS-$(CONFIG_LUT3D_FILTER)
+=
x86/vf_lut3d_init.o
OBJS-$(CONFIG_LUT3D_FILTER)
+=
x86/vf_lut3d_init.o
OBJS-$(CONFIG_MASKEDCLAMP_FILTER)
+=
x86/vf_maskedclamp_init.o
OBJS-$(CONFIG_MASKEDCLAMP_FILTER)
+=
x86/vf_maskedclamp_init.o
OBJS-$(CONFIG_MASKEDMERGE_FILTER)
+=
x86/vf_maskedmerge_init.o
OBJS-$(CONFIG_MASKEDMERGE_FILTER)
+=
x86/vf_maskedmerge_init.o
OBJS-$(CONFIG_NLMEANS_FILTER)
+=
x86/vf_nlmeans_init.o
OBJS-$(CONFIG_NOISE_FILTER)
+=
x86/vf_noise.o
OBJS-$(CONFIG_NOISE_FILTER)
+=
x86/vf_noise.o
OBJS-$(CONFIG_OVERLAY_FILTER)
+=
x86/vf_overlay_init.o
OBJS-$(CONFIG_OVERLAY_FILTER)
+=
x86/vf_overlay_init.o
OBJS-$(CONFIG_PP7_FILTER)
+=
x86/vf_pp7_init.o
OBJS-$(CONFIG_PP7_FILTER)
+=
x86/vf_pp7_init.o
...
@@ -61,6 +62,7 @@ X86ASM-OBJS-$(CONFIG_LIMITER_FILTER) += x86/vf_limiter.o
...
@@ -61,6 +62,7 @@ X86ASM-OBJS-$(CONFIG_LIMITER_FILTER) += x86/vf_limiter.o
X86ASM-OBJS-$(CONFIG_LUT3D_FILTER)
+=
x86/vf_lut3d.o
X86ASM-OBJS-$(CONFIG_LUT3D_FILTER)
+=
x86/vf_lut3d.o
X86ASM-OBJS-$(CONFIG_MASKEDCLAMP_FILTER)
+=
x86/vf_maskedclamp.o
X86ASM-OBJS-$(CONFIG_MASKEDCLAMP_FILTER)
+=
x86/vf_maskedclamp.o
X86ASM-OBJS-$(CONFIG_MASKEDMERGE_FILTER)
+=
x86/vf_maskedmerge.o
X86ASM-OBJS-$(CONFIG_MASKEDMERGE_FILTER)
+=
x86/vf_maskedmerge.o
X86ASM-OBJS-$(CONFIG_NLMEANS_FILTER)
+=
x86/vf_nlmeans.o
X86ASM-OBJS-$(CONFIG_OVERLAY_FILTER)
+=
x86/vf_overlay.o
X86ASM-OBJS-$(CONFIG_OVERLAY_FILTER)
+=
x86/vf_overlay.o
X86ASM-OBJS-$(CONFIG_PP7_FILTER)
+=
x86/vf_pp7.o
X86ASM-OBJS-$(CONFIG_PP7_FILTER)
+=
x86/vf_pp7.o
X86ASM-OBJS-$(CONFIG_PSNR_FILTER)
+=
x86/vf_psnr.o
X86ASM-OBJS-$(CONFIG_PSNR_FILTER)
+=
x86/vf_psnr.o
...
...
libavfilter/x86/vf_nlmeans.asm
0 → 100644
View file @
047c362d
;*****************************************************************************
;* x86-optimized functions for nlmeans filter
;*
;* This file is part of FFmpeg.
;*
;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************
%include
"libavutil/x86/x86util.asm"
%if
HAVE_AVX2_EXTERNAL
&&
ARCH_X86_64
SECTION_RODATA
32
; Tail masks for compute_weights_line, stored as 9 rows of 8 dwords
; (32 bytes per row). Row r (r = 0..8) holds r zero dwords followed by
; (8 - r) dwords of -1 (all bits set).
; The loop selects row min(endx - x, 8) by shifting that count left by 5
; and ORs the row into the per-lane index vector: lanes that still lie
; before endx are left untouched (OR with 0), the remaining lanes are
; forced to 0xFFFFFFFF and are afterwards clamped by the unsigned minimum
; against the broadcast `max` value.
ending_lut: dd -1, -1, -1, -1, -1, -1, -1, -1,\
                0, -1, -1, -1, -1, -1, -1, -1,\
                0,  0, -1, -1, -1, -1, -1, -1,\
                0,  0,  0, -1, -1, -1, -1, -1,\
                0,  0,  0,  0, -1, -1, -1, -1,\
                0,  0,  0,  0,  0, -1, -1, -1,\
                0,  0,  0,  0,  0,  0, -1, -1,\
                0,  0,  0,  0,  0,  0,  0, -1,\
                0,  0,  0,  0,  0,  0,  0,  0
SECTION
.
text
; void ff_compute_weights_line(const uint32_t *const iia,
;                              const uint32_t *const iib,
;                              const uint32_t *const iid,
;                              const uint32_t *const iie,
;                              const uint8_t *const src,
;                              float *total,
;                              float *sum,
;                              const float *const lut,
;                              int max,
;                              int startx, int endx);
;
; For x = startx, startx + 8, ... while x < endx, processes 8 lanes at once:
;     diff     = iie[x] - iid[x] - iib[x] + iia[x]
;     weight   = lut[min(diff, max)]            (unsigned min)
;     total[x] += weight
;     sum[x]   += weight * src[x]
; Lanes at or beyond endx have their index forced to all-ones via
; ending_lut before the clamp, so they look up lut[max].
;
; cglobal parameters after the name (x86inc.asm convention): 8 arguments
; loaded into registers, 13 general-purpose registers, 5 vector registers,
; 0 bytes of stack, followed by the register names. The 9th..11th names
; (x, startx, endx) alias the argument slots of max, startx and endx, which
; is why `max` is read through r8m and startx/endx through startxm/endxm.
;
; NOTE(review): every iteration loads and stores a full vector of
; total/sum (and reads a full vector of the ii* rows and 8 bytes of src)
; even when fewer than 8 elements remain before endx; the buffers
; presumably carry enough padding for this -- confirm against the caller.
; NOTE(review): the loop test is at the bottom, so the body runs at least
; once; presumably callers guarantee startx < endx -- confirm.
INIT_YMM avx2
cglobal compute_weights_line, 8, 13, 5, 0, iia, iib, iid, iie, src, total, sum, lut, x, startx, endx, mod, elut
    movsxd startxq, dword startxm        ; sign-extend the int arguments
    movsxd endxq, dword endxm
    VPBROADCASTD m2, r8m                 ; m2 = max in every lane
    mov xq, startxq                      ; x = startx (startxq is scratch from here on)
    mov modq, mmsize / 4                 ; number of 32-bit lanes per iteration
    lea elutq, [ending_lut]
    vpcmpeqd m4, m4                      ; m4 = all-ones, template for the gather mask
    .loop:
        ; startxq = min(endx - x, lanes) * 32 = byte offset of the ending_lut row
        mov startxq, endxq
        sub startxq, xq
        cmp startxq, modq
        cmovge startxq, modq
        sal startxq, 5
        ; m0 = iie[x] - iid[x] - iib[x] + iia[x]
        movu m0, [iieq + xq * 4]
        psubd m0, [iidq + xq * 4]
        psubd m0, [iibq + xq * 4]
        paddd m0, [iiaq + xq * 4]
        por m0, [elutq + startxq]        ; force lanes past endx to 0xFFFFFFFF
        pminud m0, m2                    ; unsigned clamp of the index to max
        pslld m0, 2                      ; index -> byte offset (4 bytes per float)
        mova m3, m4                      ; fresh mask: the gather overwrites its mask operand
        vgatherdps m1, [lutq + m0], m3   ; m1 = lut[index] per lane
        pmovzxbd m0, [srcq + xq]         ; 8 source bytes zero-extended to dwords
        cvtdq2ps m0, m0                  ; ... and converted to float
        mulps m0, m1                     ; m0 = weight * src
        addps m1, [totalq + xq * 4]      ; accumulate into total and sum
        addps m0, [sumq + xq * 4]
        movups [totalq + xq * 4], m1
        movups [sumq + xq * 4], m0
        add xq, mmsize / 4
        cmp xq, endxq
        jl .loop
    RET
%endif
libavfilter/x86/vf_nlmeans_init.c
0 → 100644
View file @
047c362d
/*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "libavutil/attributes.h"
#include "libavutil/x86/cpu.h"
#include "libavfilter/vf_nlmeans.h"
/**
 * AVX2 implementation of the compute_weights_line DSP callback; the body
 * lives in libavfilter/x86/vf_nlmeans.asm.
 *
 * For each x in [startx, endx) the assembly computes
 *   diff = iie[x] - iid[x] - iib[x] + iia[x],
 * clamps it (unsigned) to max_meaningful_diff, looks up
 * weight_lut[diff], then adds the weight to total_weight[x] and
 * weight * src[x] to sum[x].
 *
 * @param iia,iib,iid,iie      32-bit input rows combined into the lookup index
 * @param src                  8-bit source samples
 * @param total_weight         per-sample weight accumulator, updated in place
 * @param sum                  per-sample weighted-sample accumulator, updated in place
 * @param weight_lut           float table indexed by the clamped difference
 * @param max_meaningful_diff  upper bound applied to the index before the lookup
 * @param startx               first x processed
 * @param endx                 end of the processed range (exclusive)
 *
 * NOTE(review): the assembly works on 8 elements per iteration and loads
 * and stores whole vectors, so the arrays presumably need padding past
 * endx -- confirm against the caller's allocations.
 */
void ff_compute_weights_line_avx2(const uint32_t *const iia,
                                  const uint32_t *const iib,
                                  const uint32_t *const iid,
                                  const uint32_t *const iie,
                                  const uint8_t *const src,
                                  float *total_weight,
                                  float *sum,
                                  const float *const weight_lut,
                                  int max_meaningful_diff,
                                  int startx, int endx);
/**
 * Install the x86 implementations into the nlmeans DSP context.
 *
 * The AVX2 compute_weights_line routine is selected only when building
 * for x86-64 and the EXTERNAL_AVX2_FAST() check succeeds for the CPU flags
 * reported by av_get_cpu_flags(); otherwise dsp is left untouched.
 *
 * @param dsp DSP context whose function pointers may be overridden
 */
av_cold void ff_nlmeans_init_x86(NLMeansDSPContext *dsp)
{
    const int flags = av_get_cpu_flags();

    /* ARCH_X86_64 is tested first so the reference to the assembly symbol
     * sits behind a compile-time constant on other targets. */
    if (ARCH_X86_64 && EXTERNAL_AVX2_FAST(flags)) {
        dsp->compute_weights_line = ff_compute_weights_line_avx2;
    }
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment