Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
F
ffmpeg
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Stefan Westerfeld
ffmpeg
Commits
186bd30a
Commit
186bd30a
authored
Feb 27, 2019
by
Janne Grunau
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
h264/arm64: implement missing 4:2:2 chroma loop filter neon functions
parent
7e42d5f0
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
46 additions
and
8 deletions
+46
-8
h264dsp_init_aarch64.c
libavcodec/aarch64/h264dsp_init_aarch64.c
+15
-3
h264dsp_neon.S
libavcodec/aarch64/h264dsp_neon.S
+31
-5
No files found.
libavcodec/aarch64/h264dsp_init_aarch64.c
View file @
186bd30a
...
...
@@ -37,10 +37,14 @@ void ff_h264_v_loop_filter_chroma_neon(uint8_t *pix, int stride, int alpha,
int
beta
,
int8_t
*
tc0
);
void
ff_h264_h_loop_filter_chroma_neon
(
uint8_t
*
pix
,
int
stride
,
int
alpha
,
int
beta
,
int8_t
*
tc0
);
void
ff_h264_h_loop_filter_chroma422_neon
(
uint8_t
*
pix
,
int
stride
,
int
alpha
,
int
beta
,
int8_t
*
tc0
);
void
ff_h264_v_loop_filter_chroma_intra_neon
(
uint8_t
*
pix
,
int
stride
,
int
alpha
,
int
beta
);
void
ff_h264_h_loop_filter_chroma_intra_neon
(
uint8_t
*
pix
,
int
stride
,
int
alpha
,
int
beta
);
void
ff_h264_h_loop_filter_chroma422_intra_neon
(
uint8_t
*
pix
,
int
stride
,
int
alpha
,
int
beta
);
void
ff_h264_h_loop_filter_chroma_mbaff_intra_neon
(
uint8_t
*
pix
,
int
stride
,
int
alpha
,
int
beta
);
...
...
@@ -91,10 +95,18 @@ av_cold void ff_h264dsp_init_aarch64(H264DSPContext *c, const int bit_depth,
c
->
h264_h_loop_filter_luma_intra
=
ff_h264_h_loop_filter_luma_intra_neon
;
c
->
h264_v_loop_filter_chroma
=
ff_h264_v_loop_filter_chroma_neon
;
c
->
h264_h_loop_filter_chroma
=
ff_h264_h_loop_filter_chroma_neon
;
c
->
h264_v_loop_filter_chroma_intra
=
ff_h264_v_loop_filter_chroma_intra_neon
;
if
(
chroma_format_idc
<=
1
)
{
c
->
h264_h_loop_filter_chroma
=
ff_h264_h_loop_filter_chroma_neon
;
c
->
h264_h_loop_filter_chroma_intra
=
ff_h264_h_loop_filter_chroma_intra_neon
;
c
->
h264_h_loop_filter_chroma_mbaff_intra
=
ff_h264_h_loop_filter_chroma_mbaff_intra_neon
;
}
else
{
c
->
h264_h_loop_filter_chroma
=
ff_h264_h_loop_filter_chroma422_neon
;
c
->
h264_h_loop_filter_chroma_mbaff
=
ff_h264_h_loop_filter_chroma_neon
;
c
->
h264_h_loop_filter_chroma_intra
=
ff_h264_h_loop_filter_chroma422_intra_neon
;
c
->
h264_h_loop_filter_chroma_mbaff_intra
=
ff_h264_h_loop_filter_chroma_intra_neon
;
}
c
->
weight_h264_pixels_tab
[
0
]
=
ff_weight_h264_pixels_16_neon
;
c
->
weight_h264_pixels_tab
[
1
]
=
ff_weight_h264_pixels_8_neon
;
...
...
libavcodec/aarch64/h264dsp_neon.S
View file @
186bd30a
...
...
@@ -28,9 +28,9 @@
ldr w6, [x4]
ccmp w3, #0, #0, ne
mov v24.S[0], w6
and w
6
, w6, w6, lsl #16
and w
8
, w6, w6, lsl #16
b.eq 1f
ands w
6, w6, w6
, lsl #8
ands w
8, w8, w8
, lsl #8
b.ge 2f
1:
ret
...
...
@@ -394,10 +394,10 @@ endfunc
usubw v4.8H, v4.8H, v16.8B
and v26.8B, v26.8B, v30.8B
shl v4.8H, v4.8H, #2
mov x
2
, v26.d[0]
mov x
8
, v26.d[0]
sli v24.8H, v24.8H, #8
uaddw v4.8H, v4.8H, v18.8B
cbz x
2
, 9f
cbz x
8
, 9f
usubw v4.8H, v4.8H, v2.8B
rshrn v4.8B, v4.8H, #3
smin v4.8B, v4.8B, v24.8B
...
...
@@ -436,6 +436,7 @@ function ff_h264_h_loop_filter_chroma_neon, export=1
sxtw x1, w1
sub x0, x0, #2
h_loop_filter_chroma420:
ld1 {v18.S}[0], [x0], x1
ld1 {v16.S}[0], [x0], x1
ld1 {v0.S}[0], [x0], x1
...
...
@@ -464,6 +465,19 @@ function ff_h264_h_loop_filter_chroma_neon, export=1
ret
endfunc
// Horizontal chroma loop filter for 4:2:2 content.
// C prototype (from the init file): (uint8_t *pix, int stride, int alpha,
// int beta, int8_t *tc0), i.e. x0 = pix, w1 = stride, x4 = tc0 under AAPCS64.
// Runs the shared h_loop_filter_chroma420 body twice with a doubled stride:
// the first pass starts at pix - 2, the second at pix + stride - 2, so the
// two passes walk interleaved rows.
// NOTE(review): relies on the shared body leaving x5, w6 and x7 intact --
// confirm against the elided parts of h_loop_filter_chroma420.
function ff_h264_h_loop_filter_chroma422_neon, export=1
sxtw x1, w1 // sign-extend the 32-bit stride to 64 bits
h264_loop_filter_start // macro defined outside this block; presumably loads the tc0 word into w6 and v24.s[0] and may return early -- confirm
add x5, x0, x1 // x5 = pix + stride, start row of the second pass
sub x0, x0, #2 // x0 = pix - 2, start address of the first pass
add x1, x1, x1 // double the stride so each pass steps over every other row
mov x7, x30 // keep the return address; the bl below overwrites x30
bl h_loop_filter_chroma420 // first pass through the shared 4:2:0 body
mov x30, x7 // restore the caller's return address
sub x0, x5, #2 // x0 = pix + stride - 2, start address of the second pass
mov v24.s[0], w6 // reload v24.s[0] from w6; presumably the first pass modified v24 -- confirm
b h_loop_filter_chroma420 // tail-branch: the shared body's ret returns to our caller
endfunc
.macro h264_loop_filter_chroma_intra
uabd v26.8b, v16.8b, v17.8b // abs(p0 - q0)
...
...
@@ -536,6 +550,7 @@ function ff_h264_h_loop_filter_chroma_intra_neon, export=1
sub x4, x0, #2
sub x0, x0, #1
h_loop_filter_chroma420_intra:
ld1 {v18.8b}, [x4], x1
ld1 {v16.8b}, [x4], x1
ld1 {v17.8b}, [x4], x1
...
...
@@ -543,7 +558,7 @@ function ff_h264_h_loop_filter_chroma_intra_neon, export=1
ld1 {v18.s}[1], [x4], x1
ld1 {v16.s}[1], [x4], x1
ld1 {v17.s}[1], [x4], x1
ld1 {v19.s}[1], [x4]
ld1 {v19.s}[1], [x4]
, x1
transpose_4x8B v18, v16, v17, v19, v26, v27, v28, v29
...
...
@@ -562,6 +577,17 @@ function ff_h264_h_loop_filter_chroma_intra_neon, export=1
ret
endfunc
// Horizontal intra chroma loop filter for 4:2:2 content.
// C prototype (from the init file): (uint8_t *pix, int stride, int alpha,
// int beta), i.e. x0 = pix, w1 = stride under AAPCS64.
// Runs the shared h_loop_filter_chroma420_intra body twice: once from pix and
// once from pix + 8 * stride.
// NOTE(review): unlike the non-intra 4:2:2 variant there is no explicit
// sxtw of the stride here; presumably h264_loop_filter_start_intra does it --
// confirm. Also relies on the shared body leaving x5 and x7 intact.
function ff_h264_h_loop_filter_chroma422_intra_neon, export=1
h264_loop_filter_start_intra // macro defined outside this block; setup and possible early return not visible here
sub x4, x0, #2 // x4 = pix - 2, pointer the shared body loads from (post-incremented by x1)
add x5, x0, x1, lsl #3 // x5 = pix + 8 * stride, base of the second pass
sub x0, x0, #1 // x0 = pix - 1; presumably the store pointer of the shared body -- confirm
mov x7, x30 // keep the return address; the bl below overwrites x30
bl h_loop_filter_chroma420_intra // first pass through the shared 4:2:0 intra body
sub x0, x5, #1 // x0 = pix + 8 * stride - 1 for the second pass
mov x30, x7 // restore the caller's return address
// x4 is not reset: the shared body's last load post-increments x4 by x1, so it
// presumably already points at the next rows to load -- confirm against the elided loads.
b h_loop_filter_chroma420_intra // tail-branch: the shared body's ret returns to our caller
endfunc
.macro biweight_16 macs, macd
dup v0.16B, w5
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment