Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
F
ffmpeg
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Stefan Westerfeld
ffmpeg
Commits
296609f8
Commit
296609f8
authored
Oct 11, 2018
by
Martin Vignali
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
swscale/x86/rgb2rgb : port shuffle 2103 mmxext to external asm and remove inline asm version
parent
04afdbb5
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
67 additions
and
48 deletions
+67
-48
rgb2rgb.c
libswscale/x86/rgb2rgb.c
+4
-0
rgb2rgb_template.c
libswscale/x86/rgb2rgb_template.c
+0
-48
rgb_2_rgb.asm
libswscale/x86/rgb_2_rgb.asm
+63
-0
No files found.
libswscale/x86/rgb2rgb.c
View file @
296609f8
...
...
@@ -144,6 +144,7 @@ DECLARE_ALIGNED(8, extern const uint64_t, ff_bgr2UVOffset);
#endif
/* HAVE_INLINE_ASM */
/*
 * Prototypes of byte-shuffle routines that have no body in this file.
 * Each takes a source buffer, a destination buffer and a byte count.
 * ff_shuffle_bytes_2103_mmxext is the routine that rgb2rgb_init_x86()
 * installs as shuffle_bytes_2103 when EXTERNAL_MMXEXT(cpu_flags) holds;
 * it is implemented in libswscale/x86/rgb_2_rgb.asm.
 * NOTE(review): the digit suffix presumably names, per 4-byte group, which
 * source byte lands in each destination position (2103 = swap bytes 0 and
 * 2) - confirm against the ssse3 implementations.
 */
void
ff_shuffle_bytes_2103_mmxext
(
const
uint8_t
*
src
,
uint8_t
*
dst
,
int
src_size
);
void
ff_shuffle_bytes_2103_ssse3
(
const
uint8_t
*
src
,
uint8_t
*
dst
,
int
src_size
);
void
ff_shuffle_bytes_0321_ssse3
(
const
uint8_t
*
src
,
uint8_t
*
dst
,
int
src_size
);
void
ff_shuffle_bytes_1230_ssse3
(
const
uint8_t
*
src
,
uint8_t
*
dst
,
int
src_size
);
...
...
@@ -176,6 +177,9 @@ av_cold void rgb2rgb_init_x86(void)
rgb2rgb_init_avx
();
#endif
/* HAVE_INLINE_ASM */
if
(
EXTERNAL_MMXEXT
(
cpu_flags
))
{
shuffle_bytes_2103
=
ff_shuffle_bytes_2103_mmxext
;
}
if
(
EXTERNAL_SSE2
(
cpu_flags
))
{
#if ARCH_X86_64
uyvytoyuv422
=
ff_uyvytoyuv422_sse2
;
...
...
libswscale/x86/rgb2rgb_template.c
View file @
296609f8
...
...
@@ -1034,51 +1034,6 @@ static inline void RENAME(rgb16to32)(const uint8_t *src, uint8_t *dst, int src_s
}
}
#if COMPILE_TEMPLATE_MMXEXT
/*
 * Copy src_size bytes from src to dst, exchanging bytes 0 and 2 of every
 * 4-byte group and leaving bytes 1 and 3 in place.
 *
 * idx starts at 15 - src_size and counts upwards. s and d are biased by
 * -idx so that s[idx] and d[idx] initially address src[0] and dst[0].
 */
static
inline
void
RENAME
(
shuffle_bytes_2103
)(
const
uint8_t
*
src
,
uint8_t
*
dst
,
int
src_size
)
{
x86_reg
idx
=
15
-
src_size
;
const
uint8_t
*
s
=
src
-
idx
;
uint8_t
*
d
=
dst
-
idx
;
/*
 * Vector part. It is skipped entirely (jump to label 2) when idx is not
 * negative, i.e. when src_size <= 15. Otherwise each iteration handles 16
 * bytes and adds 16 to idx until idx is no longer negative.
 *
 * pshufw $177 swaps the two 16-bit words inside each 32-bit group; the
 * pand/por pairs then merge the swapped copy (under mm6) with the original
 * (under mm7). mm6 is mask32b XOR mask32r and mm7 is mm6 XOR mmx_one.
 * NOTE(review): those three constants are defined outside this block;
 * presumably mm6 selects bytes 0 and 2 and mm7 bytes 1 and 3 - confirm.
 * PREFETCH, MOVNTQ, SFENCE and EMMS are macros defined elsewhere; they
 * presumably expand to the instructions of the same name - confirm.
 */
__asm__
volatile
(
"test %0, %0
\n\t
"
"jns 2f
\n\t
"
PREFETCH
" (%1, %0)
\n\t
"
"movq %3, %%mm7
\n\t
"
"pxor %4, %%mm7
\n\t
"
"movq %%mm7, %%mm6
\n\t
"
"pxor %5, %%mm7
\n\t
"
".p2align 4
\n\t
"
"1:
\n\t
"
PREFETCH
" 32(%1, %0)
\n\t
"
"movq (%1, %0), %%mm0
\n\t
"
"movq 8(%1, %0), %%mm1
\n\t
"
"pshufw $177, %%mm0, %%mm3
\n\t
"
"pshufw $177, %%mm1, %%mm5
\n\t
"
"pand %%mm7, %%mm0
\n\t
"
"pand %%mm6, %%mm3
\n\t
"
"pand %%mm7, %%mm1
\n\t
"
"pand %%mm6, %%mm5
\n\t
"
"por %%mm3, %%mm0
\n\t
"
"por %%mm5, %%mm1
\n\t
"
MOVNTQ
" %%mm0, (%2, %0)
\n\t
"
MOVNTQ
" %%mm1, 8(%2, %0)
\n\t
"
"add $16, %0
\n\t
"
"js 1b
\n\t
"
SFENCE
"
\n\t
"
EMMS
"
\n\t
"
"2:
\n\t
"
:
"+&r"
(
idx
)
:
"r"
(
s
),
"r"
(
d
),
"m"
(
mask32b
),
"m"
(
mask32r
),
"m"
(
mmx_one
)
:
"memory"
);
/*
 * Scalar tail: handles whatever the vector part left over, four bytes per
 * iteration, while idx < 15. g keeps bytes 1 and 3 of the 32-bit word, v
 * keeps bytes 0 and 2, and the two 16-bit shifts exchange those two bytes.
 */
for
(;
idx
<
15
;
idx
+=
4
)
{
register
unsigned
v
=
*
(
const
uint32_t
*
)
&
s
[
idx
],
g
=
v
&
0xff00ff00
;
v
&=
0xff00ff
;
*
(
uint32_t
*
)
&
d
[
idx
]
=
(
v
>>
16
)
+
g
+
(
v
<<
16
);
}
}
#endif
static
inline
void
RENAME
(
rgb24tobgr24
)(
const
uint8_t
*
src
,
uint8_t
*
dst
,
int
src_size
)
{
unsigned
i
;
...
...
@@ -2555,9 +2510,6 @@ static av_cold void RENAME(rgb2rgb_init)(void)
rgb24to15
=
RENAME
(
rgb24to15
);
rgb24to16
=
RENAME
(
rgb24to16
);
rgb24tobgr24
=
RENAME
(
rgb24tobgr24
);
#if COMPILE_TEMPLATE_MMXEXT
shuffle_bytes_2103
=
RENAME
(
shuffle_bytes_2103
);
#endif
rgb32tobgr16
=
RENAME
(
rgb32tobgr16
);
rgb32tobgr15
=
RENAME
(
rgb32tobgr15
);
yv12toyuy2
=
RENAME
(
yv12toyuy2
);
...
...
libswscale/x86/rgb_2_rgb.asm
View file @
296609f8
...
...
@@ -24,6 +24,7 @@
SECTION_RODATA
; Eight 16-bit words of value 255, i.e. the byte pattern ff 00 repeated.
; Loaded by the mmxext shuffle_bytes_2103 routine as the mask that selects
; bytes 0 and 2 of every 4-byte group.
pb_mask_shuffle2103_mmx
times
8
dw
255
; Sixteen byte indices; within every 4-byte group the order is 2, 1, 0, 3.
; NOTE(review): presumably a pshufb control vector for the ssse3 variant -
; the code that reads it is outside this view.
pb_shuffle2103
:
db
2
,
1
,
0
,
3
,
6
,
5
,
4
,
7
,
10
,
9
,
8
,
11
,
14
,
13
,
12
,
15
; Sixteen byte indices; within every 4-byte group the order is 0, 3, 2, 1.
pb_shuffle0321
:
db
0
,
3
,
2
,
1
,
4
,
7
,
6
,
5
,
8
,
11
,
10
,
9
,
12
,
15
,
14
,
13
; Sixteen byte indices; within every 4-byte group the order is 1, 2, 3, 0.
pb_shuffle1230
:
db
1
,
2
,
3
,
0
,
5
,
6
,
7
,
4
,
9
,
10
,
11
,
8
,
13
,
14
,
15
,
12
...
...
@@ -42,6 +43,68 @@ SECTION .text
%endif
%endmacro
;------------------------------------------------------------------------------
; void ff_shuffle_bytes_2103_mmxext(const uint8_t *src, uint8_t *dst,
;                                   int src_size)
;
; Copies src_size bytes from src to dst, exchanging bytes 0 and 2 of every
; 4-byte group; bytes 1 and 3 stay in place.
;
; In:    src      - source buffer
;        dst      - destination buffer
;        src_size - byte count; only multiples of 4 are handled correctly
;                   (the two low bits are ignored when sizing the scalar
;                   loop), values <= 0 copy nothing
; Regs:  w   - negative byte offset from the end of both buffers, counts up
;              towards 0
;        x   - bytes handled by the scalar loop: src_size & (mmsize*2 - 4)
;        tmp - scratch byte for the scalar loop
;        m6  - mask selecting bytes 0 and 2 of each group
;        m7  - mask selecting bytes 1 and 3 of each group
; The MMX state is cleared with emms on every return path, so the caller
; does not have to do it.
;------------------------------------------------------------------------------
INIT_MMX mmxext
cglobal shuffle_bytes_2103, 3, 5, 8, src, dst, w, tmp, x
    mova            m6, [pb_mask_shuffle2103_mmx]   ; 0x00ff in every word
    mova            m7, m6
    psllq           m7, 8                           ; 0xff00 in every word

    movsxdifnidn    wq, wd
    mov             xq, wq

    ; address both buffers from their end and let w run from -src_size up to 0
    add           srcq, wq
    add           dstq, wq
    neg             wq
    jge .end                                        ; src_size <= 0: nothing to copy

    ; x = leading bytes (0, 4, 8 or 12) that do not fill a 16-byte SIMD step
    and             xq, mmsize*2 - 4
    je .loop_simd

.loop_scalar:
    ; one 4-byte group per iteration: dst[0,1,2,3] = src[2,1,0,3]
    mov           tmpb, [srcq + wq + 2]
    mov [dstq + wq + 0], tmpb
    mov           tmpb, [srcq + wq + 1]
    mov [dstq + wq + 1], tmpb
    mov           tmpb, [srcq + wq + 0]
    mov [dstq + wq + 2], tmpb
    mov           tmpb, [srcq + wq + 3]
    mov [dstq + wq + 3], tmpb
    add             wq, 4
    sub             xq, 4
    jg .loop_scalar

    ; src_size < mmsize * 2: the scalar loop already consumed everything
    test            wq, wq
    jge .end

.loop_simd:
    ; 16 bytes per iteration, w is a negative multiple of 16 here
    movu            m0, [srcq + wq]
    movu            m1, [srcq + wq + 8]

    pshufw          m3, m0, 177                     ; swap the words of each dword
    pshufw          m5, m1, 177

    pand            m0, m7                          ; original bytes 1 and 3
    pand            m3, m6                          ; swapped bytes 2 and 0

    pand            m1, m7
    pand            m5, m6

    por             m0, m3
    por             m1, m5

    movu [dstq + wq    ], m0
    movu [dstq + wq + 8], m1

    add             wq, mmsize*2
    jl .loop_simd

.end:
    emms                                            ; m6 is written on every path
    RET
;------------------------------------------------------------------------------
; shuffle_bytes_## (const uint8_t *src, uint8_t *dst, int src_size)
;------------------------------------------------------------------------------
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment