Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
F
ffmpeg
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Stefan Westerfeld
ffmpeg
Commits
838abfc1
Commit
838abfc1
authored
Jan 31, 2016
by
Timothy Gu
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
x86: vc1dsp: Convert vc1_inv_trans_*_dc to NASM format
parent
b62825a4
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
111 additions
and
207 deletions
+111
-207
vc1dsp.asm
libavcodec/x86/vc1dsp.asm
+98
-0
vc1dsp_init.c
libavcodec/x86/vc1dsp_init.c
+13
-0
vc1dsp_mmx.c
libavcodec/x86/vc1dsp_mmx.c
+0
-207
No files found.
libavcodec/x86/vc1dsp.asm
View file @
838abfc1
...
...
@@ -395,3 +395,101 @@ cglobal vc1_put_ver_16b_shift2, 4,7,0, dst, src, stride
jnz
.
loop
REP_RET
%endif
; HAVE_MMX_INLINE
; INV_TRANS_INIT
; Converts the scalar DC term held in blockd into the two packed byte
; vectors consumed by INV_TRANS_PROCESS, and prepares row addressing.
;   in:  blockd    - DC value to apply to every pixel (may be negative)
;        linesized - stride argument
;   out: m0         - dc  packed to bytes with unsigned saturation
;        m1         - -dc packed to bytes with unsigned saturation
;        linesize3q - linesizeq * 3
; NOTE: DEFINE_ARGS renames the argument registers; the third one (named
; "block" by the callers' cglobal line) becomes linesize3 and is overwritten,
; so the DC value must already be in m0 by then.
%macro INV_TRANS_INIT 0
    ; x86inc helper; presumably a sign-extending move that is skipped when
    ; both names refer to the same register - confirm against x86inc.asm
    movsxdifnidn linesizeq, linesized

    pxor              m1, m1                ; m1 = 0
    movd              m0, blockd
    SPLATW            m0, m0                ; presumably dc in every word (the inline-asm version used pshufw $0)
    psubw             m1, m0                ; m1 = -dc in every word
    packuswb          m0, m0                ; words -> bytes, unsigned saturation
    packuswb          m1, m1

    DEFINE_ARGS dest, linesize, linesize3
    lea       linesize3q, [linesizeq*3]
%endmacro
; INV_TRANS_PROCESS suffix
; Applies the packed DC vectors to four consecutive rows starting at destq.
;   %1   - suffix appended to "mov" to pick the load/store instruction
;          (the callers in this file pass h or a)
;   in:  destq, linesizeq, linesize3q, m0 (amount to add), m1 (amount to subtract)
;   clobbers: m2-m5
; All four rows are loaded before any row is stored.
%macro INV_TRANS_PROCESS 1
    ; load rows 0..3
    mov%1             m2, [destq]
    mov%1             m3, [destq+linesizeq]
    mov%1             m4, [destq+linesizeq*2]
    mov%1             m5, [destq+linesize3q]

    ; per row: saturating add of m0 followed by saturating subtract of m1
    paddusb           m2, m0
    psubusb           m2, m1
    paddusb           m3, m0
    psubusb           m3, m1
    paddusb           m4, m0
    psubusb           m4, m1
    paddusb           m5, m0
    psubusb           m5, m1

    ; store rows 0..3
    mov%1  [destq], m2
    mov%1  [destq+linesizeq], m3
    mov%1  [destq+linesizeq*2], m4
    mov%1  [destq+linesize3q], m5
%endmacro
; ff_vc1_inv_trans_?x?_dc_mmxext(uint8_t *dest, int linesize, int16_t *block)

; 4x4 variant: dc = block[0]
;              dc = (17 * dc +  4) >> 3
;              dc = (17 * dc + 64) >> 7
; then one INV_TRANS_PROCESS pass (four rows) using the "h" move.
; r3 is used as scratch for the 16 * dc term.
INIT_MMX mmxext
cglobal vc1_inv_trans_4x4_dc, 3,4,0, dest, linesize, block
    movsx         blockd, word [blockq]     ; dc = block[0]

    mov              r3d, blockd
    shl              r3d, 4                 ; 16 * dc
    lea           blockd, [blockq+r3+4]     ; 17 * dc + 4
    sar           blockd, 3                 ; >> 3

    mov              r3d, blockd
    shl              r3d, 4                 ; 16 * dc
    lea           blockd, [blockq+r3+64]    ; 17 * dc + 64
    sar           blockd, 7                 ; >> 7

    INV_TRANS_INIT
    INV_TRANS_PROCESS h
    RET
; 4x8 variant: dc = block[0]
;              dc = (17 * dc +  4) >> 3
;              dc = (12 * dc + 64) >> 7
; then two INV_TRANS_PROCESS passes (rows 0-3 and rows 4-7) using the "h" move.
; r3 is used as scratch for the 16 * dc term.
INIT_MMX mmxext
cglobal vc1_inv_trans_4x8_dc, 3,4,0, dest, linesize, block
    movsx         blockd, word [blockq]     ; dc = block[0]

    mov              r3d, blockd
    shl              r3d, 4                 ; 16 * dc
    lea           blockd, [blockq+r3+4]     ; 17 * dc + 4
    sar           blockd, 3                 ; >> 3

    lea           blockd, [blockq+blockq*2] ;  3 * dc
    lea           blockd, [blockq*4+64]     ; 12 * dc + 64
    sar           blockd, 7                 ; >> 7

    INV_TRANS_INIT

    INV_TRANS_PROCESS h
    lea            destq, [destq+linesizeq*4] ; advance to row 4
    INV_TRANS_PROCESS h
    RET
; 8x4 variant: dc = block[0]
;              dc = ( 3 * dc +  1) >> 1
;              dc = (17 * dc + 64) >> 7
; then one INV_TRANS_PROCESS pass (four rows) using the "a" move.
; r3 is used as scratch for the 16 * dc term.
INIT_MMX mmxext
cglobal vc1_inv_trans_8x4_dc, 3,4,0, dest, linesize, block
    movsx         blockd, word [blockq]       ; dc = block[0]

    lea           blockd, [blockq+blockq*2+1] ; 3 * dc + 1
    sar           blockd, 1                   ; >> 1

    mov              r3d, blockd
    shl              r3d, 4                   ; 16 * dc
    lea           blockd, [blockq+r3+64]      ; 17 * dc + 64
    sar           blockd, 7                   ; >> 7

    INV_TRANS_INIT
    INV_TRANS_PROCESS a
    RET
; 8x8 variant: dc = block[0]
;              dc = (3 * dc +  1) >> 1
;              dc = (3 * dc + 16) >> 5
; then two INV_TRANS_PROCESS passes (rows 0-3 and rows 4-7) using the "a" move.
; No scratch register beyond the three arguments is needed.
INIT_MMX mmxext
cglobal vc1_inv_trans_8x8_dc, 3,3,0, dest, linesize, block
    movsx         blockd, word [blockq]        ; dc = block[0]

    lea           blockd, [blockq+blockq*2+1]  ; 3 * dc + 1
    sar           blockd, 1                    ; >> 1

    lea           blockd, [blockq+blockq*2+16] ; 3 * dc + 16
    sar           blockd, 5                    ; >> 5

    INV_TRANS_INIT

    INV_TRANS_PROCESS a
    lea            destq, [destq+linesizeq*4]  ; advance to row 4
    INV_TRANS_PROCESS a
    RET
libavcodec/x86/vc1dsp_init.c
View file @
838abfc1
...
...
@@ -92,6 +92,14 @@ void ff_put_vc1_chroma_mc8_nornd_ssse3(uint8_t *dst, uint8_t *src,
int
stride
,
int
h
,
int
x
,
int
y
);
void
ff_avg_vc1_chroma_mc8_nornd_ssse3
(
uint8_t
*
dst
,
uint8_t
*
src
,
int
stride
,
int
h
,
int
x
,
int
y
);
/* DC-only inverse transforms defined with cglobal in libavcodec/x86/vc1dsp.asm
 * (INIT_MMX mmxext); ff_vc1dsp_init_x86() stores them in the matching
 * VC1DSPContext vc1_inv_trans_*_dc members. */
void
ff_vc1_inv_trans_4x4_dc_mmxext
(
uint8_t
*
dest
,
int
linesize
,
int16_t
*
block
);
void
ff_vc1_inv_trans_4x8_dc_mmxext
(
uint8_t
*
dest
,
int
linesize
,
int16_t
*
block
);
void
ff_vc1_inv_trans_8x4_dc_mmxext
(
uint8_t
*
dest
,
int
linesize
,
int16_t
*
block
);
void
ff_vc1_inv_trans_8x8_dc_mmxext
(
uint8_t
*
dest
,
int
linesize
,
int16_t
*
block
);
av_cold
void
ff_vc1dsp_init_x86
(
VC1DSPContext
*
dsp
)
...
...
@@ -130,6 +138,11 @@ av_cold void ff_vc1dsp_init_x86(VC1DSPContext *dsp)
dsp
->
avg_vc1_mspel_pixels_tab
[
1
][
0
]
=
avg_vc1_mspel_mc00_8_mmxext
;
dsp
->
avg_vc1_mspel_pixels_tab
[
0
][
0
]
=
avg_vc1_mspel_mc00_16_mmxext
;
dsp
->
vc1_inv_trans_8x8_dc
=
ff_vc1_inv_trans_8x8_dc_mmxext
;
dsp
->
vc1_inv_trans_4x8_dc
=
ff_vc1_inv_trans_4x8_dc_mmxext
;
dsp
->
vc1_inv_trans_8x4_dc
=
ff_vc1_inv_trans_8x4_dc_mmxext
;
dsp
->
vc1_inv_trans_4x4_dc
=
ff_vc1_inv_trans_4x4_dc_mmxext
;
}
if
(
EXTERNAL_SSE2
(
cpu_flags
))
{
dsp
->
vc1_v_loop_filter8
=
ff_vc1_v_loop_filter8_sse2
;
...
...
libavcodec/x86/vc1dsp_mmx.c
View file @
838abfc1
...
...
@@ -481,208 +481,6 @@ DECLARE_FUNCTION(3, 1)
DECLARE_FUNCTION
(
3
,
2
)
DECLARE_FUNCTION
(
3
,
3
)
/**
 * DC-only inverse transform for a 4x4 block (MMXEXT inline assembly).
 *
 * block[0] is scaled as ((17 * dc + 4) >> 3) and then ((17 * dc + 64) >> 7);
 * the result is added to four bytes on each of four rows of dest, with
 * unsigned byte saturation.
 *
 * @param dest     top-left byte of the destination area
 * @param linesize distance in bytes between consecutive rows
 * @param block    coefficient block; only block[0] is read
 *
 * The second asm statement relies on mm0/mm1 still holding the values set
 * up by the first one. No emms is issued here.
 */
static void vc1_inv_trans_4x4_dc_mmxext(uint8_t *dest, int linesize,
                                        int16_t *block)
{
    uint32_t *row0 = (uint32_t *)(dest + 0 * linesize);
    uint32_t *row1 = (uint32_t *)(dest + 1 * linesize);
    uint32_t *row2 = (uint32_t *)(dest + 2 * linesize);
    uint32_t *row3 = (uint32_t *)(dest + 3 * linesize);
    int dc = (17 * block[0] + 4) >> 3;

    dc = (17 * dc + 64) >> 7;

    /* mm0 = dc, mm1 = -dc, each packed to bytes with unsigned saturation */
    __asm__ volatile(
        "movd          %0, %%mm0 \n\t"
        "pshufw $0, %%mm0, %%mm0 \n\t"
        "pxor       %%mm1, %%mm1 \n\t"
        "psubw      %%mm0, %%mm1 \n\t"
        "packuswb   %%mm0, %%mm0 \n\t"
        "packuswb   %%mm1, %%mm1 \n\t"
        :: "r"(dc)
    );

    /* load four rows, add mm0 / subtract mm1 with saturation, store back */
    __asm__ volatile(
        "movd          %0, %%mm2 \n\t"
        "movd          %1, %%mm3 \n\t"
        "movd          %2, %%mm4 \n\t"
        "movd          %3, %%mm5 \n\t"
        "paddusb    %%mm0, %%mm2 \n\t"
        "psubusb    %%mm1, %%mm2 \n\t"
        "paddusb    %%mm0, %%mm3 \n\t"
        "psubusb    %%mm1, %%mm3 \n\t"
        "paddusb    %%mm0, %%mm4 \n\t"
        "psubusb    %%mm1, %%mm4 \n\t"
        "paddusb    %%mm0, %%mm5 \n\t"
        "psubusb    %%mm1, %%mm5 \n\t"
        "movd       %%mm2, %0    \n\t"
        "movd       %%mm3, %1    \n\t"
        "movd       %%mm4, %2    \n\t"
        "movd       %%mm5, %3    \n\t"
        : "+m"(*row0), "+m"(*row1), "+m"(*row2), "+m"(*row3)
    );
}
/**
 * DC-only inverse transform for a block 4 bytes wide and 8 rows high
 * (MMXEXT inline assembly).
 *
 * block[0] is scaled as ((17 * dc + 4) >> 3) and then ((12 * dc + 64) >> 7);
 * the result is added to four bytes on each of eight rows of dest, with
 * unsigned byte saturation. The rows are handled in two groups of four.
 *
 * @param dest     top-left byte of the destination area
 * @param linesize distance in bytes between consecutive rows
 * @param block    coefficient block; only block[0] is read
 *
 * The per-group asm statement relies on mm0/mm1 still holding the values
 * set up by the first statement. No emms is issued here.
 */
static void vc1_inv_trans_4x8_dc_mmxext(uint8_t *dest, int linesize,
                                        int16_t *block)
{
    int dc = (17 * block[0] + 4) >> 3;
    int group;

    dc = (12 * dc + 64) >> 7;

    /* mm0 = dc, mm1 = -dc, each packed to bytes with unsigned saturation */
    __asm__ volatile(
        "movd          %0, %%mm0 \n\t"
        "pshufw $0, %%mm0, %%mm0 \n\t"
        "pxor       %%mm1, %%mm1 \n\t"
        "psubw      %%mm0, %%mm1 \n\t"
        "packuswb   %%mm0, %%mm0 \n\t"
        "packuswb   %%mm1, %%mm1 \n\t"
        :: "r"(dc)
    );

    /* rows 0-3, then rows 4-7 */
    for (group = 0; group < 2; group++) {
        uint8_t *rows = dest + group * 4 * linesize;

        __asm__ volatile(
            "movd          %0, %%mm2 \n\t"
            "movd          %1, %%mm3 \n\t"
            "movd          %2, %%mm4 \n\t"
            "movd          %3, %%mm5 \n\t"
            "paddusb    %%mm0, %%mm2 \n\t"
            "psubusb    %%mm1, %%mm2 \n\t"
            "paddusb    %%mm0, %%mm3 \n\t"
            "psubusb    %%mm1, %%mm3 \n\t"
            "paddusb    %%mm0, %%mm4 \n\t"
            "psubusb    %%mm1, %%mm4 \n\t"
            "paddusb    %%mm0, %%mm5 \n\t"
            "psubusb    %%mm1, %%mm5 \n\t"
            "movd       %%mm2, %0    \n\t"
            "movd       %%mm3, %1    \n\t"
            "movd       %%mm4, %2    \n\t"
            "movd       %%mm5, %3    \n\t"
            : "+m"(*(uint32_t *)(rows + 0 * linesize)),
              "+m"(*(uint32_t *)(rows + 1 * linesize)),
              "+m"(*(uint32_t *)(rows + 2 * linesize)),
              "+m"(*(uint32_t *)(rows + 3 * linesize))
        );
    }
}
/**
 * DC-only inverse transform for a block 8 bytes wide and 4 rows high
 * (MMXEXT inline assembly).
 *
 * block[0] is scaled as ((3 * dc + 1) >> 1) and then ((17 * dc + 64) >> 7);
 * the result is added to eight bytes on each of four rows of dest, with
 * unsigned byte saturation.
 *
 * @param dest     top-left byte of the destination area
 * @param linesize distance in bytes between consecutive rows
 * @param block    coefficient block; only block[0] is read
 *
 * The second asm statement relies on mm0/mm1 still holding the values set
 * up by the first one. No emms is issued here.
 */
static void vc1_inv_trans_8x4_dc_mmxext(uint8_t *dest, int linesize,
                                        int16_t *block)
{
    /* movq transfers 8 bytes per row, so the memory operands must be
     * 64 bits wide; a 32-bit operand would tell the compiler that only
     * half of each row is read and written by the asm. */
    uint64_t *row0 = (uint64_t *)(dest + 0 * linesize);
    uint64_t *row1 = (uint64_t *)(dest + 1 * linesize);
    uint64_t *row2 = (uint64_t *)(dest + 2 * linesize);
    uint64_t *row3 = (uint64_t *)(dest + 3 * linesize);
    int dc = (3 * block[0] + 1) >> 1;

    dc = (17 * dc + 64) >> 7;

    /* mm0 = dc, mm1 = -dc, each packed to bytes with unsigned saturation */
    __asm__ volatile(
        "movd          %0, %%mm0 \n\t"
        "pshufw $0, %%mm0, %%mm0 \n\t"
        "pxor       %%mm1, %%mm1 \n\t"
        "psubw      %%mm0, %%mm1 \n\t"
        "packuswb   %%mm0, %%mm0 \n\t"
        "packuswb   %%mm1, %%mm1 \n\t"
        :: "r"(dc)
    );

    /* load four rows, add mm0 / subtract mm1 with saturation, store back */
    __asm__ volatile(
        "movq          %0, %%mm2 \n\t"
        "movq          %1, %%mm3 \n\t"
        "movq          %2, %%mm4 \n\t"
        "movq          %3, %%mm5 \n\t"
        "paddusb    %%mm0, %%mm2 \n\t"
        "paddusb    %%mm0, %%mm3 \n\t"
        "paddusb    %%mm0, %%mm4 \n\t"
        "paddusb    %%mm0, %%mm5 \n\t"
        "psubusb    %%mm1, %%mm2 \n\t"
        "psubusb    %%mm1, %%mm3 \n\t"
        "psubusb    %%mm1, %%mm4 \n\t"
        "psubusb    %%mm1, %%mm5 \n\t"
        "movq       %%mm2, %0    \n\t"
        "movq       %%mm3, %1    \n\t"
        "movq       %%mm4, %2    \n\t"
        "movq       %%mm5, %3    \n\t"
        : "+m"(*row0), "+m"(*row1), "+m"(*row2), "+m"(*row3)
    );
}
/**
 * DC-only inverse transform for an 8x8 block (MMXEXT inline assembly).
 *
 * block[0] is scaled as ((3 * dc + 1) >> 1) and then ((3 * dc + 16) >> 5);
 * the result is added to eight bytes on each of eight rows of dest, with
 * unsigned byte saturation. The rows are handled in two groups of four.
 *
 * @param dest     top-left byte of the destination area
 * @param linesize distance in bytes between consecutive rows
 * @param block    coefficient block; only block[0] is read
 *
 * The per-group asm statement relies on mm0/mm1 still holding the values
 * set up by the first statement. No emms is issued here.
 */
static void vc1_inv_trans_8x8_dc_mmxext(uint8_t *dest, int linesize,
                                        int16_t *block)
{
    int dc = (3 * block[0] + 1) >> 1;
    int group;

    dc = (3 * dc + 16) >> 5;

    /* mm0 = dc, mm1 = -dc, each packed to bytes with unsigned saturation */
    __asm__ volatile(
        "movd          %0, %%mm0 \n\t"
        "pshufw $0, %%mm0, %%mm0 \n\t"
        "pxor       %%mm1, %%mm1 \n\t"
        "psubw      %%mm0, %%mm1 \n\t"
        "packuswb   %%mm0, %%mm0 \n\t"
        "packuswb   %%mm1, %%mm1 \n\t"
        :: "r"(dc)
    );

    /* rows 0-3, then rows 4-7 */
    for (group = 0; group < 2; group++) {
        uint8_t *rows = dest + group * 4 * linesize;

        /* movq transfers 8 bytes per row, so the memory operands must be
         * 64 bits wide; a 32-bit operand would tell the compiler that only
         * half of each row is read and written by the asm. */
        __asm__ volatile(
            "movq          %0, %%mm2 \n\t"
            "movq          %1, %%mm3 \n\t"
            "movq          %2, %%mm4 \n\t"
            "movq          %3, %%mm5 \n\t"
            "paddusb    %%mm0, %%mm2 \n\t"
            "paddusb    %%mm0, %%mm3 \n\t"
            "paddusb    %%mm0, %%mm4 \n\t"
            "paddusb    %%mm0, %%mm5 \n\t"
            "psubusb    %%mm1, %%mm2 \n\t"
            "psubusb    %%mm1, %%mm3 \n\t"
            "psubusb    %%mm1, %%mm4 \n\t"
            "psubusb    %%mm1, %%mm5 \n\t"
            "movq       %%mm2, %0    \n\t"
            "movq       %%mm3, %1    \n\t"
            "movq       %%mm4, %2    \n\t"
            "movq       %%mm5, %3    \n\t"
            : "+m"(*(uint64_t *)(rows + 0 * linesize)),
              "+m"(*(uint64_t *)(rows + 1 * linesize)),
              "+m"(*(uint64_t *)(rows + 2 * linesize)),
              "+m"(*(uint64_t *)(rows + 3 * linesize))
        );
    }
}
/* FN_ASSIGN(OP, X, Y, INSN): fill slot X+4*Y of dsp->OP##vc1_mspel_pixels_tab.
 * Row [1] receives OP##vc1_mspel_mc<X><Y><INSN> and row [0] receives the
 * variant with "_16" inserted before INSN. Expects a variable named dsp in
 * scope; the last statement has no terminating semicolon, so the invocation
 * supplies it. */
#define FN_ASSIGN(OP, X, Y, INSN) \
dsp->OP##vc1_mspel_pixels_tab[1][X+4*Y] = OP##vc1_mspel_mc##X##Y##INSN; \
dsp->OP##vc1_mspel_pixels_tab[0][X+4*Y] = OP##vc1_mspel_mc##X##Y##_16##INSN
...
...
@@ -729,10 +527,5 @@ av_cold void ff_vc1dsp_init_mmxext(VC1DSPContext *dsp)
FN_ASSIGN
(
avg_
,
3
,
1
,
_mmxext
);
FN_ASSIGN
(
avg_
,
3
,
2
,
_mmxext
);
FN_ASSIGN
(
avg_
,
3
,
3
,
_mmxext
);
dsp
->
vc1_inv_trans_8x8_dc
=
vc1_inv_trans_8x8_dc_mmxext
;
dsp
->
vc1_inv_trans_4x8_dc
=
vc1_inv_trans_4x8_dc_mmxext
;
dsp
->
vc1_inv_trans_8x4_dc
=
vc1_inv_trans_8x4_dc_mmxext
;
dsp
->
vc1_inv_trans_4x4_dc
=
vc1_inv_trans_4x4_dc_mmxext
;
}
#endif
/* HAVE_6REGS && HAVE_INLINE_ASM && HAVE_MMX_EXTERNAL */
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment