Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
F
ffmpeg
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Stefan Westerfeld
ffmpeg
Commits
47f21232
Commit
47f21232
authored
Jan 08, 2017
by
James Almer
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
huffyuvdsp: move functions only used by huffyuv from lossless_videodsp
Signed-off-by:
James Almer
<
jamrial@gmail.com
>
parent
cf9ef839
Changes
14
Hide whitespace changes
Inline
Side-by-side
Showing
14 changed files
with
208 additions
and
204 deletions
+208
-204
huffyuvdec.c
libavcodec/huffyuvdec.c
+4
-4
huffyuvdsp.c
libavcodec/huffyuvdsp.c
+34
-2
huffyuvdsp.h
libavcodec/huffyuvdsp.h
+10
-3
lagarith.c
libavcodec/lagarith.c
+1
-1
lossless_videodsp.c
libavcodec/lossless_videodsp.c
+2
-34
lossless_videodsp.h
libavcodec/lossless_videodsp.h
+3
-6
magicyuv.c
libavcodec/magicyuv.c
+1
-1
lossless_videodsp_altivec.c
libavcodec/ppc/lossless_videodsp_altivec.c
+1
-1
utvideodec.c
libavcodec/utvideodec.c
+1
-1
vble.c
libavcodec/vble.c
+1
-1
huffyuvdsp.asm
libavcodec/x86/huffyuvdsp.asm
+137
-0
huffyuvdsp_init.c
libavcodec/x86/huffyuvdsp_init.c
+12
-1
lossless_videodsp.asm
libavcodec/x86/lossless_videodsp.asm
+0
-136
lossless_videodsp_init.c
libavcodec/x86/lossless_videodsp_init.c
+1
-13
No files found.
libavcodec/huffyuvdec.c
View file @
47f21232
...
...
@@ -297,8 +297,8 @@ static av_cold int decode_init(AVCodecContext *avctx)
if
(
ret
<
0
)
return
ret
;
ff_huffyuvdsp_init
(
&
s
->
hdsp
);
ff_llviddsp_init
(
&
s
->
llviddsp
,
avctx
);
ff_huffyuvdsp_init
(
&
s
->
hdsp
,
avctx
);
ff_llviddsp_init
(
&
s
->
llviddsp
);
memset
(
s
->
vlc
,
0
,
4
*
sizeof
(
VLC
));
s
->
interlaced
=
avctx
->
height
>
288
;
...
...
@@ -891,7 +891,7 @@ static void add_bytes(HYuvContext *s, uint8_t *dst, uint8_t *src, int w)
if
(
s
->
bps
<=
8
)
{
s
->
llviddsp
.
add_bytes
(
dst
,
src
,
w
);
}
else
{
s
->
llvid
dsp
.
add_int16
((
uint16_t
*
)
dst
,
(
const
uint16_t
*
)
src
,
s
->
n
-
1
,
w
);
s
->
h
dsp
.
add_int16
((
uint16_t
*
)
dst
,
(
const
uint16_t
*
)
src
,
s
->
n
-
1
,
w
);
}
}
...
...
@@ -900,7 +900,7 @@ static void add_median_prediction(HYuvContext *s, uint8_t *dst, const uint8_t *s
if
(
s
->
bps
<=
8
)
{
s
->
llviddsp
.
add_median_pred
(
dst
,
src
,
diff
,
w
,
left
,
left_top
);
}
else
{
s
->
llvid
dsp
.
add_hfyu_median_pred_int16
((
uint16_t
*
)
dst
,
(
const
uint16_t
*
)
src
,
(
const
uint16_t
*
)
diff
,
s
->
n
-
1
,
w
,
left
,
left_top
);
s
->
h
dsp
.
add_hfyu_median_pred_int16
((
uint16_t
*
)
dst
,
(
const
uint16_t
*
)
src
,
(
const
uint16_t
*
)
diff
,
s
->
n
-
1
,
w
,
left
,
left_top
);
}
}
static
int
decode_frame
(
AVCodecContext
*
avctx
,
void
*
data
,
int
*
got_frame
,
...
...
libavcodec/huffyuvdsp.c
View file @
47f21232
...
...
@@ -23,6 +23,36 @@
#include <string.h>

#include "mathops.h"
#include "huffyuvdsp.h"
/**
 * Add one row of 16-bit samples to another in place:
 * dst[i] = (dst[i] + src[i]) & mask for i in [0, w).
 *
 * Most of the row is processed one machine word (sizeof(long) bytes, i.e.
 * sizeof(long) / 2 samples) at a time; whatever is left over is handled by a
 * plain per-sample loop.
 *
 * The word-wise path splits every 16-bit lane into its top significant bit
 * (pw_msb) and the bits below it (pw_lsb): the low bits are added normally,
 * which can carry at most into the top bit of the same lane, and the top bits
 * are combined with XOR so that no carry ever leaks into the next lane.
 * This only equals a masked addition when mask has the form (1 << k) - 1
 * with k >= 1.
 *
 * Words are loaded and stored with memcpy() instead of through a casted
 * long pointer, so the function neither violates the effective-type
 * (strict aliasing) rules nor requires dst/src to be word aligned.
 *
 * @param dst  row that is read and overwritten with the sums
 * @param src  row that is added to dst
 * @param mask per-sample bit mask applied to every sum
 * @param w    number of 16-bit samples in each row
 */
static void add_int16_c(uint16_t *dst, const uint16_t *src, unsigned mask, int w)
{
    enum { SAMPLES_PER_WORD = sizeof(unsigned long) / sizeof(uint16_t) };

    const unsigned long pw_lsb = (mask >> 1) * 0x0001000100010001ULL;
    const unsigned long pw_msb = pw_lsb + 0x0001000100010001ULL;
    long i;

    for (i = 0; i <= w - (int)SAMPLES_PER_WORD; i += SAMPLES_PER_WORD) {
        unsigned long a, b;

        memcpy(&a, src + i, sizeof(a));
        memcpy(&b, dst + i, sizeof(b));
        b = ((a & pw_lsb) + (b & pw_lsb)) ^ ((a ^ b) & pw_msb);
        memcpy(dst + i, &b, sizeof(b));
    }

    /* Tail: fewer than one word's worth of samples remain. */
    for (; i < w; i++)
        dst[i] = (dst[i] + src[i]) & mask;
}
/**
 * Undo median prediction for one row of 16-bit samples.
 *
 * For every position the predictor is mid_pred() of the sample to the left,
 * the sample above (src[i]) and the masked gradient left + top - top_left;
 * the residual diff[i] is added to it and the sum is masked again.
 *
 * @param dst      output row
 * @param src      row above the one being reconstructed
 * @param diff     residuals for the row being reconstructed
 * @param mask     bit mask applied to the gradient and to every result
 * @param w        number of samples in the row
 * @param left     in: sample left of the first position;
 *                 out: last reconstructed sample
 * @param left_top in: sample above-left of the first position;
 *                 out: last sample read from src
 */
static void add_hfyu_median_pred_int16_c(uint16_t *dst, const uint16_t *src,
                                         const uint16_t *diff, unsigned mask,
                                         int w, int *left, int *left_top)
{
    uint16_t cur_left     = *left;
    uint16_t cur_left_top = *left_top;
    int x = 0;

    while (x < w) {
        const uint16_t top      = src[x];
        const unsigned gradient = (cur_left + top - cur_left_top) & mask;

        cur_left     = (mid_pred(cur_left, top, gradient) + diff[x]) & mask;
        cur_left_top = top;
        dst[x]       = cur_left;
        x++;
    }

    *left     = cur_left;
    *left_top = cur_left_top;
}
static
void
add_hfyu_left_pred_bgr32_c
(
uint8_t
*
dst
,
const
uint8_t
*
src
,
intptr_t
w
,
uint8_t
*
left
)
{
...
...
@@ -47,10 +77,12 @@ static void add_hfyu_left_pred_bgr32_c(uint8_t *dst, const uint8_t *src,
left
[
A
]
=
a
;
}
av_cold
void
ff_huffyuvdsp_init
(
HuffYUVDSPContext
*
c
)
av_cold
void
ff_huffyuvdsp_init
(
HuffYUVDSPContext
*
c
,
AVCodecContext
*
avctx
)
{
c
->
add_int16
=
add_int16_c
;
c
->
add_hfyu_median_pred_int16
=
add_hfyu_median_pred_int16_c
;
c
->
add_hfyu_left_pred_bgr32
=
add_hfyu_left_pred_bgr32_c
;
if
(
ARCH_X86
)
ff_huffyuvdsp_init_x86
(
c
);
ff_huffyuvdsp_init_x86
(
c
,
avctx
);
}
libavcodec/huffyuvdsp.h
View file @
47f21232
...
...
@@ -21,6 +21,7 @@
#include <stdint.h>
#include "config.h"
#include "avcodec.h"
#if HAVE_BIGENDIAN
#define B 3
...
...
@@ -35,12 +36,18 @@
#endif
/**
 * Table of function pointers for the HuffYUV DSP routines.
 * Each entry is called through the context, so the concrete implementation
 * can be swapped without touching the callers.
 */
typedef struct HuffYUVDSPContext {
    /**
     * In-place masked addition of two rows of w 16-bit samples.
     * The inline notes ask for 16-byte aligned dst and src.
     */
    void (*add_int16)(uint16_t *dst /*align 16*/,
                      const uint16_t *src /*align 16*/,
                      unsigned mask, int w);

    /**
     * Median-prediction reconstruction of one row of w 16-bit samples from
     * the row above (top) and the residuals (diff); *left and *left_top are
     * passed by pointer.
     */
    void (*add_hfyu_median_pred_int16)(uint16_t *dst, const uint16_t *top,
                                       const uint16_t *diff, unsigned mask,
                                       int w, int *left, int *left_top);

    /**
     * Left prediction on BGR32 data; only the signature is visible here,
     * see the implementation for the exact semantics of w and left.
     */
    void (*add_hfyu_left_pred_bgr32)(uint8_t *dst, const uint8_t *src,
                                     intptr_t w, uint8_t *left);
} HuffYUVDSPContext;
void
ff_huffyuvdsp_init
(
HuffYUVDSPContext
*
c
);
void
ff_huffyuvdsp_init_ppc
(
HuffYUVDSPContext
*
c
);
void
ff_huffyuvdsp_init_x86
(
HuffYUVDSPContext
*
c
);
void
ff_huffyuvdsp_init
(
HuffYUVDSPContext
*
c
,
AVCodecContext
*
avctx
);
void
ff_huffyuvdsp_init_ppc
(
HuffYUVDSPContext
*
c
,
AVCodecContext
*
avctx
);
void
ff_huffyuvdsp_init_x86
(
HuffYUVDSPContext
*
c
,
AVCodecContext
*
avctx
);
#endif
/* AVCODEC_HUFFYUVDSP_H */
libavcodec/lagarith.c
View file @
47f21232
...
...
@@ -725,7 +725,7 @@ static av_cold int lag_decode_init(AVCodecContext *avctx)
LagarithContext
*
l
=
avctx
->
priv_data
;
l
->
avctx
=
avctx
;
ff_llviddsp_init
(
&
l
->
llviddsp
,
avctx
);
ff_llviddsp_init
(
&
l
->
llviddsp
);
return
0
;
}
...
...
libavcodec/lossless_videodsp.c
View file @
47f21232
...
...
@@ -79,36 +79,6 @@ static int add_left_pred_c(uint8_t *dst, const uint8_t *src, intptr_t w,
return
acc
;
}
/**
 * Add one row of 16-bit samples to another in place:
 * dst[i] = (dst[i] + src[i]) & mask for i in [0, w).
 *
 * Most of the row is processed one machine word (sizeof(long) bytes, i.e.
 * sizeof(long) / 2 samples) at a time; whatever is left over is handled by a
 * plain per-sample loop.
 *
 * The word-wise path splits every 16-bit lane into its top significant bit
 * (pw_msb) and the bits below it (pw_lsb): the low bits are added normally,
 * which can carry at most into the top bit of the same lane, and the top bits
 * are combined with XOR so that no carry ever leaks into the next lane.
 * This only equals a masked addition when mask has the form (1 << k) - 1
 * with k >= 1.
 *
 * Words are loaded and stored with memcpy() instead of through a casted
 * long pointer, so the function neither violates the effective-type
 * (strict aliasing) rules nor requires dst/src to be word aligned.
 *
 * @param dst  row that is read and overwritten with the sums
 * @param src  row that is added to dst
 * @param mask per-sample bit mask applied to every sum
 * @param w    number of 16-bit samples in each row
 */
static void add_int16_c(uint16_t *dst, const uint16_t *src, unsigned mask, int w)
{
    enum { SAMPLES_PER_WORD = sizeof(unsigned long) / sizeof(uint16_t) };

    const unsigned long pw_lsb = (mask >> 1) * 0x0001000100010001ULL;
    const unsigned long pw_msb = pw_lsb + 0x0001000100010001ULL;
    long i;

    for (i = 0; i <= w - (int)SAMPLES_PER_WORD; i += SAMPLES_PER_WORD) {
        unsigned long a, b;

        memcpy(&a, src + i, sizeof(a));
        memcpy(&b, dst + i, sizeof(b));
        b = ((a & pw_lsb) + (b & pw_lsb)) ^ ((a ^ b) & pw_msb);
        memcpy(dst + i, &b, sizeof(b));
    }

    /* Tail: fewer than one word's worth of samples remain. */
    for (; i < w; i++)
        dst[i] = (dst[i] + src[i]) & mask;
}
/**
 * Undo median prediction for one row of 16-bit samples.
 *
 * For every position the predictor is mid_pred() of the sample to the left,
 * the sample above (src[i]) and the masked gradient left + top - top_left;
 * the residual diff[i] is added to it and the sum is masked again.
 *
 * @param dst      output row
 * @param src      row above the one being reconstructed
 * @param diff     residuals for the row being reconstructed
 * @param mask     bit mask applied to the gradient and to every result
 * @param w        number of samples in the row
 * @param left     in: sample left of the first position;
 *                 out: last reconstructed sample
 * @param left_top in: sample above-left of the first position;
 *                 out: last sample read from src
 */
static void add_hfyu_median_pred_int16_c(uint16_t *dst, const uint16_t *src,
                                         const uint16_t *diff, unsigned mask,
                                         int w, int *left, int *left_top)
{
    uint16_t cur_left     = *left;
    uint16_t cur_left_top = *left_top;
    int x = 0;

    while (x < w) {
        const uint16_t top      = src[x];
        const unsigned gradient = (cur_left + top - cur_left_top) & mask;

        cur_left     = (mid_pred(cur_left, top, gradient) + diff[x]) & mask;
        cur_left_top = top;
        dst[x]       = cur_left;
        x++;
    }

    *left     = cur_left;
    *left_top = cur_left_top;
}
static
int
add_hfyu_left_pred_int16_c
(
uint16_t
*
dst
,
const
uint16_t
*
src
,
unsigned
mask
,
int
w
,
unsigned
acc
){
int
i
;
...
...
@@ -129,16 +99,14 @@ static int add_hfyu_left_pred_int16_c(uint16_t *dst, const uint16_t *src, unsign
}
void
ff_llviddsp_init
(
LLVidDSPContext
*
c
,
AVCodecContext
*
avctx
)
void
ff_llviddsp_init
(
LLVidDSPContext
*
c
)
{
c
->
add_bytes
=
add_bytes_c
;
c
->
add_median_pred
=
add_median_pred_c
;
c
->
add_left_pred
=
add_left_pred_c
;
c
->
add_int16
=
add_int16_c
;
c
->
add_hfyu_left_pred_int16
=
add_hfyu_left_pred_int16_c
;
c
->
add_hfyu_median_pred_int16
=
add_hfyu_median_pred_int16_c
;
if
(
ARCH_X86
)
ff_llviddsp_init_x86
(
c
,
avctx
);
ff_llviddsp_init_x86
(
c
);
}
libavcodec/lossless_videodsp.h
View file @
47f21232
...
...
@@ -34,14 +34,11 @@ typedef struct LLVidDSPContext {
int
(
*
add_left_pred
)(
uint8_t
*
dst
,
const
uint8_t
*
src
,
intptr_t
w
,
int
left
);
void
(
*
add_int16
)(
uint16_t
*
dst
/*align 16*/
,
const
uint16_t
*
src
/*align 16*/
,
unsigned
mask
,
int
w
);
void
(
*
add_hfyu_median_pred_int16
)(
uint16_t
*
dst
,
const
uint16_t
*
top
,
const
uint16_t
*
diff
,
unsigned
mask
,
int
w
,
int
*
left
,
int
*
left_top
);
int
(
*
add_hfyu_left_pred_int16
)(
uint16_t
*
dst
,
const
uint16_t
*
src
,
unsigned
mask
,
int
w
,
unsigned
left
);
}
LLVidDSPContext
;
void
ff_llviddsp_init
(
LLVidDSPContext
*
llviddsp
,
AVCodecContext
*
avctx
);
void
ff_llviddsp_init_x86
(
LLVidDSPContext
*
llviddsp
,
AVCodecContext
*
avctx
);
void
ff_llviddsp_init_ppc
(
LLVidDSPContext
*
llviddsp
,
AVCodecContext
*
avctx
);
void
ff_llviddsp_init
(
LLVidDSPContext
*
llviddsp
);
void
ff_llviddsp_init_x86
(
LLVidDSPContext
*
llviddsp
);
void
ff_llviddsp_init_ppc
(
LLVidDSPContext
*
llviddsp
);
#endif //AVCODEC_LOSSLESS_VIDEODSP_H
libavcodec/magicyuv.c
View file @
47f21232
...
...
@@ -697,7 +697,7 @@ static int magy_init_thread_copy(AVCodecContext *avctx)
static
av_cold
int
magy_decode_init
(
AVCodecContext
*
avctx
)
{
MagicYUVContext
*
s
=
avctx
->
priv_data
;
ff_llviddsp_init
(
&
s
->
llviddsp
,
avctx
);
ff_llviddsp_init
(
&
s
->
llviddsp
);
return
0
;
}
...
...
libavcodec/ppc/lossless_videodsp_altivec.c
View file @
47f21232
...
...
@@ -51,7 +51,7 @@ static void add_bytes_altivec(uint8_t *dst, uint8_t *src, intptr_t w)
}
#endif
/* HAVE_ALTIVEC */
av_cold
void
ff_llviddsp_init_ppc
(
LLVidDSPContext
*
c
,
AVCodecContext
*
avctx
)
av_cold
void
ff_llviddsp_init_ppc
(
LLVidDSPContext
*
c
)
{
#if HAVE_ALTIVEC
if
(
!
PPC_ALTIVEC
(
av_get_cpu_flags
()))
...
...
libavcodec/utvideodec.c
View file @
47f21232
...
...
@@ -827,7 +827,7 @@ static av_cold int decode_init(AVCodecContext *avctx)
c
->
avctx
=
avctx
;
ff_bswapdsp_init
(
&
c
->
bdsp
);
ff_llviddsp_init
(
&
c
->
llviddsp
,
avctx
);
ff_llviddsp_init
(
&
c
->
llviddsp
);
if
(
avctx
->
extradata_size
>=
16
)
{
av_log
(
avctx
,
AV_LOG_DEBUG
,
"Encoder version %d.%d.%d.%d
\n
"
,
...
...
libavcodec/vble.c
View file @
47f21232
...
...
@@ -185,7 +185,7 @@ static av_cold int vble_decode_init(AVCodecContext *avctx)
/* Stash for later use */
ctx
->
avctx
=
avctx
;
ff_llviddsp_init
(
&
ctx
->
llviddsp
,
avctx
);
ff_llviddsp_init
(
&
ctx
->
llviddsp
);
avctx
->
pix_fmt
=
AV_PIX_FMT_YUV420P
;
avctx
->
bits_per_raw_sample
=
8
;
...
...
libavcodec/x86/huffyuvdsp.asm
View file @
47f21232
...
...
@@ -24,6 +24,78 @@
SECTION
.
text
; INT16_LOOP <a|u>, <add|sub>
;
; Body of a masked 16-bit row operation, expanded inside a cglobal function
; that provides the named registers:
;   add: dst[i] = (src[i]  + dst[i])  & mask   (uses src, dst)
;   sub: dst[i] = (src1[i] - src2[i]) & mask   (uses src1, src2, dst)
; w arrives as a count of 16-bit samples. The trailing samples that do not fill
; a whole 2*mmsize-byte group are handled one word at a time first (walking
; backwards from the end of the row); the rest is handled two vector registers
; per iteration. The macro ends the function with RET.
%macro INT16_LOOP 2 ; %1 = a/u (aligned/unaligned), %2 = add/sub
    movd    m4, maskd
    SPLATW  m4, m4                      ; m4 is the per-lane mask used by pand below
    add     wd, wd                      ; w *= 2: from here on w is a byte count
    test    wq, 2*mmsize - 1
    jz %%.tomainloop                    ; byte count already a multiple of 2*mmsize
    push  tmpq                          ; tmp is restored by the pop after the word loop
%%.wordloop:
    sub     wq, 2                       ; step back one 16-bit sample
%ifidn %2, add
    mov   tmpw, [srcq+wq]
    add   tmpw, [dstq+wq]
%else
    mov   tmpw, [src1q+wq]
    sub   tmpw, [src2q+wq]
%endif
    and   tmpw, maskw
    mov     [dstq+wq], tmpw
    test    wq, 2*mmsize - 1
    jnz %%.wordloop                     ; until the remaining size is a multiple of 2*mmsize
    pop   tmpq
%%.tomainloop:
    ; Point every buffer at the end of the remaining region and run w from
    ; -size up to 0.
%ifidn %2, add
    add     srcq, wq
%else
    add     src1q, wq
    add     src2q, wq
%endif
    add     dstq, wq
    neg     wq
    jz      %%.end                      ; nothing left for the vector loop
%%.loop:
%ifidn %2, add
    mov%1   m0, [srcq+wq]
    mov%1   m1, [dstq+wq]
    mov%1   m2, [srcq+wq+mmsize]
    mov%1   m3, [dstq+wq+mmsize]
%else
    mov%1   m0, [src1q+wq]
    mov%1   m1, [src2q+wq]
    mov%1   m2, [src1q+wq+mmsize]
    mov%1   m3, [src2q+wq+mmsize]
%endif
    p%2w    m0, m1                      ; paddw / psubw
    p%2w    m2, m3
    pand    m0, m4
    pand    m2, m4
    mov%1   [dstq+wq]       , m0
    mov%1   [dstq+wq+mmsize], m2
    add     wq, 2*mmsize
    jl %%.loop
%%.end:
    RET
%endmacro
; void ff_add_int16_mmx(uint16_t *dst, const uint16_t *src, unsigned mask, int w)
; MMX version, only assembled for 32-bit x86 builds. Always expands
; INT16_LOOP with the "a" load/store variant.
%if ARCH_X86_32
INIT_MMX mmx
cglobal add_int16, 4,4,5, dst, src, mask, w, tmp
    INT16_LOOP a, add
%endif
; void ff_add_int16_sse2(uint16_t *dst, const uint16_t *src, unsigned mask, int w)
; SSE2 version. Checks both pointers against mmsize-1 and expands INT16_LOOP
; with aligned accesses only when src and dst are both mmsize-aligned;
; otherwise the unaligned expansion is used. Each expansion ends in RET, so
; the aligned path never falls through into .unaligned.
INIT_XMM sse2
cglobal add_int16, 4,4,5, dst, src, mask, w, tmp
    test srcq, mmsize-1
    jnz .unaligned
    test dstq, mmsize-1
    jnz .unaligned
    INT16_LOOP a, add
.unaligned:
    INT16_LOOP u, add
; void add_hfyu_left_pred_bgr32(uint8_t *dst, const uint8_t *src,
; intptr_t w, uint8_t *left)
%macro
LEFT_BGR32
0
...
...
@@ -63,3 +135,68 @@ LEFT_BGR32
%endif
INIT_XMM
sse2
LEFT_BGR32
; void ff_add_hfyu_median_pred_int16_mmxext(uint16_t *dst, const uint16_t *top,
;                                           const uint16_t *diff, unsigned mask,
;                                           int w, int *left, int *left_top)
;
; Median-prediction reconstruction of one row of 16-bit samples.
; Four samples (8 bytes) are loaded per iteration and then resolved one after
; another by the %rep block, because each result is the "left" input of the
; next sample.
; Register roles inside the loop:
;   mm0 = t - tl      mm1 = t (top)      mm2 = residuals (diff)
;   mm3 = l (left, low word)             mm6 = mask in every word
;   mm7 = the four finished output words
INIT_MMX mmxext
cglobal add_hfyu_median_pred_int16, 7,7,0, dst, top, diff, mask, w, left, left_top
    add      wd, wd                     ; w *= 2: sample count -> byte count
    movd    mm6, maskd
    SPLATW  mm6, mm6
    ; First group: the top-left vector is top shifted up by one word with
    ; *left_top inserted as its lowest word.
    movq    mm0, [topq]
    movq    mm2, mm0
    movd    mm4, [left_topq]
    psllq   mm2, 16
    movq    mm1, mm0
    por     mm4, mm2
    movd    mm3, [leftq]
    psubw   mm0, mm4 ; t-tl
    ; Point the buffers at the end of the row and count w from -size up to 0.
    add    dstq, wq
    add    topq, wq
    add   diffq, wq
    neg      wq
    jmp .skip                           ; t and t-tl for the first group are already set up
.loop:
    movq    mm4, [topq+wq]
    movq    mm0, mm4
    psllq   mm4, 16
    por     mm4, mm1                    ; low word comes from what is left of the previous t
    movq    mm1, mm0 ; t
    psubw   mm0, mm4 ; t-tl
.skip:
    movq    mm2, [diffq+wq]
%assign i 0
%rep 4
    movq    mm4, mm0
    paddw   mm4, mm3 ; t-tl+l
    pand    mm4, mm6
    movq    mm5, mm3
    pmaxsw  mm3, mm1
    pminsw  mm5, mm1
    pminsw  mm3, mm4
    pmaxsw  mm3, mm5 ; median
    paddw   mm3, mm2 ; +residual
    pand    mm3, mm6
    ; Shift the finished word into mm7 from the top; after four rounds the
    ; words sit in memory order.
%if i==0
    movq    mm7, mm3
    psllq   mm7, 48
%else
    movq    mm4, mm3
    psrlq   mm7, 16
    psllq   mm4, 48
    por     mm7, mm4
%endif
    ; Move the next sample's inputs down into the low word.
%if i<3
    psrlq   mm0, 16
    psrlq   mm1, 16
    psrlq   mm2, 16
%endif
%assign i i+1
%endrep
    movq [dstq+wq], mm7
    add      wq, 8
    jl .loop
    ; Write back the state for the caller: last output sample and last top
    ; sample, zero-extended to 32 bits. r2 is used as scratch here.
    movzx   r2d, word [dstq-2]
    mov [leftq], r2d
    movzx   r2d, word [topq-2]
    mov [left_topq], r2d
    RET
libavcodec/x86/huffyuvdsp_init.c
View file @
47f21232
...
...
@@ -21,24 +21,35 @@
#include "config.h"
#include "libavutil/attributes.h"
#include "libavutil/cpu.h"
#include "libavutil/pixdesc.h"
#include "libavutil/x86/asm.h"
#include "libavutil/x86/cpu.h"
#include "libavcodec/huffyuvdsp.h"
void
ff_add_int16_mmx
(
uint16_t
*
dst
,
const
uint16_t
*
src
,
unsigned
mask
,
int
w
);
void
ff_add_int16_sse2
(
uint16_t
*
dst
,
const
uint16_t
*
src
,
unsigned
mask
,
int
w
);
void
ff_add_hfyu_left_pred_bgr32_mmx
(
uint8_t
*
dst
,
const
uint8_t
*
src
,
intptr_t
w
,
uint8_t
*
left
);
void
ff_add_hfyu_left_pred_bgr32_sse2
(
uint8_t
*
dst
,
const
uint8_t
*
src
,
intptr_t
w
,
uint8_t
*
left
);
void
ff_add_hfyu_median_pred_int16_mmxext
(
uint16_t
*
dst
,
const
uint16_t
*
top
,
const
uint16_t
*
diff
,
unsigned
mask
,
int
w
,
int
*
left
,
int
*
left_top
);
av_cold
void
ff_huffyuvdsp_init_x86
(
HuffYUVDSPContext
*
c
)
av_cold
void
ff_huffyuvdsp_init_x86
(
HuffYUVDSPContext
*
c
,
AVCodecContext
*
avctx
)
{
int
cpu_flags
=
av_get_cpu_flags
();
const
AVPixFmtDescriptor
*
pix_desc
=
av_pix_fmt_desc_get
(
avctx
->
pix_fmt
);
if
(
ARCH_X86_32
&&
EXTERNAL_MMX
(
cpu_flags
))
{
c
->
add_hfyu_left_pred_bgr32
=
ff_add_hfyu_left_pred_bgr32_mmx
;
c
->
add_int16
=
ff_add_int16_mmx
;
}
if
(
EXTERNAL_MMXEXT
(
cpu_flags
)
&&
pix_desc
&&
pix_desc
->
comp
[
0
].
depth
<
16
)
{
c
->
add_hfyu_median_pred_int16
=
ff_add_hfyu_median_pred_int16_mmxext
;
}
if
(
EXTERNAL_SSE2
(
cpu_flags
))
{
c
->
add_int16
=
ff_add_int16_sse2
;
c
->
add_hfyu_left_pred_bgr32
=
ff_add_hfyu_left_pred_bgr32_sse2
;
}
}
libavcodec/x86/lossless_videodsp.asm
View file @
47f21232
...
...
@@ -217,77 +217,6 @@ ADD_BYTES
INIT_XMM
sse2
ADD_BYTES
; INT16_LOOP <a|u>, <add|sub>
;
; Body of a masked 16-bit row operation, expanded inside a cglobal function
; that provides the named registers:
;   add: dst[i] = (src[i]  + dst[i])  & mask   (uses src, dst)
;   sub: dst[i] = (src1[i] - src2[i]) & mask   (uses src1, src2, dst)
; w arrives as a count of 16-bit samples. The trailing samples that do not fill
; a whole 2*mmsize-byte group are handled one word at a time first (walking
; backwards from the end of the row); the rest is handled two vector registers
; per iteration. The macro ends the function with RET.
%macro INT16_LOOP 2 ; %1 = a/u (aligned/unaligned), %2 = add/sub
    movd    m4, maskd
    SPLATW  m4, m4                      ; m4 is the per-lane mask used by pand below
    add     wd, wd                      ; w *= 2: from here on w is a byte count
    test    wq, 2*mmsize - 1
    jz %%.tomainloop                    ; byte count already a multiple of 2*mmsize
    push  tmpq                          ; tmp is restored by the pop after the word loop
%%.wordloop:
    sub     wq, 2                       ; step back one 16-bit sample
%ifidn %2, add
    mov   tmpw, [srcq+wq]
    add   tmpw, [dstq+wq]
%else
    mov   tmpw, [src1q+wq]
    sub   tmpw, [src2q+wq]
%endif
    and   tmpw, maskw
    mov     [dstq+wq], tmpw
    test    wq, 2*mmsize - 1
    jnz %%.wordloop                     ; until the remaining size is a multiple of 2*mmsize
    pop   tmpq
%%.tomainloop:
    ; Point every buffer at the end of the remaining region and run w from
    ; -size up to 0.
%ifidn %2, add
    add     srcq, wq
%else
    add     src1q, wq
    add     src2q, wq
%endif
    add     dstq, wq
    neg     wq
    jz      %%.end                      ; nothing left for the vector loop
%%.loop:
%ifidn %2, add
    mov%1   m0, [srcq+wq]
    mov%1   m1, [dstq+wq]
    mov%1   m2, [srcq+wq+mmsize]
    mov%1   m3, [dstq+wq+mmsize]
%else
    mov%1   m0, [src1q+wq]
    mov%1   m1, [src2q+wq]
    mov%1   m2, [src1q+wq+mmsize]
    mov%1   m3, [src2q+wq+mmsize]
%endif
    p%2w    m0, m1                      ; paddw / psubw
    p%2w    m2, m3
    pand    m0, m4
    pand    m2, m4
    mov%1   [dstq+wq]       , m0
    mov%1   [dstq+wq+mmsize], m2
    add     wq, 2*mmsize
    jl %%.loop
%%.end:
    RET
%endmacro
; void ff_add_int16_mmx(uint16_t *dst, const uint16_t *src, unsigned mask, int w)
; MMX version, only assembled for 32-bit x86 builds. Always expands
; INT16_LOOP with the "a" load/store variant.
%if ARCH_X86_32
INIT_MMX mmx
cglobal add_int16, 4,4,5, dst, src, mask, w, tmp
    INT16_LOOP a, add
%endif
; void ff_add_int16_sse2(uint16_t *dst, const uint16_t *src, unsigned mask, int w)
; SSE2 version. Checks both pointers against mmsize-1 and expands INT16_LOOP
; with aligned accesses only when src and dst are both mmsize-aligned;
; otherwise the unaligned expansion is used. Each expansion ends in RET, so
; the aligned path never falls through into .unaligned.
INIT_XMM sse2
cglobal add_int16, 4,4,5, dst, src, mask, w, tmp
    test srcq, mmsize-1
    jnz .unaligned
    test dstq, mmsize-1
    jnz .unaligned
    INT16_LOOP a, add
.unaligned:
    INT16_LOOP u, add
%macro
ADD_HFYU_LEFT_LOOP_INT16
2
; %1 = dst alignment (a/u), %2 = src alignment (a/u)
add
wd
,
wd
add
srcq
,
wq
...
...
@@ -359,68 +288,3 @@ cglobal add_hfyu_left_pred_int16, 4,4,8, dst, src, mask, w, left
ADD_HFYU_LEFT_LOOP_INT16
u
,
a
.
src_unaligned
:
ADD_HFYU_LEFT_LOOP_INT16
u
,
u
; void ff_add_hfyu_median_pred_int16_mmxext(uint16_t *dst, const uint16_t *top,
;                                           const uint16_t *diff, unsigned mask,
;                                           int w, int *left, int *left_top)
;
; Median-prediction reconstruction of one row of 16-bit samples.
; Four samples (8 bytes) are loaded per iteration and then resolved one after
; another by the %rep block, because each result is the "left" input of the
; next sample.
; Register roles inside the loop:
;   mm0 = t - tl      mm1 = t (top)      mm2 = residuals (diff)
;   mm3 = l (left, low word)             mm6 = mask in every word
;   mm7 = the four finished output words
INIT_MMX mmxext
cglobal add_hfyu_median_pred_int16, 7,7,0, dst, top, diff, mask, w, left, left_top
    add      wd, wd                     ; w *= 2: sample count -> byte count
    movd    mm6, maskd
    SPLATW  mm6, mm6
    ; First group: the top-left vector is top shifted up by one word with
    ; *left_top inserted as its lowest word.
    movq    mm0, [topq]
    movq    mm2, mm0
    movd    mm4, [left_topq]
    psllq   mm2, 16
    movq    mm1, mm0
    por     mm4, mm2
    movd    mm3, [leftq]
    psubw   mm0, mm4 ; t-tl
    ; Point the buffers at the end of the row and count w from -size up to 0.
    add    dstq, wq
    add    topq, wq
    add   diffq, wq
    neg      wq
    jmp .skip                           ; t and t-tl for the first group are already set up
.loop:
    movq    mm4, [topq+wq]
    movq    mm0, mm4
    psllq   mm4, 16
    por     mm4, mm1                    ; low word comes from what is left of the previous t
    movq    mm1, mm0 ; t
    psubw   mm0, mm4 ; t-tl
.skip:
    movq    mm2, [diffq+wq]
%assign i 0
%rep 4
    movq    mm4, mm0
    paddw   mm4, mm3 ; t-tl+l
    pand    mm4, mm6
    movq    mm5, mm3
    pmaxsw  mm3, mm1
    pminsw  mm5, mm1
    pminsw  mm3, mm4
    pmaxsw  mm3, mm5 ; median
    paddw   mm3, mm2 ; +residual
    pand    mm3, mm6
    ; Shift the finished word into mm7 from the top; after four rounds the
    ; words sit in memory order.
%if i==0
    movq    mm7, mm3
    psllq   mm7, 48
%else
    movq    mm4, mm3
    psrlq   mm7, 16
    psllq   mm4, 48
    por     mm7, mm4
%endif
    ; Move the next sample's inputs down into the low word.
%if i<3
    psrlq   mm0, 16
    psrlq   mm1, 16
    psrlq   mm2, 16
%endif
%assign i i+1
%endrep
    movq [dstq+wq], mm7
    add      wq, 8
    jl .loop
    ; Write back the state for the caller: last output sample and last top
    ; sample, zero-extended to 32 bits. r2 is used as scratch here.
    movzx   r2d, word [dstq-2]
    mov [leftq], r2d
    movzx   r2d, word [topq-2]
    mov [left_topq], r2d
    RET
libavcodec/x86/lossless_videodsp_init.c
View file @
47f21232
...
...
@@ -21,7 +21,6 @@
#include "config.h"
#include "libavutil/x86/asm.h"
#include "../lossless_videodsp.h"
#include "libavutil/pixdesc.h"
#include "libavutil/x86/cpu.h"
void
ff_add_bytes_mmx
(
uint8_t
*
dst
,
uint8_t
*
src
,
intptr_t
w
);
...
...
@@ -39,11 +38,8 @@ int ff_add_left_pred_ssse3(uint8_t *dst, const uint8_t *src,
int
ff_add_left_pred_sse4
(
uint8_t
*
dst
,
const
uint8_t
*
src
,
intptr_t
w
,
int
left
);
void
ff_add_int16_mmx
(
uint16_t
*
dst
,
const
uint16_t
*
src
,
unsigned
mask
,
int
w
);
void
ff_add_int16_sse2
(
uint16_t
*
dst
,
const
uint16_t
*
src
,
unsigned
mask
,
int
w
);
int
ff_add_hfyu_left_pred_int16_ssse3
(
uint16_t
*
dst
,
const
uint16_t
*
src
,
unsigned
mask
,
int
w
,
unsigned
acc
);
int
ff_add_hfyu_left_pred_int16_sse4
(
uint16_t
*
dst
,
const
uint16_t
*
src
,
unsigned
mask
,
int
w
,
unsigned
acc
);
void
ff_add_hfyu_median_pred_int16_mmxext
(
uint16_t
*
dst
,
const
uint16_t
*
top
,
const
uint16_t
*
diff
,
unsigned
mask
,
int
w
,
int
*
left
,
int
*
left_top
);
#if HAVE_INLINE_ASM && HAVE_7REGS && ARCH_X86_32
static
void
add_median_pred_cmov
(
uint8_t
*
dst
,
const
uint8_t
*
top
,
...
...
@@ -83,10 +79,9 @@ static void add_median_pred_cmov(uint8_t *dst, const uint8_t *top,
}
#endif
void
ff_llviddsp_init_x86
(
LLVidDSPContext
*
c
,
AVCodecContext
*
avctx
)
void
ff_llviddsp_init_x86
(
LLVidDSPContext
*
c
)
{
int
cpu_flags
=
av_get_cpu_flags
();
const
AVPixFmtDescriptor
*
pix_desc
=
av_pix_fmt_desc_get
(
avctx
->
pix_fmt
);
#if HAVE_INLINE_ASM && HAVE_7REGS && ARCH_X86_32
if
(
cpu_flags
&
AV_CPU_FLAG_CMOV
)
...
...
@@ -95,7 +90,6 @@ void ff_llviddsp_init_x86(LLVidDSPContext *c, AVCodecContext *avctx)
if
(
ARCH_X86_32
&&
EXTERNAL_MMX
(
cpu_flags
))
{
c
->
add_bytes
=
ff_add_bytes_mmx
;
c
->
add_int16
=
ff_add_int16_mmx
;
}
if
(
ARCH_X86_32
&&
EXTERNAL_MMXEXT
(
cpu_flags
))
{
...
...
@@ -104,15 +98,9 @@ void ff_llviddsp_init_x86(LLVidDSPContext *c, AVCodecContext *avctx)
c
->
add_median_pred
=
ff_add_median_pred_mmxext
;
}
if
(
EXTERNAL_MMXEXT
(
cpu_flags
)
&&
pix_desc
&&
pix_desc
->
comp
[
0
].
depth
<
16
)
{
c
->
add_hfyu_median_pred_int16
=
ff_add_hfyu_median_pred_int16_mmxext
;
}
if
(
EXTERNAL_SSE2
(
cpu_flags
))
{
c
->
add_bytes
=
ff_add_bytes_sse2
;
c
->
add_median_pred
=
ff_add_median_pred_sse2
;
c
->
add_int16
=
ff_add_int16_sse2
;
}
if
(
EXTERNAL_SSSE3
(
cpu_flags
))
{
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment