• Mark Reid's avatar
    swscale/x86/output.asm: add x86-optimized planar gbr yuv2anyX functions · 9e445a5b
    Mark Reid authored
    changes since v2:
     * fixed label
    changes since v1:
     * remove vex instruction on sse4 path
     * some load/pack macros use fewer instructions
     * fixed some typos
    
    yuv2gbrp_full_X_4_512_c: 12757.6
    yuv2gbrp_full_X_4_512_sse2: 8946.6
    yuv2gbrp_full_X_4_512_sse4: 5138.6
    yuv2gbrp_full_X_4_512_avx2: 3889.6
    yuv2gbrap_full_X_4_512_c: 15368.6
    yuv2gbrap_full_X_4_512_sse2: 11916.1
    yuv2gbrap_full_X_4_512_sse4: 6294.6
    yuv2gbrap_full_X_4_512_avx2: 3477.1
    yuv2gbrp9be_full_X_4_512_c: 14381.6
    yuv2gbrp9be_full_X_4_512_sse2: 9139.1
    yuv2gbrp9be_full_X_4_512_sse4: 5150.1
    yuv2gbrp9be_full_X_4_512_avx2: 2834.6
    yuv2gbrp9le_full_X_4_512_c: 12990.1
    yuv2gbrp9le_full_X_4_512_sse2: 9118.1
    yuv2gbrp9le_full_X_4_512_sse4: 5132.1
    yuv2gbrp9le_full_X_4_512_avx2: 2833.1
    yuv2gbrp10be_full_X_4_512_c: 14401.6
    yuv2gbrp10be_full_X_4_512_sse2: 9133.1
    yuv2gbrp10be_full_X_4_512_sse4: 5126.1
    yuv2gbrp10be_full_X_4_512_avx2: 2837.6
    yuv2gbrp10le_full_X_4_512_c: 12718.1
    yuv2gbrp10le_full_X_4_512_sse2: 9106.1
    yuv2gbrp10le_full_X_4_512_sse4: 5120.1
    yuv2gbrp10le_full_X_4_512_avx2: 2826.1
    yuv2gbrap10be_full_X_4_512_c: 18535.6
    yuv2gbrap10be_full_X_4_512_sse2: 33617.6
    yuv2gbrap10be_full_X_4_512_sse4: 6264.1
    yuv2gbrap10be_full_X_4_512_avx2: 3422.1
    yuv2gbrap10le_full_X_4_512_c: 16724.1
    yuv2gbrap10le_full_X_4_512_sse2: 11787.1
    yuv2gbrap10le_full_X_4_512_sse4: 6282.1
    yuv2gbrap10le_full_X_4_512_avx2: 3441.6
    yuv2gbrp12be_full_X_4_512_c: 13723.6
    yuv2gbrp12be_full_X_4_512_sse2: 9128.1
    yuv2gbrp12be_full_X_4_512_sse4: 7997.6
    yuv2gbrp12be_full_X_4_512_avx2: 2844.1
    yuv2gbrp12le_full_X_4_512_c: 12257.1
    yuv2gbrp12le_full_X_4_512_sse2: 9107.6
    yuv2gbrp12le_full_X_4_512_sse4: 5142.6
    yuv2gbrp12le_full_X_4_512_avx2: 2837.6
    yuv2gbrap12be_full_X_4_512_c: 18511.1
    yuv2gbrap12be_full_X_4_512_sse2: 12156.6
    yuv2gbrap12be_full_X_4_512_sse4: 6251.1
    yuv2gbrap12be_full_X_4_512_avx2: 3444.6
    yuv2gbrap12le_full_X_4_512_c: 16687.1
    yuv2gbrap12le_full_X_4_512_sse2: 11785.1
    yuv2gbrap12le_full_X_4_512_sse4: 6243.6
    yuv2gbrap12le_full_X_4_512_avx2: 3446.1
    yuv2gbrp14be_full_X_4_512_c: 13690.6
    yuv2gbrp14be_full_X_4_512_sse2: 9120.6
    yuv2gbrp14be_full_X_4_512_sse4: 5138.1
    yuv2gbrp14be_full_X_4_512_avx2: 2843.1
    yuv2gbrp14le_full_X_4_512_c: 14995.6
    yuv2gbrp14le_full_X_4_512_sse2: 9119.1
    yuv2gbrp14le_full_X_4_512_sse4: 5126.1
    yuv2gbrp14le_full_X_4_512_avx2: 2843.1
    yuv2gbrp16be_full_X_4_512_c: 12367.1
    yuv2gbrp16be_full_X_4_512_sse2: 8233.6
    yuv2gbrp16be_full_X_4_512_sse4: 4820.1
    yuv2gbrp16be_full_X_4_512_avx2: 2666.6
    yuv2gbrp16le_full_X_4_512_c: 10904.1
    yuv2gbrp16le_full_X_4_512_sse2: 8214.1
    yuv2gbrp16le_full_X_4_512_sse4: 4824.1
    yuv2gbrp16le_full_X_4_512_avx2: 2629.1
    yuv2gbrap16be_full_X_4_512_c: 26569.6
    yuv2gbrap16be_full_X_4_512_sse2: 10884.1
    yuv2gbrap16be_full_X_4_512_sse4: 5488.1
    yuv2gbrap16be_full_X_4_512_avx2: 3272.1
    yuv2gbrap16le_full_X_4_512_c: 14010.1
    yuv2gbrap16le_full_X_4_512_sse2: 10562.1
    yuv2gbrap16le_full_X_4_512_sse4: 5463.6
    yuv2gbrap16le_full_X_4_512_avx2: 3255.1
    yuv2gbrpf32be_full_X_4_512_c: 14524.1
    yuv2gbrpf32be_full_X_4_512_sse2: 8552.6
    yuv2gbrpf32be_full_X_4_512_sse4: 4636.1
    yuv2gbrpf32be_full_X_4_512_avx2: 2474.6
    yuv2gbrpf32le_full_X_4_512_c: 13060.6
    yuv2gbrpf32le_full_X_4_512_sse2: 9682.6
    yuv2gbrpf32le_full_X_4_512_sse4: 4298.1
    yuv2gbrpf32le_full_X_4_512_avx2: 2453.1
    yuv2gbrapf32be_full_X_4_512_c: 18629.6
    yuv2gbrapf32be_full_X_4_512_sse2: 11363.1
    yuv2gbrapf32be_full_X_4_512_sse4: 15201.6
    yuv2gbrapf32be_full_X_4_512_avx2: 3727.1
    yuv2gbrapf32le_full_X_4_512_c: 16677.6
    yuv2gbrapf32le_full_X_4_512_sse2: 10221.6
    yuv2gbrapf32le_full_X_4_512_sse4: 5693.6
    yuv2gbrapf32le_full_X_4_512_avx2: 3656.6
    Reviewed-by: Paul B Mahol <onemda@gmail.com>
    Signed-off-by: James Almer <jamrial@gmail.com>
    9e445a5b
checkasm.mak 3.4 KB