source: trunk/ffmpeg-svn-mactel.patch @ 64
| Revision 64, 16.4 KB checked in by gbooker, 7 years ago (diff) |
|---|
-
libavcodec/i386/dsputil_mmx.c
diff -NaurbB --exclude=.svn ../ffmpeg/libavcodec/i386/dsputil_mmx.c ./libavcodec/i386/dsputil_mmx.c
old new 52 52 static const uint64_t ff_pb_3F attribute_used __attribute__ ((aligned(8))) = 0x3F3F3F3F3F3F3F3FULL; 53 53 static const uint64_t ff_pb_FC attribute_used __attribute__ ((aligned(8))) = 0xFCFCFCFCFCFCFCFCULL; 54 54 55 #define JUMPALIGN() __asm __volatile ( ".balign 8"::)55 #define JUMPALIGN() __asm __volatile (BALIGN_8::) 56 56 #define MOVQ_ZERO(regd) __asm __volatile ("pxor %%" #regd ", %%" #regd ::) 57 57 58 58 #define MOVQ_WONE(regd) \ … … 195 195 asm volatile( 196 196 "mov $-128, %%"REG_a" \n\t" 197 197 "pxor %%mm7, %%mm7 \n\t" 198 ".balign 16 \n\t"198 BALIGN_16 199 199 "1: \n\t" 200 200 "movq (%0), %%mm0 \n\t" 201 201 "movq (%0, %2), %%mm2 \n\t" … … 223 223 asm volatile( 224 224 "pxor %%mm7, %%mm7 \n\t" 225 225 "mov $-128, %%"REG_a" \n\t" 226 ".balign 16 \n\t"226 BALIGN_16 227 227 "1: \n\t" 228 228 "movq (%0), %%mm0 \n\t" 229 229 "movq (%1), %%mm2 \n\t" … … 366 366 { 367 367 __asm __volatile( 368 368 "lea (%3, %3), %%"REG_a" \n\t" 369 ".balign 8 \n\t"369 BALIGN_8 370 370 "1: \n\t" 371 371 "movd (%1), %%mm0 \n\t" 372 372 "movd (%1, %3), %%mm1 \n\t" … … 392 392 { 393 393 __asm __volatile( 394 394 "lea (%3, %3), %%"REG_a" \n\t" 395 ".balign 8 \n\t"395 BALIGN_8 396 396 "1: \n\t" 397 397 "movq (%1), %%mm0 \n\t" 398 398 "movq (%1, %3), %%mm1 \n\t" … … 418 418 { 419 419 __asm __volatile( 420 420 "lea (%3, %3), %%"REG_a" \n\t" 421 ".balign 8 \n\t"421 BALIGN_8 422 422 "1: \n\t" 423 423 "movq (%1), %%mm0 \n\t" 424 424 "movq 8(%1), %%mm4 \n\t" … … 3101 3101 } 3102 3102 3103 3103 #ifdef CONFIG_SNOW_ENCODER 3104 if(mm_flags & MM_SSE2){ 3104 #if defined(__APPLE__) 3105 if (0) // alignment issues with SSE2 code with Apple GCC 3106 #else 3107 if(mm_flags & MM_SSE2) 3108 #endif 3109 { 3105 3110 c->horizontal_compose97i = ff_snow_horizontal_compose97i_sse2; 3106 3111 c->vertical_compose97i = ff_snow_vertical_compose97i_sse2; 3107 3112 c->inner_add_yblock = ff_snow_inner_add_yblock_sse2; -
libavcodec/i386/dsputil_mmx_avg.h
diff -NaurbB --exclude=.svn ../ffmpeg/libavcodec/i386/dsputil_mmx_avg.h ./libavcodec/i386/dsputil_mmx_avg.h
old new 754 754 "lea (%3, %3), %%"REG_a" \n\t" 755 755 "movq (%1), %%mm0 \n\t" 756 756 PAVGB" 1(%1), %%mm0 \n\t" 757 ".balign 8 \n\t"757 BALIGN_8 758 758 "1: \n\t" 759 759 "movq (%1, %%"REG_a"), %%mm2 \n\t" 760 760 "movq (%1, %3), %%mm1 \n\t" -
libavcodec/i386/dsputil_mmx_rnd.h
diff -NaurbB --exclude=.svn ../ffmpeg/libavcodec/i386/dsputil_mmx_rnd.h ./libavcodec/i386/dsputil_mmx_rnd.h
old new 28 28 MOVQ_BFE(mm6); 29 29 __asm __volatile( 30 30 "lea (%3, %3), %%"REG_a" \n\t" 31 ".balign 8 \n\t"31 BALIGN_8 32 32 "1: \n\t" 33 33 "movq (%1), %%mm0 \n\t" 34 34 "movq 1(%1), %%mm1 \n\t" … … 69 69 "movq %%mm4, (%3) \n\t" 70 70 "add %5, %3 \n\t" 71 71 "decl %0 \n\t" 72 ".balign 8 \n\t"72 BALIGN_8 73 73 "1: \n\t" 74 74 "movq (%1), %%mm0 \n\t" 75 75 "movq (%2), %%mm1 \n\t" … … 110 110 MOVQ_BFE(mm6); 111 111 __asm __volatile( 112 112 "lea (%3, %3), %%"REG_a" \n\t" 113 ".balign 8 \n\t"113 BALIGN_8 114 114 "1: \n\t" 115 115 "movq (%1), %%mm0 \n\t" 116 116 "movq 1(%1), %%mm1 \n\t" … … 168 168 "movq %%mm5, 8(%3) \n\t" 169 169 "add %5, %3 \n\t" 170 170 "decl %0 \n\t" 171 ".balign 8 \n\t"171 BALIGN_8 172 172 "1: \n\t" 173 173 "movq (%1), %%mm0 \n\t" 174 174 "movq (%2), %%mm1 \n\t" … … 206 206 __asm __volatile( 207 207 "lea (%3, %3), %%"REG_a" \n\t" 208 208 "movq (%1), %%mm0 \n\t" 209 ".balign 8 \n\t"209 BALIGN_8 210 210 "1: \n\t" 211 211 "movq (%1, %3), %%mm1 \n\t" 212 212 "movq (%1, %%"REG_a"),%%mm2 \n\t" … … 246 246 "paddusw %%mm1, %%mm5 \n\t" 247 247 "xor %%"REG_a", %%"REG_a" \n\t" 248 248 "add %3, %1 \n\t" 249 ".balign 8 \n\t"249 BALIGN_8 250 250 "1: \n\t" 251 251 "movq (%1, %%"REG_a"), %%mm0 \n\t" 252 252 "movq 1(%1, %%"REG_a"), %%mm2 \n\t" … … 458 458 __asm __volatile( 459 459 "lea (%3, %3), %%"REG_a" \n\t" 460 460 "movq (%1), %%mm0 \n\t" 461 ".balign 8 \n\t"461 BALIGN_8 462 462 "1: \n\t" 463 463 "movq (%1, %3), %%mm1 \n\t" 464 464 "movq (%1, %%"REG_a"), %%mm2 \n\t" … … 509 509 "paddusw %%mm1, %%mm5 \n\t" 510 510 "xor %%"REG_a", %%"REG_a" \n\t" 511 511 "add %3, %1 \n\t" 512 ".balign 8 \n\t"512 BALIGN_8 513 513 "1: \n\t" 514 514 "movq (%1, %%"REG_a"), %%mm0 \n\t" 515 515 "movq 1(%1, %%"REG_a"), %%mm2 \n\t" -
libavcodec/i386/fdct_mmx.c
diff -NaurbB --exclude=.svn ../ffmpeg/libavcodec/i386/fdct_mmx.c ./libavcodec/i386/fdct_mmx.c
old new 350 350 351 351 static always_inline void fdct_row_sse2(const int16_t *in, int16_t *out) 352 352 { 353 #if defined(__APPLE__) 354 // Apple 'as' has a different macro syntax than FSF GCC 'as' 355 asm volatile( 356 ".macro FDCT_ROW_SSE2_H1 \n\t" 357 "movq $0(%0), %%xmm2 \n\t" 358 "movq $0+8(%0), %%xmm0 \n\t" 359 "movdqa $1+32(%1), %%xmm3 \n\t" 360 "movdqa $1+48(%1), %%xmm7 \n\t" 361 "movdqa $1(%1), %%xmm4 \n\t" 362 "movdqa $1+16(%1), %%xmm5 \n\t" 363 ".endmacro \n\t" 364 ".macro FDCT_ROW_SSE2_H2 \n\t" 365 "movq $0(%0), %%xmm2 \n\t" 366 "movq $0+8(%0), %%xmm0 \n\t" 367 "movdqa $1+32(%1), %%xmm3 \n\t" 368 "movdqa $1+48(%1), %%xmm7 \n\t" 369 ".endmacro \n\t" 370 ".macro FDCT_ROW_SSE2 \n\t" 371 "movq %%xmm2, %%xmm1 \n\t" 372 "pshuflw $$0x27, %%xmm0, %%xmm0 \n\t" 373 "paddsw %%xmm0, %%xmm1 \n\t" 374 "psubsw %%xmm0, %%xmm2 \n\t" 375 "punpckldq %%xmm2, %%xmm1 \n\t" 376 "pshufd $$0x78, %%xmm1, %%xmm2 \n\t" 377 "pmaddwd %%xmm2, %%xmm3 \n\t" 378 "pmaddwd %%xmm1, %%xmm7 \n\t" 379 "pmaddwd %%xmm5, %%xmm2 \n\t" 380 "pmaddwd %%xmm4, %%xmm1 \n\t" 381 "paddd %%xmm7, %%xmm3 \n\t" 382 "paddd %%xmm2, %%xmm1 \n\t" 383 "paddd %%xmm6, %%xmm3 \n\t" 384 "paddd %%xmm6, %%xmm1 \n\t" 385 "psrad $%3, %%xmm3 \n\t" 386 "psrad $%3, %%xmm1 \n\t" 387 "packssdw %%xmm3, %%xmm1 \n\t" 388 "movdqa %%xmm1, $0(%4) \n\t" 389 ".endmacro \n\t" 390 "movdqa (%2), %%xmm6 \n\t" 391 "FDCT_ROW_SSE2_H1 0,0 \n\t" 392 "FDCT_ROW_SSE2 0 \n\t" 393 "FDCT_ROW_SSE2_H2 64,0 \n\t" 394 "FDCT_ROW_SSE2 64 \n\t" 395 396 "FDCT_ROW_SSE2_H1 16,64 \n\t" 397 "FDCT_ROW_SSE2 16 \n\t" 398 "FDCT_ROW_SSE2_H2 112,64 \n\t" 399 "FDCT_ROW_SSE2 112 \n\t" 400 401 "FDCT_ROW_SSE2_H1 32,128 \n\t" 402 "FDCT_ROW_SSE2 32 \n\t" 403 "FDCT_ROW_SSE2_H2 96,128 \n\t" 404 "FDCT_ROW_SSE2 96 \n\t" 405 406 "FDCT_ROW_SSE2_H1 48,192 \n\t" 407 "FDCT_ROW_SSE2 48 \n\t" 408 "FDCT_ROW_SSE2_H2 80,192 \n\t" 409 "FDCT_ROW_SSE2 80 \n\t" 410 : 411 : "r" (in), "r" (tab_frw_01234567_sse2.tab_frw_01234567_sse2), "r" (fdct_r_row_sse2.fdct_r_row_sse2), "i" 
(SHIFT_FRW_ROW), "r" (out) 412 ); 413 #else 353 414 asm volatile( 354 415 ".macro FDCT_ROW_SSE2_H1 i t \n\t" 355 416 "movq \\i(%0), %%xmm2 \n\t" … … 408 469 : 409 470 : "r" (in), "r" (tab_frw_01234567_sse2.tab_frw_01234567_sse2), "r" (fdct_r_row_sse2.fdct_r_row_sse2), "i" (SHIFT_FRW_ROW), "r" (out) 410 471 ); 472 #endif 411 473 } 412 474 413 475 static always_inline void fdct_row_mmx2(const int16_t *in, int16_t *out, const int16_t *table) -
libavcodec/i386/mmx.h
diff -NaurbB --exclude=.svn ../ffmpeg/libavcodec/i386/mmx.h ./libavcodec/i386/mmx.h
old new 23 23 # define PTR_SIZE "4" 24 24 #endif 25 25 26 #if defined(__APPLE__) 27 # define BALIGN_8 ".align 3 \n\t" 28 # define BALIGN_16 ".align 4 \n\t" 29 #else 30 # define BALIGN_8 ".balign 8 \n\t" 31 # define BALIGN_16 ".balign 16 \n\t" 32 #endif 33 26 34 /* 27 35 * The type of an value that fits in an MMX register (note that long 28 36 * long constant values MUST be suffixed by LL and unsigned long long -
libavcodec/i386/motion_est_mmx.c
diff -NaurbB --exclude=.svn ../ffmpeg/libavcodec/i386/motion_est_mmx.c ./libavcodec/i386/motion_est_mmx.c
old new 21 21 */ 22 22 #include "../dsputil.h" 23 23 #include "x86_cpu.h" 24 #include "mmx.h" 24 25 25 26 static const __attribute__ ((aligned(8))) uint64_t round_tab[3]={ 26 27 0x0000000000000000ULL, … … 34 34 { 35 35 long len= -(stride*h); 36 36 asm volatile( 37 ".balign 16 \n\t"37 BALIGN_16 38 38 "1: \n\t" 39 39 "movq (%1, %%"REG_a"), %%mm0 \n\t" 40 40 "movq (%2, %%"REG_a"), %%mm2 \n\t" … … 70 70 { 71 71 long len= -(stride*h); 72 72 asm volatile( 73 ".balign 16 \n\t"73 BALIGN_16 74 74 "1: \n\t" 75 75 "movq (%1, %%"REG_a"), %%mm0 \n\t" 76 76 "movq (%2, %%"REG_a"), %%mm2 \n\t" … … 92 92 { 93 93 long len= -(stride*h); 94 94 asm volatile( 95 ".balign 16 \n\t"95 BALIGN_16 96 96 "1: \n\t" 97 97 "movq (%1, %%"REG_a"), %%mm0 \n\t" 98 98 "movq (%2, %%"REG_a"), %%mm2 \n\t" … … 118 118 { //FIXME reuse src 119 119 long len= -(stride*h); 120 120 asm volatile( 121 ".balign 16 \n\t"121 BALIGN_16 122 122 "movq "MANGLE(bone)", %%mm5 \n\t" 123 123 "1: \n\t" 124 124 "movq (%1, %%"REG_a"), %%mm0 \n\t" … … 155 155 { 156 156 long len= -(stride*h); 157 157 asm volatile( 158 ".balign 16 \n\t"158 BALIGN_16 159 159 "1: \n\t" 160 160 "movq (%1, %%"REG_a"), %%mm0 \n\t" 161 161 "movq (%2, %%"REG_a"), %%mm1 \n\t" … … 193 193 { 194 194 long len= -(stride*h); 195 195 asm volatile( 196 ".balign 16 \n\t"196 BALIGN_16 197 197 "1: \n\t" 198 198 "movq (%1, %%"REG_a"), %%mm0 \n\t" 199 199 "movq (%2, %%"REG_a"), %%mm1 \n\t" -
libavcodec/i386/mpegvideo_mmx.c
diff -NaurbB --exclude=.svn ../ffmpeg/libavcodec/i386/mpegvideo_mmx.c ./libavcodec/i386/mpegvideo_mmx.c
old new 24 24 #include "../mpegvideo.h" 25 25 #include "../avcodec.h" 26 26 #include "x86_cpu.h" 27 #include "mmx.h" 27 28 28 29 extern uint8_t zigzag_direct_noperm[64]; 29 30 extern uint16_t inv_zigzag_direct16[64]; … … 66 66 "packssdw %%mm5, %%mm5 \n\t" 67 67 "psubw %%mm5, %%mm7 \n\t" 68 68 "pxor %%mm4, %%mm4 \n\t" 69 ".balign 16 \n\t"69 BALIGN_16 70 70 "1: \n\t" 71 71 "movq (%0, %3), %%mm0 \n\t" 72 72 "movq 8(%0, %3), %%mm1 \n\t" … … 129 129 "packssdw %%mm5, %%mm5 \n\t" 130 130 "psubw %%mm5, %%mm7 \n\t" 131 131 "pxor %%mm4, %%mm4 \n\t" 132 ".balign 16 \n\t"132 BALIGN_16 133 133 "1: \n\t" 134 134 "movq (%0, %3), %%mm0 \n\t" 135 135 "movq 8(%0, %3), %%mm1 \n\t" … … 222 222 "packssdw %%mm6, %%mm6 \n\t" 223 223 "packssdw %%mm6, %%mm6 \n\t" 224 224 "mov %3, %%"REG_a" \n\t" 225 ".balign 16 \n\t"225 BALIGN_16 226 226 "1: \n\t" 227 227 "movq (%0, %%"REG_a"), %%mm0 \n\t" 228 228 "movq 8(%0, %%"REG_a"), %%mm1 \n\t" … … 285 285 "packssdw %%mm6, %%mm6 \n\t" 286 286 "packssdw %%mm6, %%mm6 \n\t" 287 287 "mov %3, %%"REG_a" \n\t" 288 ".balign 16 \n\t"288 BALIGN_16 289 289 "1: \n\t" 290 290 "movq (%0, %%"REG_a"), %%mm0 \n\t" 291 291 "movq 8(%0, %%"REG_a"), %%mm1 \n\t" … … 357 357 "packssdw %%mm6, %%mm6 \n\t" 358 358 "packssdw %%mm6, %%mm6 \n\t" 359 359 "mov %3, %%"REG_a" \n\t" 360 ".balign 16 \n\t"360 BALIGN_16 361 361 "1: \n\t" 362 362 "movq (%0, %%"REG_a"), %%mm0 \n\t" 363 363 "movq 8(%0, %%"REG_a"), %%mm1 \n\t" … … 418 418 "packssdw %%mm6, %%mm6 \n\t" 419 419 "packssdw %%mm6, %%mm6 \n\t" 420 420 "mov %3, %%"REG_a" \n\t" 421 ".balign 16 \n\t"421 BALIGN_16 422 422 "1: \n\t" 423 423 "movq (%0, %%"REG_a"), %%mm0 \n\t" 424 424 "movq 8(%0, %%"REG_a"), %%mm1 \n\t" -
libavcodec/i386/mpegvideo_mmx_template.c
diff -NaurbB --exclude=.svn ../ffmpeg/libavcodec/i386/mpegvideo_mmx_template.c ./libavcodec/i386/mpegvideo_mmx_template.c
old new 111 111 "pxor %%mm6, %%mm6 \n\t" 112 112 "psubw (%3), %%mm6 \n\t" // -bias[0] 113 113 "mov $-128, %%"REG_a" \n\t" 114 ".balign 16 \n\t"114 BALIGN_16 115 115 "1: \n\t" 116 116 "pxor %%mm1, %%mm1 \n\t" // 0 117 117 "movq (%1, %%"REG_a"), %%mm0 \n\t" // block[i] … … 155 155 "pxor %%mm7, %%mm7 \n\t" // 0 156 156 "pxor %%mm4, %%mm4 \n\t" // 0 157 157 "mov $-128, %%"REG_a" \n\t" 158 ".balign 16 \n\t"158 BALIGN_16 159 159 "1: \n\t" 160 160 "pxor %%mm1, %%mm1 \n\t" // 0 161 161 "movq (%1, %%"REG_a"), %%mm0 \n\t" // block[i]
Note: See TracBrowser for help on using the repository browser.
