Ticket #113: ffmpeg-64bit-copies-2.diff
| File ffmpeg-64bit-copies-2.diff, 2.0 kB (added by astrange, 2 years ago) |
|---|
-
libavcodec/h264.c
```diff
--- libavcodec/h264.c (old)
+++ libavcodec/h264.c (new)
@@ -38,6 +38,10 @@
 //#undef NDEBUG
 #include <assert.h>
 
+#if defined(__MMX__)
+#include <mmintrin.h>
+#endif
+
 #define interlaced_dct interlaced_dct_is_a_bad_name
 #define mb_intra mb_intra_isnt_initalized_see_mb_type
 
@@ -452,8 +456,27 @@
 *(uint32_t*)(p + 2*stride)=
 *(uint32_t*)(p + 3*stride)= v;
 }else if(w==8){
-//gcc can't optimize 64bit math on x86_32
-#if defined(ARCH_X86_64) || (defined(MP_WORDSIZE) && MP_WORDSIZE >= 64)
+#if defined(__MMX__)
+__m64 v= _mm_set1_pi32(val);
+*(__m64*)(p + 0*stride)= v;
+if(h==1) return;
+*(__m64*)(p + 1*stride)= v;
+if(h==2) return;
+*(__m64*)(p + 2*stride)=
+*(__m64*)(p + 3*stride)= v;
+}else if(w==16){
+__m64 v= _mm_set1_pi32(val);
+*(__m64*)(p + 0+0*stride)=
+*(__m64*)(p + 8+0*stride)=
+*(__m64*)(p + 0+1*stride)=
+*(__m64*)(p + 8+1*stride)= v;
+if(h==2) return;
+*(__m64*)(p + 0+2*stride)=
+*(__m64*)(p + 8+2*stride)=
+*(__m64*)(p + 0+3*stride)=
+*(__m64*)(p + 8+3*stride)= v;
+//gcc can't optimize 64bit math on x86_32
+#elif defined(ARCH_X86_64) || (defined(MP_WORDSIZE) && MP_WORDSIZE >= 64)
 const uint64_t v= val*0x0100000001ULL;
 *(uint64_t*)(p + 0*stride)= v;
 if(h==1) return;
@@ -507,6 +530,8 @@
 assert(h==4);
 }
 
+#define uint64_t double
+
 static void fill_caches(H264Context *h, int mb_type, int for_deblock){
 MpegEncContext * const s = &h->s;
 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
@@ -7005,6 +7030,8 @@
 }
 }
 
+#undef uint64_t
+
 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
 MpegEncContext * const s = &h->s;
 int mb_xy, mb_type;
```
