root/branches/perian-1.1/Patches/ffmpeg-pic.diff
| Revision 822, 22.8 kB (checked in by astrange, 9 months ago) |
|---|
-
ffmpeg/libavcodec/i386/motion_est_mmx.c
 old  new
 167  167   static inline void sad8_4_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h)
 168  168   {
 169  169   asm volatile(
 170      - "movq "MANGLE(bone)", %%mm5 \n\t"
      170 + "movq %4, %%mm5 \n\t"
 171  171   "movq (%1), %%mm0 \n\t"
 172  172   "pavgb 1(%1), %%mm0 \n\t"
 173  173   "add %3, %1 \n\t"
… …
 190  190   "sub $2, %0 \n\t"
 191  191   " jg 1b \n\t"
 192  192   : "+r" (h), "+r" (blk1), "+r" (blk2)
 193      - : "r" ((long)stride)
      193 + : "r" ((long)stride), "m" (bone)
 194  194   );
 195  195   }
 196  196
… …
 258  258   "punpckhbw %%mm7, %%mm5 \n\t"
 259  259   "paddw %%mm4, %%mm2 \n\t"
 260  260   "paddw %%mm5, %%mm3 \n\t"
 261      - "movq 16+ "MANGLE(round_tab)", %%mm5 \n\t"
      261 + "movq 16+%5, %%mm5 \n\t"
 262  262   "paddw %%mm2, %%mm0 \n\t"
 263  263   "paddw %%mm3, %%mm1 \n\t"
 264  264   "paddw %%mm5, %%mm0 \n\t"
… …
 281  281   "add %4, %%"REG_a" \n\t"
 282  282   " js 1b \n\t"
 283  283   : "+a" (len)
 284      - : "r" (blk1 - len), "r" (blk1 -len + stride), "r" (blk2 - len), "r" ((long)stride)
      284 + : "r" (blk1 - len), "r" (blk1 -len + stride), "r" (blk2 - len), "r" ((long)stride), "m" (round_tab[0])
 285  285   );
 286  286   }
 287  287
-
ffmpeg/libavcodec/i386/dsputil_h264_template_mmx.c
 old  new
 192  192   "pxor %%mm7, %%mm7 \n\t"
 193  193   "movd %5, %%mm2 \n\t"
 194  194   "movd %6, %%mm3 \n\t"
 195      - "movq "MANGLE(ff_pw_8)", %%mm4\n\t"
 196      - "movq "MANGLE(ff_pw_8)", %%mm5\n\t"
      195 + "movq %7, %%mm4\n\t"
      196 + "movq %7, %%mm5\n\t"
 197  197   "punpcklwd %%mm2, %%mm2 \n\t"
 198  198   "punpcklwd %%mm3, %%mm3 \n\t"
 199  199   "punpcklwd %%mm2, %%mm2 \n\t"
… …
 250  250   "sub $2, %2 \n\t"
 251  251   "jnz 1b \n\t"
 252  252   : "+r"(dst), "+r"(src), "+r"(h)
 253      - : "r"((long)stride), "m"(ff_pw_32), "m"(x), "m"(y)
      253 + : "r"((long)stride), "m"(ff_pw_32), "m"(x), "m"(y), "m"(ff_pw_8)
 254  254   );
 255  255   }
 256  256
-
ffmpeg/libavcodec/i386/dsputil_mmx.c
 old  new
 772  772
 773  773   #define QPEL_V_LOW(m3,m4,m5,m6, pw_20, pw_3, rnd, in0, in1, in2, in7, out, OP)\
 774  774   "paddw " #m4 ", " #m3 " \n\t" /* x1 */\
 775      - "movq " MANGLE(ff_pw_20)", %%mm4 \n\t" /* 20 */\
      775 + "movq "#pw_20", %%mm4 \n\t" /* 20 */\
 776  776   "pmullw " #m3 ", %%mm4 \n\t" /* 20x1 */\
 777  777   "movq "#in7", " #m3 " \n\t" /* d */\
 778  778   "movq "#in0", %%mm5 \n\t" /* D */\
… …
 784  784   "paddw " #m5 ", %%mm6 \n\t" /* x2 */\
 785  785   "paddw %%mm6, %%mm6 \n\t" /* 2x2 */\
 786  786   "psubw %%mm6, %%mm5 \n\t" /* -2x2 + x3 */\
 787      - "pmullw " MANGLE(ff_pw_3)", %%mm5 \n\t" /* -6x2 + 3x3 */\
      787 + "pmullw "#pw_3", %%mm5 \n\t" /* -6x2 + 3x3 */\
 788  788   "paddw " #rnd ", %%mm4 \n\t" /* x2 */\
 789  789   "paddw %%mm4, %%mm5 \n\t" /* 20x1 - 6x2 + 3x3 - x4 */\
 790  790   "psraw $5, %%mm5 \n\t"\
… …
 818  818   "paddw %%mm5, %%mm5 \n\t" /* 2b */\
 819  819   "psubw %%mm5, %%mm6 \n\t" /* c - 2b */\
 820  820   "pshufw $0x06, %%mm0, %%mm5 \n\t" /* 0C0B0A0A */\
 821      - "pmullw "MANGLE(ff_pw_3)", %%mm6 \n\t" /* 3c - 6b */\
      821 + "pmullw %8, %%mm6 \n\t" /* 3c - 6b */\
 822  822   "paddw %%mm4, %%mm0 \n\t" /* a */\
 823  823   "paddw %%mm1, %%mm5 \n\t" /* d */\
 824      - "pmullw "MANGLE(ff_pw_20)", %%mm0 \n\t" /* 20a */\
      824 + "pmullw %7, %%mm0 \n\t" /* 20a */\
 825  825   "psubw %%mm5, %%mm0 \n\t" /* 20a - d */\
 826  826   "paddw %6, %%mm6 \n\t"\
 827  827   "paddw %%mm6, %%mm0 \n\t" /* 20a - 6b + 3c - d */\
… …
 844  844   "psrlq $24, %%mm6 \n\t" /* IJKLM000 */\
 845  845   "punpcklbw %%mm7, %%mm2 \n\t" /* 0F0G0H0I */\
 846  846   "punpcklbw %%mm7, %%mm6 \n\t" /* 0I0J0K0L */\
 847      - "pmullw "MANGLE(ff_pw_3)", %%mm3 \n\t" /* 3c - 6b */\
      847 + "pmullw %8, %%mm3 \n\t" /* 3c - 6b */\
 848  848   "paddw %%mm2, %%mm1 \n\t" /* a */\
 849  849   "paddw %%mm6, %%mm4 \n\t" /* d */\
 850      - "pmullw "MANGLE(ff_pw_20)", %%mm1 \n\t" /* 20a */\
      850 + "pmullw %7, %%mm1 \n\t" /* 20a */\
 851  851   "psubw %%mm4, %%mm3 \n\t" /* - 6b +3c - d */\
 852  852   "paddw %6, %%mm1 \n\t"\
 853  853   "paddw %%mm1, %%mm3 \n\t" /* 20a - 6b +3c - d */\
… …
 870  870   "psubw %%mm5, %%mm0 \n\t" /* c - 2b */\
 871  871   "movq %%mm3, %%mm5 \n\t" /* JKLMNOPQ */\
 872  872   "psrlq $24, %%mm3 \n\t" /* MNOPQ000 */\
 873      - "pmullw "MANGLE(ff_pw_3)", %%mm0 \n\t" /* 3c - 6b */\
      873 + "pmullw %8, %%mm0 \n\t" /* 3c - 6b */\
 874  874   "punpcklbw %%mm7, %%mm3 \n\t" /* 0M0N0O0P */\
 875  875   "paddw %%mm3, %%mm2 \n\t" /* d */\
 876  876   "psubw %%mm2, %%mm0 \n\t" /* -6b + 3c - d */\
… …
 878  878   "punpcklbw %%mm7, %%mm2 \n\t" /* 0J0K0L0M */\
 879  879   "punpckhbw %%mm7, %%mm5 \n\t" /* 0N0O0P0Q */\
 880  880   "paddw %%mm2, %%mm6 \n\t" /* a */\
 881      - "pmullw "MANGLE(ff_pw_20)", %%mm6 \n\t" /* 20a */\
      881 + "pmullw %7, %%mm6 \n\t" /* 20a */\
 882  882   "paddw %6, %%mm0 \n\t"\
 883  883   "paddw %%mm6, %%mm0 \n\t" /* 20a - 6b + 3c - d */\
 884  884   "psraw $5, %%mm0 \n\t"\
… …
 893  893   "paddw %%mm2, %%mm5 \n\t" /* d */\
 894  894   "paddw %%mm6, %%mm6 \n\t" /* 2b */\
 895  895   "psubw %%mm6, %%mm4 \n\t" /* c - 2b */\
 896      - "pmullw "MANGLE(ff_pw_20)", %%mm3 \n\t" /* 20a */\
 897      - "pmullw "MANGLE(ff_pw_3)", %%mm4 \n\t" /* 3c - 6b */\
      896 + "pmullw %7, %%mm3 \n\t" /* 20a */\
      897 + "pmullw %8, %%mm4 \n\t" /* 3c - 6b */\
 898  898   "psubw %%mm5, %%mm3 \n\t" /* -6b + 3c - d */\
 899  899   "paddw %6, %%mm4 \n\t"\
 900  900   "paddw %%mm3, %%mm4 \n\t" /* 20a - 6b + 3c - d */\
… …
 907  907   "decl %2 \n\t"\
 908  908   " jnz 1b \n\t"\
 909  909   : "+a"(src), "+c"(dst), "+g"(h)\
 910      - : "d"((long)srcStride), "S"((long)dstStride), /*"m"(ff_pw_20), "m"(ff_pw_3),*/ "m"(temp), "m"(ROUNDER)\
      910 + : "d"((long)srcStride), "S"((long)dstStride),\
      911 + "m"(temp), "m"(ROUNDER),\
      912 + "m"(ff_pw_20), "m"(ff_pw_3)\
 911  913   : "memory"\
 912  914   );\
 913  915   }\
… …
 985  987   "paddw %%mm5, %%mm5 \n\t" /* 2b */\
 986  988   "psubw %%mm5, %%mm6 \n\t" /* c - 2b */\
 987  989   "pshufw $0x06, %%mm0, %%mm5 \n\t" /* 0C0B0A0A */\
 988      - "pmullw "MANGLE(ff_pw_3)", %%mm6 \n\t" /* 3c - 6b */\
      990 + "pmullw %8, %%mm6 \n\t" /* 3c - 6b */\
 989  991   "paddw %%mm4, %%mm0 \n\t" /* a */\
 990  992   "paddw %%mm1, %%mm5 \n\t" /* d */\
 991      - "pmullw "MANGLE(ff_pw_20)", %%mm0 \n\t" /* 20a */\
      993 + "pmullw %7, %%mm0 \n\t" /* 20a */\
 992  994   "psubw %%mm5, %%mm0 \n\t" /* 20a - d */\
 993  995   "paddw %6, %%mm6 \n\t"\
 994  996   "paddw %%mm6, %%mm0 \n\t" /* 20a - 6b + 3c - d */\
… …
1006 1008   "paddw %%mm5, %%mm4 \n\t" /* d */\
1007 1009   "paddw %%mm2, %%mm2 \n\t" /* 2b */\
1008 1010   "psubw %%mm2, %%mm3 \n\t" /* c - 2b */\
1009      - "pmullw "MANGLE(ff_pw_20)", %%mm1 \n\t" /* 20a */\
1010      - "pmullw "MANGLE(ff_pw_3)", %%mm3 \n\t" /* 3c - 6b */\
     1011 + "pmullw %7, %%mm1 \n\t" /* 20a */\
     1012 + "pmullw %8, %%mm3 \n\t" /* 3c - 6b */\
1011 1013   "psubw %%mm4, %%mm3 \n\t" /* -6b + 3c - d */\
1012 1014   "paddw %6, %%mm1 \n\t"\
1013 1015   "paddw %%mm1, %%mm3 \n\t" /* 20a - 6b + 3c - d */\
… …
1020 1022   "decl %2 \n\t"\
1021 1023   " jnz 1b \n\t"\
1022 1024   : "+a"(src), "+c"(dst), "+g"(h)\
1023      - : "S"((long)srcStride), "D"((long)dstStride), /*"m"(ff_pw_20), "m"(ff_pw_3),*/ "m"(temp), "m"(ROUNDER)\
     1025 + : "S"((long)srcStride), "D"((long)dstStride),\
     1026 + "m"(temp), "m"(ROUNDER),\
     1027 + "m"(ff_pw_20), "m"(ff_pw_3)\
1024 1028   : "memory"\
1025 1029   );\
1026 1030   }\
… …
1099 1103   "movq 8(%0), %%mm1 \n\t"\
1100 1104   "movq 16(%0), %%mm2 \n\t"\
1101 1105   "movq 24(%0), %%mm3 \n\t"\
1102      - QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %5, %6, %5, 16(%0), 8(%0), (%0), 32(%0), (%1), OP)\
1103      - QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %5, %6, %5, 8(%0), (%0), (%0), 40(%0), (%1, %3), OP)\
     1106 + QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %7, %8, %5, 16(%0), 8(%0), (%0), 32(%0), (%1), OP)\
     1107 + QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %7, %8, %5, 8(%0), (%0), (%0), 40(%0), (%1, %3), OP)\
1104 1108   "add %4, %1 \n\t"\
1105      - QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %5, %6, %5, (%0), (%0), 8(%0), 48(%0), (%1), OP)\
     1109 + QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %7, %8, %5, (%0), (%0), 8(%0), 48(%0), (%1), OP)\
1106 1110   \
1107      - QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %5, %6, %5, (%0), 8(%0), 16(%0), 56(%0), (%1, %3), OP)\
     1111 + QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %7, %8, %5, (%0), 8(%0), 16(%0), 56(%0), (%1, %3), OP)\
1108 1112   "add %4, %1 \n\t"\
1109      - QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %5, %6, %5, 8(%0), 16(%0), 24(%0), 64(%0), (%1), OP)\
1110      - QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %5, %6, %5, 16(%0), 24(%0), 32(%0), 72(%0), (%1, %3), OP)\
     1113 + QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %7, %8, %5, 8(%0), 16(%0), 24(%0), 64(%0), (%1), OP)\
     1114 + QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %7, %8, %5, 16(%0), 24(%0), 32(%0), 72(%0), (%1, %3), OP)\
1111 1115   "add %4, %1 \n\t"\
1112      - QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %5, %6, %5, 24(%0), 32(%0), 40(%0), 80(%0), (%1), OP)\
1113      - QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %5, %6, %5, 32(%0), 40(%0), 48(%0), 88(%0), (%1, %3), OP)\
     1116 + QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %7, %8, %5, 24(%0), 32(%0), 40(%0), 80(%0), (%1), OP)\
     1117 + QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %7, %8, %5, 32(%0), 40(%0), 48(%0), 88(%0), (%1, %3), OP)\
1114 1118   "add %4, %1 \n\t"\
1115      - QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %5, %6, %5, 40(%0), 48(%0), 56(%0), 96(%0), (%1), OP)\
1116      - QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %5, %6, %5, 48(%0), 56(%0), 64(%0),104(%0), (%1, %3), OP)\
     1119 + QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %7, %8, %5, 40(%0), 48(%0), 56(%0), 96(%0), (%1), OP)\
     1120 + QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %7, %8, %5, 48(%0), 56(%0), 64(%0),104(%0), (%1, %3), OP)\
1117 1121   "add %4, %1 \n\t"\
1118      - QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %5, %6, %5, 56(%0), 64(%0), 72(%0),112(%0), (%1), OP)\
1119      - QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %5, %6, %5, 64(%0), 72(%0), 80(%0),120(%0), (%1, %3), OP)\
     1122 + QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %7, %8, %5, 56(%0), 64(%0), 72(%0),112(%0), (%1), OP)\
     1123 + QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %7, %8, %5, 64(%0), 72(%0), 80(%0),120(%0), (%1, %3), OP)\
1120 1124   "add %4, %1 \n\t"\
1121      - QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %5, %6, %5, 72(%0), 80(%0), 88(%0),128(%0), (%1), OP)\
     1125 + QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %7, %8, %5, 72(%0), 80(%0), 88(%0),128(%0), (%1), OP)\
1122 1126   \
1123      - QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %5, %6, %5, 80(%0), 88(%0), 96(%0),128(%0), (%1, %3), OP)\
     1127 + QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %7, %8, %5, 80(%0), 88(%0), 96(%0),128(%0), (%1, %3), OP)\
1124 1128   "add %4, %1 \n\t" \
1125      - QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %5, %6, %5, 88(%0), 96(%0),104(%0),120(%0), (%1), OP)\
1126      - QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %5, %6, %5, 96(%0),104(%0),112(%0),112(%0), (%1, %3), OP)\
     1129 + QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %7, %8, %5, 88(%0), 96(%0),104(%0),120(%0), (%1), OP)\
     1130 + QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %7, %8, %5, 96(%0),104(%0),112(%0),112(%0), (%1, %3), OP)\
1127 1131   \
1128 1132   "add $136, %0 \n\t"\
1129 1133   "add %6, %1 \n\t"\
… …
1131 1135   " jnz 1b \n\t"\
1132 1136   \
1133 1137   : "+r"(temp_ptr), "+r"(dst), "+g"(count)\
1134      - : "r"((long)dstStride), "r"(2*(long)dstStride), /*"m"(ff_pw_20), "m"(ff_pw_3),*/ "m"(ROUNDER), "g"(4-14*(long)dstStride)\
     1138 + : "r"((long)dstStride), "r"(2*(long)dstStride),\
     1139 + "m"(ROUNDER), "g"(4-14*(long)dstStride),\
     1140 + "m"(ff_pw_20), "m"(ff_pw_3)\
1135 1141   :"memory"\
1136 1142   );\
1137 1143   }\
… …
1171 1177   "movq 8(%0), %%mm1 \n\t"\
1172 1178   "movq 16(%0), %%mm2 \n\t"\
1173 1179   "movq 24(%0), %%mm3 \n\t"\
1174      - QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %5, %6, %5, 16(%0), 8(%0), (%0), 32(%0), (%1), OP)\
1175      - QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %5, %6, %5, 8(%0), (%0), (%0), 40(%0), (%1, %3), OP)\
     1180 + QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %7, %8, %5, 16(%0), 8(%0), (%0), 32(%0), (%1), OP)\
     1181 + QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %7, %8, %5, 8(%0), (%0), (%0), 40(%0), (%1, %3), OP)\
1176 1182   "add %4, %1 \n\t"\
1177      - QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %5, %6, %5, (%0), (%0), 8(%0), 48(%0), (%1), OP)\
     1183 + QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %7, %8, %5, (%0), (%0), 8(%0), 48(%0), (%1), OP)\
1178 1184   \
1179      - QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %5, %6, %5, (%0), 8(%0), 16(%0), 56(%0), (%1, %3), OP)\
     1185 + QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %7, %8, %5, (%0), 8(%0), 16(%0), 56(%0), (%1, %3), OP)\
1180 1186   "add %4, %1 \n\t"\
1181      - QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %5, %6, %5, 8(%0), 16(%0), 24(%0), 64(%0), (%1), OP)\
     1187 + QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %7, %8, %5, 8(%0), 16(%0), 24(%0), 64(%0), (%1), OP)\
1182 1188   \
1183      - QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %5, %6, %5, 16(%0), 24(%0), 32(%0), 64(%0), (%1, %3), OP)\
     1189 + QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %7, %8, %5, 16(%0), 24(%0), 32(%0), 64(%0), (%1, %3), OP)\
1184 1190   "add %4, %1 \n\t"\
1185      - QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %5, %6, %5, 24(%0), 32(%0), 40(%0), 56(%0), (%1), OP)\
1186      - QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %5, %6, %5, 32(%0), 40(%0), 48(%0), 48(%0), (%1, %3), OP)\
     1191 + QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %7, %8, %5, 24(%0), 32(%0), 40(%0), 56(%0), (%1), OP)\
     1192 + QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %7, %8, %5, 32(%0), 40(%0), 48(%0), 48(%0), (%1, %3), OP)\
1187 1193   \
1188 1194   "add $72, %0 \n\t"\
1189 1195   "add %6, %1 \n\t"\
… …
1191 1197   " jnz 1b \n\t"\
1192 1198   \
1193 1199   : "+r"(temp_ptr), "+r"(dst), "+g"(count)\
1194      - : "r"((long)dstStride), "r"(2*(long)dstStride), /*"m"(ff_pw_20), "m"(ff_pw_3),*/ "m"(ROUNDER), "g"(4-6*(long)dstStride)\
     1200 + : "r"((long)dstStride), "r"(2*(long)dstStride),\
     1201 + "m"(ROUNDER), "g"(4-6*(long)dstStride),\
     1202 + "m"(ff_pw_20), "m"(ff_pw_3)\
1195 1203   : "memory"\
1196 1204   );\
1197 1205   }\
-
ffmpeg/libavcodec/i386/h264dsp_mmx.c
 old  new
 417  417   // in: mm0=p1 mm1=p0 mm2=q0 mm3=q1 mm7=(tc&mask)
 418  418   // out: mm1=p0' mm2=q0'
 419  419   // clobbers: mm0,3-6
 420      - #define H264_DEBLOCK_P0_Q0(pb_01, pb_3f)\
      420 + #define H264_DEBLOCK_P0_Q0(pb_01, pb_3, pb_a1)\
 421  421   "movq %%mm1 , %%mm5 \n\t"\
 422  422   "pxor %%mm2 , %%mm5 \n\t" /* p0^q0*/\
 423  423   "pand "#pb_01" , %%mm5 \n\t" /* (p0^q0)&1*/\
 424  424   "pcmpeqb %%mm4 , %%mm4 \n\t"\
 425  425   "pxor %%mm4 , %%mm3 \n\t"\
 426  426   "pavgb %%mm0 , %%mm3 \n\t" /* (p1 - q1 + 256)>>1*/\
 427      - "pavgb " MANGLE(ff_pb_3)", %%mm3 \n\t" /*(((p1 - q1 + 256)>>1)+4)>>1 = 64+2+(p1-q1)>>2*/\
      427 + "pavgb "#pb_3" , %%mm3 \n\t" /*(((p1 - q1 + 256)>>1)+4)>>1 = 64+2+(p1-q1)>>2*/\
 428  428   "pxor %%mm1 , %%mm4 \n\t"\
 429  429   "pavgb %%mm2 , %%mm4 \n\t" /* (q0 - p0 + 256)>>1*/\
 430  430   "pavgb %%mm5 , %%mm3 \n\t"\
 431  431   "paddusb %%mm4 , %%mm3 \n\t" /* d+128+33*/\
 432      - "movq " MANGLE(ff_pb_A1)", %%mm6 \n\t"\
      432 + "movq "#pb_a1" , %%mm6 \n\t"\
 433  433   "psubusb %%mm3 , %%mm6 \n\t"\
 434      - "psubusb " MANGLE(ff_pb_A1)", %%mm3 \n\t"\
      434 + "psubusb "#pb_a1" , %%mm3 \n\t"\
 435  435   "pminub %%mm7 , %%mm6 \n\t"\
 436  436   "pminub %%mm7 , %%mm3 \n\t"\
 437  437   "psubusb %%mm6 , %%mm1 \n\t"\
… …
 498  498   H264_DEBLOCK_Q1(%%mm3, %%mm4, "(%2,%3,2)", "(%2,%3)", %%mm5, %%mm6)
 499  499
 500  500   /* filter p0, q0 */
 501      - H264_DEBLOCK_P0_Q0(%8, unused)
      501 + H264_DEBLOCK_P0_Q0(%8, %9, %10)
 502  502   "movq %%mm1, (%1,%3,2) \n\t"
 503  503   "movq %%mm2, (%2) \n\t"
 504  504
 505  505   : "=m"(*tmp0)
 506  506   : "r"(pix-3*stride), "r"(pix), "r"((long)stride),
 507  507   "m"(*tmp0/*unused*/), "m"(*(uint32_t*)tc0), "m"(alpha1), "m"(beta1),
 508      - "m"(ff_bone)
      508 + "m"(ff_bone), "m" (ff_pb_3), "m" (ff_pb_A1)
 509  509   );
 510  510   }
 511  511
… …
 546  546   "movd %3, %%mm6 \n\t"
 547  547   "punpcklbw %%mm6, %%mm6 \n\t"
 548  548   "pand %%mm6, %%mm7 \n\t" // mm7 = tc&mask
 549      - H264_DEBLOCK_P0_Q0(%6, %7)
      549 + H264_DEBLOCK_P0_Q0(%6, %7, %8)
 550  550   "movq %%mm1, (%0,%2) \n\t"
 551  551   "movq %%mm2, (%1) \n\t"
 552  552
 553  553   :: "r"(pix-2*stride), "r"(pix), "r"((long)stride),
 554  554   "r"(*(uint32_t*)tc0),
 555      - "m"(alpha1), "m"(beta1), "m"(ff_bone), "m" (ff_pb_3F)
      555 + "m"(alpha1), "m"(beta1), "m"(ff_bone), "m" (ff_pb_3), "m" (ff_pb_A1)
 556  556   );
 557  557   }
 558  558
-
ffmpeg/libavcodec/i386/simple_idct_mmx.c
 old  new
 363  363   "movq " #src4 ", %%mm1 \n\t" /* R6 R2 r6 r2 */\
 364  364   "movq " #src1 ", %%mm2 \n\t" /* R3 R1 r3 r1 */\
 365  365   "movq " #src5 ", %%mm3 \n\t" /* R7 R5 r7 r5 */\
 366      - "movq "MANGLE(wm1010)", %%mm4\n\t"\
      366 + "movq %3, %%mm4 \n\t"\
 367  367   "pand %%mm0, %%mm4 \n\t"\
 368  368   "por %%mm1, %%mm4 \n\t"\
 369  369   "por %%mm2, %%mm4 \n\t"\
… …
 437  437   "jmp 2f \n\t"\
 438  438   "1: \n\t"\
 439  439   "pslld $16, %%mm0 \n\t"\
 440      - "#paddd "MANGLE(d40000)", %%mm0\n\t"\
      440 + "#paddd %4, %%mm0 \n\t"\
 441  441   "psrad $13, %%mm0 \n\t"\
 442  442   "packssdw %%mm0, %%mm0 \n\t"\
 443  443   "movq %%mm0, " #dst " \n\t"\
… …
 471  471   "movq " #src4 ", %%mm1 \n\t" /* R6 R2 r6 r2 */\
 472  472   "movq " #src1 ", %%mm2 \n\t" /* R3 R1 r3 r1 */\
 473  473   "movq " #src5 ", %%mm3 \n\t" /* R7 R5 r7 r5 */\
 474      - "movq "MANGLE(wm1010)", %%mm4\n\t"\
      474 + "movq %3, %%mm4 \n\t"\
 475  475   "pand %%mm0, %%mm4 \n\t"\
 476  476   "por %%mm1, %%mm4 \n\t"\
 477  477   "por %%mm2, %%mm4 \n\t"\
… …
 545  545   "jmp 2f \n\t"\
 546  546   "1: \n\t"\
 547  547   "pslld $16, %%mm0 \n\t"\
 548      - "paddd "MANGLE(d40000)", %%mm0\n\t"\
      548 + "paddd %4, %%mm0 \n\t"\
 549  549   "psrad $13, %%mm0 \n\t"\
 550  550   "packssdw %%mm0, %%mm0 \n\t"\
 551  551   "movq %%mm0, " #dst " \n\t"\
… …
1270 1270   */
1271 1271
1272 1272   "9: \n\t"
1273      - :: "r" (block), "r" (temp), "r" (coeffs)
     1273 + :: "r" (block), "r" (temp), "r" (coeffs), "m" (wm1010), "m"(d40000)
1274 1274   : "%eax"
1275 1275   );
1276 1276   }
-
ffmpeg/libavcodec/i386/vc1dsp_mmx.c
 old  new
  77   77   asm volatile(
  78   78   "mov $3, %%"REG_c" \n\t"
  79   79   LOAD_ROUNDER_MMX("%5")
  80      - "movq "MANGLE(ff_pw_9)", %%mm6 \n\t"
       80 + "movq %7, %%mm6 \n\t"
  81   81   "1: \n\t"
  82   82   "movd (%0), %%mm2 \n\t"
  83   83   "add %2, %0 \n\t"
… …
  98   98   "jnz 1b \n\t"
  99   99   : "+r"(src), "+r"(dst)
 100  100   : "r"(stride), "r"(-2*stride),
 101      - "m"(shift), "m"(rnd), "r"(9*stride-4)
      101 + "m"(shift), "m"(rnd), "r"(9*stride-4), "m"(ff_pw_9)
 102  102   : "%"REG_c, "memory"
 103  103   );
 104  104   }
… …
 116  116   rnd -= (-1+9+9-1)*1024; /* Add -1024 bias */
 117  117   asm volatile(
 118  118   LOAD_ROUNDER_MMX("%4")
 119      - "movq "MANGLE(ff_pw_128)", %%mm6\n\t"
 120      - "movq "MANGLE(ff_pw_9)", %%mm5 \n\t"
      119 + "movq %5, %%mm6\n\t"
      120 + "movq %6, %%mm5 \n\t"
 121  121   "1: \n\t"
 122  122   "movq 2*0+0(%1), %%mm1 \n\t"
 123  123   "movq 2*0+8(%1), %%mm2 \n\t"
… …
 141  141   "decl %0 \n\t"
 142  142   "jnz 1b \n\t"
 143  143   : "+r"(h), "+r" (src), "+r" (dst)
 144      - : "r"(stride), "m"(rnd)
      144 + : "r"(stride), "m"(rnd), "m"(ff_pw_128), "m"(ff_pw_9)
 145  145   : "memory"
 146  146   );
 147  147   }
… …
 158  158   asm volatile(
 159  159   "mov $8, %%"REG_c" \n\t"
 160  160   LOAD_ROUNDER_MMX("%5")
 161      - "movq "MANGLE(ff_pw_9)", %%mm6\n\t"
      161 + "movq %7, %%mm6\n\t"
 162  162   "1: \n\t"
 163  163   "movd 0(%0 ), %%mm3 \n\t"
 164  164   "movd 4(%0 ), %%mm4 \n\t"
… …
 194  194   "jnz 1b \n\t"
 195  195   : "+r"(src), "+r"(dst)
 196  196   : "r"(offset), "r"(-2*offset), "g"(stride), "m"(rnd),
 197      - "g"(stride-offset)
      197 + "g"(stride-offset), "m"(ff_pw_9)
 198  198   : "%"REG_c, "memory"
 199  199   );
 200  200   }
Note: See TracBrowser for help on using the browser.
