| 1 | | diff -ruN ffmpeg/patched ffmpeg-/patched |
|---|
| 2 | | --- ffmpeg/patched 1969-12-31 19:00:00.000000000 -0500 |
|---|
| 3 | | +++ ffmpeg-/patched 2008-02-09 14:41:01.000000000 -0500 |
|---|
| 4 | | @@ -0,0 +1 @@ |
|---|
| 5 | | + |
|---|
| | 1 | Index: ffmpeg/libavcodec/i386/motion_est_mmx.c |
|---|
| | 2 | =================================================================== |
|---|
| | 3 | --- ffmpeg/libavcodec/i386/motion_est_mmx.c (revision 11959) |
|---|
| | 4 | +++ ffmpeg/libavcodec/i386/motion_est_mmx.c (working copy) |
|---|
| | 5 | @@ -167,7 +167,7 @@ |
|---|
| | 6 | static inline void sad8_4_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h) |
|---|
| | 7 | { |
|---|
| | 8 | asm volatile( |
|---|
| | 9 | - "movq "MANGLE(bone)", %%mm5 \n\t" |
|---|
| | 10 | + "movq %4, %%mm5 \n\t" |
|---|
| | 11 | "movq (%1), %%mm0 \n\t" |
|---|
| | 12 | "pavgb 1(%1), %%mm0 \n\t" |
|---|
| | 13 | "add %3, %1 \n\t" |
|---|
| | 14 | @@ -190,7 +190,7 @@ |
|---|
| | 15 | "sub $2, %0 \n\t" |
|---|
| | 16 | " jg 1b \n\t" |
|---|
| | 17 | : "+r" (h), "+r" (blk1), "+r" (blk2) |
|---|
| | 18 | - : "r" ((long)stride) |
|---|
| | 19 | + : "r" ((long)stride), "m" (bone) |
|---|
| | 20 | ); |
|---|
| | 21 | } |
|---|
| | 22 | |
|---|
| | 23 | @@ -258,7 +258,7 @@ |
|---|
| | 24 | "punpckhbw %%mm7, %%mm5 \n\t" |
|---|
| | 25 | "paddw %%mm4, %%mm2 \n\t" |
|---|
| | 26 | "paddw %%mm5, %%mm3 \n\t" |
|---|
| | 27 | - "movq 16+"MANGLE(round_tab)", %%mm5 \n\t" |
|---|
| | 28 | + "movq %5, %%mm5 \n\t" /* %5 is round_tab[2] */ |
|---|
| | 29 | "paddw %%mm2, %%mm0 \n\t" |
|---|
| | 30 | "paddw %%mm3, %%mm1 \n\t" |
|---|
| | 31 | "paddw %%mm5, %%mm0 \n\t" |
|---|
| | 32 | @@ -281,7 +281,7 @@ |
|---|
| | 33 | "add %4, %%"REG_a" \n\t" |
|---|
| | 34 | " js 1b \n\t" |
|---|
| | 35 | : "+a" (len) |
|---|
| | 36 | - : "r" (blk1 - len), "r" (blk1 -len + stride), "r" (blk2 - len), "r" ((long)stride) |
|---|
| | 37 | + : "r" (blk1 - len), "r" (blk1 -len + stride), "r" (blk2 - len), "r" ((long)stride), "m" (round_tab[2]) |
|---|
| | 38 | ); |
|---|
| | 39 | } |
|---|
| | 40 | |
|---|
| | 41 | Index: ffmpeg/libavcodec/i386/dsputil_h264_template_mmx.c |
|---|
| | 42 | =================================================================== |
|---|
| | 43 | --- ffmpeg/libavcodec/i386/dsputil_h264_template_mmx.c (revision 11959) |
|---|
| | 44 | +++ ffmpeg/libavcodec/i386/dsputil_h264_template_mmx.c (working copy) |
|---|
| | 45 | @@ -192,8 +192,8 @@ |
|---|
| | 46 | "pxor %%mm7, %%mm7 \n\t" |
|---|
| | 47 | "movd %5, %%mm2 \n\t" |
|---|
| | 48 | "movd %6, %%mm3 \n\t" |
|---|
| | 49 | - "movq "MANGLE(ff_pw_8)", %%mm4\n\t" |
|---|
| | 50 | - "movq "MANGLE(ff_pw_8)", %%mm5\n\t" |
|---|
| | 51 | + "movq %7, %%mm4\n\t" |
|---|
| | 52 | + "movq %7, %%mm5\n\t" |
|---|
| | 53 | "punpcklwd %%mm2, %%mm2 \n\t" |
|---|
| | 54 | "punpcklwd %%mm3, %%mm3 \n\t" |
|---|
| | 55 | "punpcklwd %%mm2, %%mm2 \n\t" |
|---|
| | 56 | @@ -250,7 +250,7 @@ |
|---|
| | 57 | "sub $2, %2 \n\t" |
|---|
| | 58 | "jnz 1b \n\t" |
|---|
| | 59 | : "+r"(dst), "+r"(src), "+r"(h) |
|---|
| | 60 | - : "r"((long)stride), "m"(ff_pw_32), "m"(x), "m"(y) |
|---|
| | 61 | + : "r"((long)stride), "m"(ff_pw_32), "m"(x), "m"(y), "m"(ff_pw_8) |
|---|
| | 62 | ); |
|---|
| | 63 | } |
|---|
| | 64 | |
|---|
| | 65 | Index: ffmpeg/libavcodec/i386/dsputil_mmx.c |
|---|
| | 66 | =================================================================== |
|---|
| | 67 | --- ffmpeg/libavcodec/i386/dsputil_mmx.c (revision 11959) |
|---|
| | 68 | +++ ffmpeg/libavcodec/i386/dsputil_mmx.c (working copy) |
|---|
| | 69 | @@ -1927,7 +1927,7 @@ |
|---|
| | 70 | |
|---|
| | 71 | #define QPEL_V_LOW(m3,m4,m5,m6, pw_20, pw_3, rnd, in0, in1, in2, in7, out, OP)\ |
|---|
| | 72 | "paddw " #m4 ", " #m3 " \n\t" /* x1 */\ |
|---|
| | 73 | - "movq "MANGLE(ff_pw_20)", %%mm4 \n\t" /* 20 */\ |
|---|
| | 74 | + "movq "#pw_20", %%mm4 \n\t" /* 20 */\ |
|---|
| | 75 | "pmullw " #m3 ", %%mm4 \n\t" /* 20x1 */\ |
|---|
| | 76 | "movq "#in7", " #m3 " \n\t" /* d */\ |
|---|
| | 77 | "movq "#in0", %%mm5 \n\t" /* D */\ |
|---|
| | 78 | @@ -1939,7 +1939,7 @@ |
|---|
| | 79 | "paddw " #m5 ", %%mm6 \n\t" /* x2 */\ |
|---|
| | 80 | "paddw %%mm6, %%mm6 \n\t" /* 2x2 */\ |
|---|
| | 81 | "psubw %%mm6, %%mm5 \n\t" /* -2x2 + x3 */\ |
|---|
| | 82 | - "pmullw "MANGLE(ff_pw_3)", %%mm5 \n\t" /* -6x2 + 3x3 */\ |
|---|
| | 83 | + "pmullw "#pw_3", %%mm5 \n\t" /* -6x2 + 3x3 */\ |
|---|
| | 84 | "paddw " #rnd ", %%mm4 \n\t" /* x2 */\ |
|---|
| | 85 | "paddw %%mm4, %%mm5 \n\t" /* 20x1 - 6x2 + 3x3 - x4 */\ |
|---|
| | 86 | "psraw $5, %%mm5 \n\t"\ |
|---|
| | 87 | @@ -1973,10 +1973,10 @@ |
|---|
| | 88 | "paddw %%mm5, %%mm5 \n\t" /* 2b */\ |
|---|
| | 89 | "psubw %%mm5, %%mm6 \n\t" /* c - 2b */\ |
|---|
| | 90 | "pshufw $0x06, %%mm0, %%mm5 \n\t" /* 0C0B0A0A */\ |
|---|
| | 91 | - "pmullw "MANGLE(ff_pw_3)", %%mm6 \n\t" /* 3c - 6b */\ |
|---|
| | 92 | + "pmullw %8, %%mm6 \n\t" /* 3c - 6b */\ |
|---|
| | 93 | "paddw %%mm4, %%mm0 \n\t" /* a */\ |
|---|
| | 94 | "paddw %%mm1, %%mm5 \n\t" /* d */\ |
|---|
| | 95 | - "pmullw "MANGLE(ff_pw_20)", %%mm0 \n\t" /* 20a */\ |
|---|
| | 96 | + "pmullw %7, %%mm0 \n\t" /* 20a */\ |
|---|
| | 97 | "psubw %%mm5, %%mm0 \n\t" /* 20a - d */\ |
|---|
| | 98 | "paddw %6, %%mm6 \n\t"\ |
|---|
| | 99 | "paddw %%mm6, %%mm0 \n\t" /* 20a - 6b + 3c - d */\ |
|---|
| | 100 | @@ -1999,10 +1999,10 @@ |
|---|
| | 101 | "psrlq $24, %%mm6 \n\t" /* IJKLM000 */\ |
|---|
| | 102 | "punpcklbw %%mm7, %%mm2 \n\t" /* 0F0G0H0I */\ |
|---|
| | 103 | "punpcklbw %%mm7, %%mm6 \n\t" /* 0I0J0K0L */\ |
|---|
| | 104 | - "pmullw "MANGLE(ff_pw_3)", %%mm3 \n\t" /* 3c - 6b */\ |
|---|
| | 105 | + "pmullw %8, %%mm3 \n\t" /* 3c - 6b */\ |
|---|
| | 106 | "paddw %%mm2, %%mm1 \n\t" /* a */\ |
|---|
| | 107 | "paddw %%mm6, %%mm4 \n\t" /* d */\ |
|---|
| | 108 | - "pmullw "MANGLE(ff_pw_20)", %%mm1 \n\t" /* 20a */\ |
|---|
| | 109 | + "pmullw %7, %%mm1 \n\t" /* 20a */\ |
|---|
| | 110 | "psubw %%mm4, %%mm3 \n\t" /* - 6b +3c - d */\ |
|---|
| | 111 | "paddw %6, %%mm1 \n\t"\ |
|---|
| | 112 | "paddw %%mm1, %%mm3 \n\t" /* 20a - 6b +3c - d */\ |
|---|
| | 113 | @@ -2025,7 +2025,7 @@ |
|---|
| | 114 | "psubw %%mm5, %%mm0 \n\t" /* c - 2b */\ |
|---|
| | 115 | "movq %%mm3, %%mm5 \n\t" /* JKLMNOPQ */\ |
|---|
| | 116 | "psrlq $24, %%mm3 \n\t" /* MNOPQ000 */\ |
|---|
| | 117 | - "pmullw "MANGLE(ff_pw_3)", %%mm0 \n\t" /* 3c - 6b */\ |
|---|
| | 118 | + "pmullw %8, %%mm0 \n\t" /* 3c - 6b */\ |
|---|
| | 119 | "punpcklbw %%mm7, %%mm3 \n\t" /* 0M0N0O0P */\ |
|---|
| | 120 | "paddw %%mm3, %%mm2 \n\t" /* d */\ |
|---|
| | 121 | "psubw %%mm2, %%mm0 \n\t" /* -6b + 3c - d */\ |
|---|
| | 122 | @@ -2033,7 +2033,7 @@ |
|---|
| | 123 | "punpcklbw %%mm7, %%mm2 \n\t" /* 0J0K0L0M */\ |
|---|
| | 124 | "punpckhbw %%mm7, %%mm5 \n\t" /* 0N0O0P0Q */\ |
|---|
| | 125 | "paddw %%mm2, %%mm6 \n\t" /* a */\ |
|---|
| | 126 | - "pmullw "MANGLE(ff_pw_20)", %%mm6 \n\t" /* 20a */\ |
|---|
| | 127 | + "pmullw %7, %%mm6 \n\t" /* 20a */\ |
|---|
| | 128 | "paddw %6, %%mm0 \n\t"\ |
|---|
| | 129 | "paddw %%mm6, %%mm0 \n\t" /* 20a - 6b + 3c - d */\ |
|---|
| | 130 | "psraw $5, %%mm0 \n\t"\ |
|---|
| | 131 | @@ -2048,8 +2048,8 @@ |
|---|
| | 132 | "paddw %%mm2, %%mm5 \n\t" /* d */\ |
|---|
| | 133 | "paddw %%mm6, %%mm6 \n\t" /* 2b */\ |
|---|
| | 134 | "psubw %%mm6, %%mm4 \n\t" /* c - 2b */\ |
|---|
| | 135 | - "pmullw "MANGLE(ff_pw_20)", %%mm3 \n\t" /* 20a */\ |
|---|
| | 136 | - "pmullw "MANGLE(ff_pw_3)", %%mm4 \n\t" /* 3c - 6b */\ |
|---|
| | 137 | + "pmullw %7, %%mm3 \n\t" /* 20a */\ |
|---|
| | 138 | + "pmullw %8, %%mm4 \n\t" /* 3c - 6b */\ |
|---|
| | 139 | "psubw %%mm5, %%mm3 \n\t" /* -6b + 3c - d */\ |
|---|
| | 140 | "paddw %6, %%mm4 \n\t"\ |
|---|
| | 141 | "paddw %%mm3, %%mm4 \n\t" /* 20a - 6b + 3c - d */\ |
|---|
| | 142 | @@ -2062,7 +2062,9 @@ |
|---|
| | 143 | "decl %2 \n\t"\ |
|---|
| | 144 | " jnz 1b \n\t"\ |
|---|
| | 145 | : "+a"(src), "+c"(dst), "+g"(h)\ |
|---|
| | 146 | - : "d"((long)srcStride), "S"((long)dstStride), /*"m"(ff_pw_20), "m"(ff_pw_3),*/ "m"(temp), "m"(ROUNDER)\ |
|---|
| | 147 | + : "d"((long)srcStride), "S"((long)dstStride),\ |
|---|
| | 148 | + "m"(temp), "m"(ROUNDER),\ |
|---|
| | 149 | + "m"(ff_pw_20), "m"(ff_pw_3)\ |
|---|
| | 150 | : "memory"\ |
|---|
| | 151 | );\ |
|---|
| | 152 | }\ |
|---|
| | 153 | @@ -2140,10 +2142,10 @@ |
|---|
| | 154 | "paddw %%mm5, %%mm5 \n\t" /* 2b */\ |
|---|
| | 155 | "psubw %%mm5, %%mm6 \n\t" /* c - 2b */\ |
|---|
| | 156 | "pshufw $0x06, %%mm0, %%mm5 \n\t" /* 0C0B0A0A */\ |
|---|
| | 157 | - "pmullw "MANGLE(ff_pw_3)", %%mm6 \n\t" /* 3c - 6b */\ |
|---|
| | 158 | + "pmullw %8, %%mm6 \n\t" /* 3c - 6b */\ |
|---|
| | 159 | "paddw %%mm4, %%mm0 \n\t" /* a */\ |
|---|
| | 160 | "paddw %%mm1, %%mm5 \n\t" /* d */\ |
|---|
| | 161 | - "pmullw "MANGLE(ff_pw_20)", %%mm0 \n\t" /* 20a */\ |
|---|
| | 162 | + "pmullw %7, %%mm0 \n\t" /* 20a */\ |
|---|
| | 163 | "psubw %%mm5, %%mm0 \n\t" /* 20a - d */\ |
|---|
| | 164 | "paddw %6, %%mm6 \n\t"\ |
|---|
| | 165 | "paddw %%mm6, %%mm0 \n\t" /* 20a - 6b + 3c - d */\ |
|---|
| | 166 | @@ -2161,8 +2163,8 @@ |
|---|
| | 167 | "paddw %%mm5, %%mm4 \n\t" /* d */\ |
|---|
| | 168 | "paddw %%mm2, %%mm2 \n\t" /* 2b */\ |
|---|
| | 169 | "psubw %%mm2, %%mm3 \n\t" /* c - 2b */\ |
|---|
| | 170 | - "pmullw "MANGLE(ff_pw_20)", %%mm1 \n\t" /* 20a */\ |
|---|
| | 171 | - "pmullw "MANGLE(ff_pw_3)", %%mm3 \n\t" /* 3c - 6b */\ |
|---|
| | 172 | + "pmullw %7, %%mm1 \n\t" /* 20a */\ |
|---|
| | 173 | + "pmullw %8, %%mm3 \n\t" /* 3c - 6b */\ |
|---|
| | 174 | "psubw %%mm4, %%mm3 \n\t" /* -6b + 3c - d */\ |
|---|
| | 175 | "paddw %6, %%mm1 \n\t"\ |
|---|
| | 176 | "paddw %%mm1, %%mm3 \n\t" /* 20a - 6b + 3c - d */\ |
|---|
| | 177 | @@ -2175,7 +2177,9 @@ |
|---|
| | 178 | "decl %2 \n\t"\ |
|---|
| | 179 | " jnz 1b \n\t"\ |
|---|
| | 180 | : "+a"(src), "+c"(dst), "+g"(h)\ |
|---|
| | 181 | - : "S"((long)srcStride), "D"((long)dstStride), /*"m"(ff_pw_20), "m"(ff_pw_3),*/ "m"(temp), "m"(ROUNDER)\ |
|---|
| | 182 | + : "S"((long)srcStride), "D"((long)dstStride),\ |
|---|
| | 183 | + "m"(temp), "m"(ROUNDER),\ |
|---|
| | 184 | + "m"(ff_pw_20), "m"(ff_pw_3)\ |
|---|
| | 185 | : "memory"\ |
|---|
| | 186 | );\ |
|---|
| | 187 | }\ |
|---|
| | 188 | @@ -2254,31 +2258,31 @@ |
|---|
| | 189 | "movq 8(%0), %%mm1 \n\t"\ |
|---|
| | 190 | "movq 16(%0), %%mm2 \n\t"\ |
|---|
| | 191 | "movq 24(%0), %%mm3 \n\t"\ |
|---|
| | 192 | - QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %5, %6, %5, 16(%0), 8(%0), (%0), 32(%0), (%1), OP)\ |
|---|
| | 193 | - QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %5, %6, %5, 8(%0), (%0), (%0), 40(%0), (%1, %3), OP)\ |
|---|
| | 194 | + QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %7, %8, %5, 16(%0), 8(%0), (%0), 32(%0), (%1), OP)\ |
|---|
| | 195 | + QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %7, %8, %5, 8(%0), (%0), (%0), 40(%0), (%1, %3), OP)\ |
|---|
| | 196 | "add %4, %1 \n\t"\ |
|---|
| | 197 | - QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %5, %6, %5, (%0), (%0), 8(%0), 48(%0), (%1), OP)\ |
|---|
| | 198 | + QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %7, %8, %5, (%0), (%0), 8(%0), 48(%0), (%1), OP)\ |
|---|
| | 199 | \ |
|---|
| | 200 | - QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %5, %6, %5, (%0), 8(%0), 16(%0), 56(%0), (%1, %3), OP)\ |
|---|
| | 201 | + QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %7, %8, %5, (%0), 8(%0), 16(%0), 56(%0), (%1, %3), OP)\ |
|---|
| | 202 | "add %4, %1 \n\t"\ |
|---|
| | 203 | - QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %5, %6, %5, 8(%0), 16(%0), 24(%0), 64(%0), (%1), OP)\ |
|---|
| | 204 | - QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %5, %6, %5, 16(%0), 24(%0), 32(%0), 72(%0), (%1, %3), OP)\ |
|---|
| | 205 | + QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %7, %8, %5, 8(%0), 16(%0), 24(%0), 64(%0), (%1), OP)\ |
|---|
| | 206 | + QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %7, %8, %5, 16(%0), 24(%0), 32(%0), 72(%0), (%1, %3), OP)\ |
|---|
| | 207 | "add %4, %1 \n\t"\ |
|---|
| | 208 | - QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %5, %6, %5, 24(%0), 32(%0), 40(%0), 80(%0), (%1), OP)\ |
|---|
| | 209 | - QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %5, %6, %5, 32(%0), 40(%0), 48(%0), 88(%0), (%1, %3), OP)\ |
|---|
| | 210 | + QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %7, %8, %5, 24(%0), 32(%0), 40(%0), 80(%0), (%1), OP)\ |
|---|
| | 211 | + QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %7, %8, %5, 32(%0), 40(%0), 48(%0), 88(%0), (%1, %3), OP)\ |
|---|
| | 212 | "add %4, %1 \n\t"\ |
|---|
| | 213 | - QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %5, %6, %5, 40(%0), 48(%0), 56(%0), 96(%0), (%1), OP)\ |
|---|
| | 214 | - QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %5, %6, %5, 48(%0), 56(%0), 64(%0),104(%0), (%1, %3), OP)\ |
|---|
| | 215 | + QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %7, %8, %5, 40(%0), 48(%0), 56(%0), 96(%0), (%1), OP)\ |
|---|
| | 216 | + QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %7, %8, %5, 48(%0), 56(%0), 64(%0),104(%0), (%1, %3), OP)\ |
|---|
| | 217 | "add %4, %1 \n\t"\ |
|---|
| | 218 | - QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %5, %6, %5, 56(%0), 64(%0), 72(%0),112(%0), (%1), OP)\ |
|---|
| | 219 | - QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %5, %6, %5, 64(%0), 72(%0), 80(%0),120(%0), (%1, %3), OP)\ |
|---|
| | 220 | + QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %7, %8, %5, 56(%0), 64(%0), 72(%0),112(%0), (%1), OP)\ |
|---|
| | 221 | + QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %7, %8, %5, 64(%0), 72(%0), 80(%0),120(%0), (%1, %3), OP)\ |
|---|
| | 222 | "add %4, %1 \n\t"\ |
|---|
| | 223 | - QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %5, %6, %5, 72(%0), 80(%0), 88(%0),128(%0), (%1), OP)\ |
|---|
| | 224 | + QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %7, %8, %5, 72(%0), 80(%0), 88(%0),128(%0), (%1), OP)\ |
|---|
| | 225 | \ |
|---|
| | 226 | - QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %5, %6, %5, 80(%0), 88(%0), 96(%0),128(%0), (%1, %3), OP)\ |
|---|
| | 227 | + QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %7, %8, %5, 80(%0), 88(%0), 96(%0),128(%0), (%1, %3), OP)\ |
|---|
| | 228 | "add %4, %1 \n\t" \ |
|---|
| | 229 | - QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %5, %6, %5, 88(%0), 96(%0),104(%0),120(%0), (%1), OP)\ |
|---|
| | 230 | - QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %5, %6, %5, 96(%0),104(%0),112(%0),112(%0), (%1, %3), OP)\ |
|---|
| | 231 | + QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %7, %8, %5, 88(%0), 96(%0),104(%0),120(%0), (%1), OP)\ |
|---|
| | 232 | + QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %7, %8, %5, 96(%0),104(%0),112(%0),112(%0), (%1, %3), OP)\ |
|---|
| | 233 | \ |
|---|
| | 234 | "add $136, %0 \n\t"\ |
|---|
| | 235 | "add %6, %1 \n\t"\ |
|---|
| | 236 | @@ -2286,7 +2290,9 @@ |
|---|
| | 237 | " jnz 1b \n\t"\ |
|---|
| | 238 | \ |
|---|
| | 239 | : "+r"(temp_ptr), "+r"(dst), "+g"(count)\ |
|---|
| | 240 | - : "r"((long)dstStride), "r"(2*(long)dstStride), /*"m"(ff_pw_20), "m"(ff_pw_3),*/ "m"(ROUNDER), "g"(4-14*(long)dstStride)\ |
|---|
| | 241 | + : "r"((long)dstStride), "r"(2*(long)dstStride),\ |
|---|
| | 242 | + "m"(ROUNDER), "g"(4-14*(long)dstStride),\ |
|---|
| | 243 | + "m"(ff_pw_20), "m"(ff_pw_3)\ |
|---|
| | 244 | :"memory"\ |
|---|
| | 245 | );\ |
|---|
| | 246 | }\ |
|---|
| | 247 | @@ -2326,19 +2332,19 @@ |
|---|
| | 248 | "movq 8(%0), %%mm1 \n\t"\ |
|---|
| | 249 | "movq 16(%0), %%mm2 \n\t"\ |
|---|
| | 250 | "movq 24(%0), %%mm3 \n\t"\ |
|---|
| | 251 | - QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %5, %6, %5, 16(%0), 8(%0), (%0), 32(%0), (%1), OP)\ |
|---|
| | 252 | - QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %5, %6, %5, 8(%0), (%0), (%0), 40(%0), (%1, %3), OP)\ |
|---|
| | 253 | + QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %7, %8, %5, 16(%0), 8(%0), (%0), 32(%0), (%1), OP)\ |
|---|
| | 254 | + QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %7, %8, %5, 8(%0), (%0), (%0), 40(%0), (%1, %3), OP)\ |
|---|
| | 255 | "add %4, %1 \n\t"\ |
|---|
| | 256 | - QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %5, %6, %5, (%0), (%0), 8(%0), 48(%0), (%1), OP)\ |
|---|
| | 257 | + QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %7, %8, %5, (%0), (%0), 8(%0), 48(%0), (%1), OP)\ |
|---|
| | 258 | \ |
|---|
| | 259 | - QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %5, %6, %5, (%0), 8(%0), 16(%0), 56(%0), (%1, %3), OP)\ |
|---|
| | 260 | + QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %7, %8, %5, (%0), 8(%0), 16(%0), 56(%0), (%1, %3), OP)\ |
|---|
| | 261 | "add %4, %1 \n\t"\ |
|---|
| | 262 | - QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %5, %6, %5, 8(%0), 16(%0), 24(%0), 64(%0), (%1), OP)\ |
|---|
| | 263 | + QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %7, %8, %5, 8(%0), 16(%0), 24(%0), 64(%0), (%1), OP)\ |
|---|
| | 264 | \ |
|---|
| | 265 | - QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %5, %6, %5, 16(%0), 24(%0), 32(%0), 64(%0), (%1, %3), OP)\ |
|---|
| | 266 | + QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %7, %8, %5, 16(%0), 24(%0), 32(%0), 64(%0), (%1, %3), OP)\ |
|---|
| | 267 | "add %4, %1 \n\t"\ |
|---|
| | 268 | - QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %5, %6, %5, 24(%0), 32(%0), 40(%0), 56(%0), (%1), OP)\ |
|---|
| | 269 | - QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %5, %6, %5, 32(%0), 40(%0), 48(%0), 48(%0), (%1, %3), OP)\ |
|---|
| | 270 | + QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %7, %8, %5, 24(%0), 32(%0), 40(%0), 56(%0), (%1), OP)\ |
|---|
| | 271 | + QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %7, %8, %5, 32(%0), 40(%0), 48(%0), 48(%0), (%1, %3), OP)\ |
|---|
| | 272 | \ |
|---|
| | 273 | "add $72, %0 \n\t"\ |
|---|
| | 274 | "add %6, %1 \n\t"\ |
|---|
| | 275 | @@ -2346,7 +2352,9 @@ |
|---|
| | 276 | " jnz 1b \n\t"\ |
|---|
| | 277 | \ |
|---|
| | 278 | : "+r"(temp_ptr), "+r"(dst), "+g"(count)\ |
|---|
| | 279 | - : "r"((long)dstStride), "r"(2*(long)dstStride), /*"m"(ff_pw_20), "m"(ff_pw_3),*/ "m"(ROUNDER), "g"(4-6*(long)dstStride)\ |
|---|
| | 280 | + : "r"((long)dstStride), "r"(2*(long)dstStride),\ |
|---|
| | 281 | + "m"(ROUNDER), "g"(4-6*(long)dstStride),\ |
|---|
| | 282 | + "m"(ff_pw_20), "m"(ff_pw_3)\ |
|---|
| | 283 | : "memory"\ |
|---|
| | 284 | );\ |
|---|
| | 285 | }\ |
|---|
| | 286 | Index: ffmpeg/libavcodec/i386/h264dsp_mmx.c |
|---|
| | 287 | =================================================================== |
|---|
| | 288 | --- ffmpeg/libavcodec/i386/h264dsp_mmx.c (revision 11959) |
|---|
| | 289 | +++ ffmpeg/libavcodec/i386/h264dsp_mmx.c (working copy) |
|---|
| | 290 | @@ -417,21 +417,21 @@ |
|---|
| | 291 | // in: mm0=p1 mm1=p0 mm2=q0 mm3=q1 mm7=(tc&mask) |
|---|
| | 292 | // out: mm1=p0' mm2=q0' |
|---|
| | 293 | // clobbers: mm0,3-6 |
|---|
| | 294 | -#define H264_DEBLOCK_P0_Q0(pb_01, pb_3f)\ |
|---|
| | 295 | +#define H264_DEBLOCK_P0_Q0(pb_01, pb_3, pb_a1)\ |
|---|
| | 296 | "movq %%mm1 , %%mm5 \n\t"\ |
|---|
| | 297 | "pxor %%mm2 , %%mm5 \n\t" /* p0^q0*/\ |
|---|
| | 298 | "pand "#pb_01" , %%mm5 \n\t" /* (p0^q0)&1*/\ |
|---|
| | 299 | "pcmpeqb %%mm4 , %%mm4 \n\t"\ |
|---|
| | 300 | "pxor %%mm4 , %%mm3 \n\t"\ |
|---|
| | 301 | "pavgb %%mm0 , %%mm3 \n\t" /* (p1 - q1 + 256)>>1*/\ |
|---|
| | 302 | - "pavgb "MANGLE(ff_pb_3)" , %%mm3 \n\t" /*(((p1 - q1 + 256)>>1)+4)>>1 = 64+2+(p1-q1)>>2*/\ |
|---|
| | 303 | + "pavgb "#pb_3" , %%mm3 \n\t" /*(((p1 - q1 + 256)>>1)+4)>>1 = 64+2+(p1-q1)>>2*/\ |
|---|
| | 304 | "pxor %%mm1 , %%mm4 \n\t"\ |
|---|
| | 305 | "pavgb %%mm2 , %%mm4 \n\t" /* (q0 - p0 + 256)>>1*/\ |
|---|
| | 306 | "pavgb %%mm5 , %%mm3 \n\t"\ |
|---|
| | 307 | "paddusb %%mm4 , %%mm3 \n\t" /* d+128+33*/\ |
|---|
| | 308 | - "movq "MANGLE(ff_pb_A1)" , %%mm6 \n\t"\ |
|---|
| | 309 | + "movq "#pb_a1" , %%mm6 \n\t"\ |
|---|
| | 310 | "psubusb %%mm3 , %%mm6 \n\t"\ |
|---|
| | 311 | - "psubusb "MANGLE(ff_pb_A1)" , %%mm3 \n\t"\ |
|---|
| | 312 | + "psubusb "#pb_a1" , %%mm3 \n\t"\ |
|---|
| | 313 | "pminub %%mm7 , %%mm6 \n\t"\ |
|---|
| | 314 | "pminub %%mm7 , %%mm3 \n\t"\ |
|---|
| | 315 | "psubusb %%mm6 , %%mm1 \n\t"\ |
|---|
| | 316 | @@ -498,14 +498,14 @@ |
|---|
| | 317 | H264_DEBLOCK_Q1(%%mm3, %%mm4, "(%2,%3,2)", "(%2,%3)", %%mm5, %%mm6) |
|---|
| | 318 | |
|---|
| | 319 | /* filter p0, q0 */ |
|---|
| | 320 | - H264_DEBLOCK_P0_Q0(%8, unused) |
|---|
| | 321 | + H264_DEBLOCK_P0_Q0(%8, %9, %10) |
|---|
| | 322 | "movq %%mm1, (%1,%3,2) \n\t" |
|---|
| | 323 | "movq %%mm2, (%2) \n\t" |
|---|
| | 324 | |
|---|
| | 325 | : "=m"(*tmp0) |
|---|
| | 326 | : "r"(pix-3*stride), "r"(pix), "r"((long)stride), |
|---|
| | 327 | "m"(*tmp0/*unused*/), "m"(*(uint32_t*)tc0), "m"(alpha1), "m"(beta1), |
|---|
| | 328 | - "m"(ff_bone) |
|---|
| | 329 | + "m"(ff_bone), "m" (ff_pb_3), "m" (ff_pb_A1) |
|---|
| | 330 | ); |
|---|
| | 331 | } |
|---|
| | 332 | |
|---|
| | 333 | @@ -546,13 +546,13 @@ |
|---|
| | 334 | "movd %3, %%mm6 \n\t" |
|---|
| | 335 | "punpcklbw %%mm6, %%mm6 \n\t" |
|---|
| | 336 | "pand %%mm6, %%mm7 \n\t" // mm7 = tc&mask |
|---|
| | 337 | - H264_DEBLOCK_P0_Q0(%6, %7) |
|---|
| | 338 | + H264_DEBLOCK_P0_Q0(%6, %7, %8) |
|---|
| | 339 | "movq %%mm1, (%0,%2) \n\t" |
|---|
| | 340 | "movq %%mm2, (%1) \n\t" |
|---|
| | 341 | |
|---|
| | 342 | :: "r"(pix-2*stride), "r"(pix), "r"((long)stride), |
|---|
| | 343 | "r"(*(uint32_t*)tc0), |
|---|
| | 344 | - "m"(alpha1), "m"(beta1), "m"(ff_bone), "m"(ff_pb_3F) |
|---|
| | 345 | + "m"(alpha1), "m"(beta1), "m"(ff_bone), "m" (ff_pb_3), "m" (ff_pb_A1) |
|---|
| | 346 | ); |
|---|
| | 347 | } |
|---|
| | 348 | |
|---|
| | 349 | Index: ffmpeg/libavcodec/i386/simple_idct_mmx.c |
|---|
| | 350 | =================================================================== |
|---|
| | 351 | --- ffmpeg/libavcodec/i386/simple_idct_mmx.c (revision 11959) |
|---|
| | 352 | +++ ffmpeg/libavcodec/i386/simple_idct_mmx.c (working copy) |
|---|
| | 353 | @@ -363,7 +363,7 @@ |
|---|
| | 354 | "movq " #src4 ", %%mm1 \n\t" /* R6 R2 r6 r2 */\ |
|---|
| | 355 | "movq " #src1 ", %%mm2 \n\t" /* R3 R1 r3 r1 */\ |
|---|
| | 356 | "movq " #src5 ", %%mm3 \n\t" /* R7 R5 r7 r5 */\ |
|---|
| | 357 | - "movq "MANGLE(wm1010)", %%mm4 \n\t"\ |
|---|
| | 358 | + "movq %3, %%mm4 \n\t"\ |
|---|
| | 359 | "pand %%mm0, %%mm4 \n\t"\ |
|---|
| | 360 | "por %%mm1, %%mm4 \n\t"\ |
|---|
| | 361 | "por %%mm2, %%mm4 \n\t"\ |
|---|
| | 362 | @@ -437,7 +437,7 @@ |
|---|
| | 363 | "jmp 2f \n\t"\ |
|---|
| | 364 | "1: \n\t"\ |
|---|
| | 365 | "pslld $16, %%mm0 \n\t"\ |
|---|
| | 366 | - "#paddd "MANGLE(d40000)", %%mm0 \n\t"\ |
|---|
| | 367 | + "#paddd %4, %%mm0 \n\t"\ |
|---|
| | 368 | "psrad $13, %%mm0 \n\t"\ |
|---|
| | 369 | "packssdw %%mm0, %%mm0 \n\t"\ |
|---|
| | 370 | "movq %%mm0, " #dst " \n\t"\ |
|---|
| | 371 | @@ -471,7 +471,7 @@ |
|---|
| | 372 | "movq " #src4 ", %%mm1 \n\t" /* R6 R2 r6 r2 */\ |
|---|
| | 373 | "movq " #src1 ", %%mm2 \n\t" /* R3 R1 r3 r1 */\ |
|---|
| | 374 | "movq " #src5 ", %%mm3 \n\t" /* R7 R5 r7 r5 */\ |
|---|
| | 375 | - "movq "MANGLE(wm1010)", %%mm4 \n\t"\ |
|---|
| | 376 | + "movq %3, %%mm4 \n\t"\ |
|---|
| | 377 | "pand %%mm0, %%mm4 \n\t"\ |
|---|
| | 378 | "por %%mm1, %%mm4 \n\t"\ |
|---|
| | 379 | "por %%mm2, %%mm4 \n\t"\ |
|---|
| | 380 | @@ -545,7 +545,7 @@ |
|---|
| | 381 | "jmp 2f \n\t"\ |
|---|
| | 382 | "1: \n\t"\ |
|---|
| | 383 | "pslld $16, %%mm0 \n\t"\ |
|---|
| | 384 | - "paddd "MANGLE(d40000)", %%mm0 \n\t"\ |
|---|
| | 385 | + "paddd %4, %%mm0 \n\t"\ |
|---|
| | 386 | "psrad $13, %%mm0 \n\t"\ |
|---|
| | 387 | "packssdw %%mm0, %%mm0 \n\t"\ |
|---|
| | 388 | "movq %%mm0, " #dst " \n\t"\ |
|---|
| | 389 | @@ -1270,7 +1270,7 @@ |
|---|
| | 390 | */ |
|---|
| | 391 | |
|---|
| | 392 | "9: \n\t" |
|---|
| | 393 | - :: "r" (block), "r" (temp), "r" (coeffs) |
|---|
| | 394 | + :: "r" (block), "r" (temp), "r" (coeffs), "m" (wm1010), "m"(d40000) |
|---|
| | 395 | : "%eax" |
|---|
| | 396 | ); |
|---|
| | 397 | } |
|---|
| | 398 | Index: ffmpeg/libavcodec/i386/vc1dsp_mmx.c |
|---|
| | 399 | =================================================================== |
|---|
| | 400 | --- ffmpeg/libavcodec/i386/vc1dsp_mmx.c (revision 11959) |
|---|
| | 401 | +++ ffmpeg/libavcodec/i386/vc1dsp_mmx.c (working copy) |
|---|
| | 402 | @@ -77,7 +77,7 @@ |
|---|
| | 403 | asm volatile( |
|---|
| | 404 | "mov $3, %%"REG_c" \n\t" |
|---|
| | 405 | LOAD_ROUNDER_MMX("%5") |
|---|
| | 406 | - "movq "MANGLE(ff_pw_9)", %%mm6 \n\t" |
|---|
| | 407 | + "movq %7, %%mm6 \n\t" |
|---|
| | 408 | "1: \n\t" |
|---|
| | 409 | "movd (%0), %%mm2 \n\t" |
|---|
| | 410 | "add %2, %0 \n\t" |
|---|
| | 411 | @@ -98,7 +98,7 @@ |
|---|
| | 412 | "jnz 1b \n\t" |
|---|
| | 413 | : "+r"(src), "+r"(dst) |
|---|
| | 414 | : "r"(stride), "r"(-2*stride), |
|---|
| | 415 | - "m"(shift), "m"(rnd), "r"(9*stride-4) |
|---|
| | 416 | + "m"(shift), "m"(rnd), "r"(9*stride-4), "m"(ff_pw_9) |
|---|
| | 417 | : "%"REG_c, "memory" |
|---|
| | 418 | ); |
|---|
| | 419 | } |
|---|
| | 420 | @@ -116,8 +116,8 @@ |
|---|
| | 421 | rnd -= (-1+9+9-1)*1024; /* Add -1024 bias */ |
|---|
| | 422 | asm volatile( |
|---|
| | 423 | LOAD_ROUNDER_MMX("%4") |
|---|
| | 424 | - "movq "MANGLE(ff_pw_128)", %%mm6\n\t" |
|---|
| | 425 | - "movq "MANGLE(ff_pw_9)", %%mm5 \n\t" |
|---|
| | 426 | + "movq %5, %%mm6\n\t" |
|---|
| | 427 | + "movq %6, %%mm5 \n\t" |
|---|
| | 428 | "1: \n\t" |
|---|
| | 429 | "movq 2*0+0(%1), %%mm1 \n\t" |
|---|
| | 430 | "movq 2*0+8(%1), %%mm2 \n\t" |
|---|
| | 431 | @@ -141,7 +141,7 @@ |
|---|
| | 432 | "decl %0 \n\t" |
|---|
| | 433 | "jnz 1b \n\t" |
|---|
| | 434 | : "+r"(h), "+r" (src), "+r" (dst) |
|---|
| | 435 | - : "r"(stride), "m"(rnd) |
|---|
| | 436 | + : "r"(stride), "m"(rnd), "m"(ff_pw_128), "m"(ff_pw_9) |
|---|
| | 437 | : "memory" |
|---|
| | 438 | ); |
|---|
| | 439 | } |
|---|
| | 440 | @@ -158,7 +158,7 @@ |
|---|
| | 441 | asm volatile( |
|---|
| | 442 | "mov $8, %%"REG_c" \n\t" |
|---|
| | 443 | LOAD_ROUNDER_MMX("%5") |
|---|
| | 444 | - "movq "MANGLE(ff_pw_9)", %%mm6\n\t" |
|---|
| | 445 | + "movq %7, %%mm6\n\t" |
|---|
| | 446 | "1: \n\t" |
|---|
| | 447 | "movd 0(%0 ), %%mm3 \n\t" |
|---|
| | 448 | "movd 4(%0 ), %%mm4 \n\t" |
|---|
| | 449 | @@ -194,7 +194,7 @@ |
|---|
| | 450 | "jnz 1b \n\t" |
|---|
| | 451 | : "+r"(src), "+r"(dst) |
|---|
| | 452 | : "r"(offset), "r"(-2*offset), "g"(stride), "m"(rnd), |
|---|
| | 453 | - "g"(stride-offset) |
|---|
| | 454 | + "g"(stride-offset), "m"(ff_pw_9) |
|---|
| | 455 | : "%"REG_c, "memory" |
|---|
| | 456 | ); |
|---|
| | 457 | } |
|---|