| | 212 | #if 0 |
|---|
| | 213 | asm volatile( |
|---|
| | 214 | "\n0: \n\t" |
|---|
| | 215 | "movdqu (%4), %%xmm4 \n\t" |
|---|
| | 216 | "movdqu (%5), %%xmm5 \n\t" |
|---|
| | 217 | "addl $16, %4 \n\t" |
|---|
| | 218 | "addl $16, %5 \n\t" |
|---|
| | 219 | "movdqa (%2), %%xmm0 \n\t" |
|---|
| | 220 | "movdqa 16(%2), %%xmm2 \n\t" |
|---|
| | 221 | "addl $32, %2 \n\t" |
|---|
| | 222 | "movdqa (%3), %%xmm1 \n\t" |
|---|
| | 223 | "movdqa 16(%3), %%xmm3 \n\t" |
|---|
| | 224 | "addl $32, %3 \n\t" |
|---|
| | 225 | "movdqa %%xmm4, %%xmm6 \n\t" |
|---|
| | 226 | "punpcklbw %%xmm5, %%xmm4 \n\t" /*chroma_l*/ |
|---|
| | 227 | "punpckhbw %%xmm5, %%xmm6 \n\t" /*chroma_h*/ |
|---|
| | 228 | "movdqa %%xmm4, %%xmm5 \n\t" |
|---|
| | 229 | "punpcklbw %%xmm0, %%xmm5 \n\t" |
|---|
| | 230 | "movntdq %%xmm5, (%0) \n\t" /*ov[x4]*/ |
|---|
| | 231 | "movdqa %%xmm4, %%xmm5 \n\t" |
|---|
| | 232 | "punpckhbw %%xmm0, %%xmm5 \n\t" |
|---|
| | 233 | "movntdq %%xmm5, 16(%0) \n\t" /*ov[x4+1]*/ |
|---|
| | 234 | "movdqa %%xmm6, %%xmm5 \n\t" |
|---|
| | 235 | "punpcklbw %%xmm2, %%xmm5 \n\t" |
|---|
| | 236 | "movntdq %%xmm5, 32(%0) \n\t" /*ov[x4+2]*/ |
|---|
| | 237 | "movdqa %%xmm6, %%xmm5 \n\t" |
|---|
| | 238 | "punpckhbw %%xmm2, %%xmm5 \n\t" |
|---|
| | 239 | "movntdq %%xmm5, 48(%0) \n\t" /*ov[x4+3]*/ |
|---|
| | 240 | "addl $64, %0 \n\t" |
|---|
| | 241 | "movdqa %%xmm4, %%xmm5 \n\t" |
|---|
| | 242 | "punpcklbw %%xmm1, %%xmm5 \n\t" |
|---|
| | 243 | "movntdq %%xmm5, (%1) \n\t" /*ov2[x4]*/ |
|---|
| | 244 | "punpckhbw %%xmm1, %%xmm4 \n\t" |
|---|
| | 245 | "movntdq %%xmm4, 16(%1) \n\t" /*ov2[x4+1]*/ |
|---|
| | 246 | "movdqa %%xmm6, %%xmm5 \n\t" |
|---|
| | 247 | "punpcklbw %%xmm3, %%xmm5 \n\t" |
|---|
| | 248 | "movntdq %%xmm5, 32(%1) \n\t" /*ov2[x4+2]*/ |
|---|
| | 249 | "punpckhbw %%xmm3, %%xmm6 \n\t" |
|---|
| | 250 | "movntdq %%xmm6, 48(%1) \n\t" /*ov2[x4+3]*/ |
|---|
| | 251 | "addl $64, %1 \n\t" |
|---|
| | 252 | "dec %6 \n\t" |
|---|
| | 253 | "jnz 0b \n\t" |
|---|
| | 254 | : |
|---|
| | 255 | : "r" (ov), "r" (ov2), |
|---|
| | 256 | "r" (yv), "r" (yv2), "r" (uv), "r" (vv), |
|---|
| | 257 | "c" (vWidth) |
|---|
| | 258 | ); |
|---|
| | 259 | #else |
|---|
| 215 | | #if 1 |
|---|
| 216 | | asm volatile( |
|---|
| 217 | | "movdqu %4, %%xmm4 \n\t" |
|---|
| 218 | | "movdqu %5, %%xmm5 \n\t" |
|---|
| 219 | | "movdqa %2, %%xmm0 \n\t" |
|---|
| 220 | | "movdqa 16+1*%2, %%xmm2 \n\t" |
|---|
| 221 | | "movdqa %3, %%xmm1 \n\t" |
|---|
| 222 | | "movdqa 16+1*%3, %%xmm3 \n\t" |
|---|
| 223 | | "movdqa %%xmm4, %%xmm6 \n\t" |
|---|
| 224 | | "punpcklbw %%xmm5, %%xmm4 \n\t" /*chroma_l*/ |
|---|
| 225 | | "punpckhbw %%xmm5, %%xmm6 \n\t" /*chroma_h*/ |
|---|
| 226 | | "movdqa %%xmm4, %%xmm5 \n\t" |
|---|
| 227 | | "punpcklbw %%xmm0, %%xmm5 \n\t" |
|---|
| 228 | | "movntdq %%xmm5, %0 \n\t" /*ov[x4]*/ |
|---|
| 229 | | "movdqa %%xmm4, %%xmm5 \n\t" |
|---|
| 230 | | "punpckhbw %%xmm0, %%xmm5 \n\t" |
|---|
| 231 | | "movntdq %%xmm5, 16+1*%0 \n\t" /*ov[x4+1]*/ |
|---|
| 232 | | "movdqa %%xmm6, %%xmm5 \n\t" |
|---|
| 233 | | "punpcklbw %%xmm2, %%xmm5 \n\t" |
|---|
| 234 | | "movntdq %%xmm5, 32+1*%0 \n\t" /*ov[x4+2]*/ |
|---|
| 235 | | "movdqa %%xmm6, %%xmm5 \n\t" |
|---|
| 236 | | "punpckhbw %%xmm2, %%xmm5 \n\t" |
|---|
| 237 | | "movntdq %%xmm5, 48+1*%0 \n\t" /*ov[x4+3]*/ |
|---|
| 238 | | "movdqa %%xmm4, %%xmm5 \n\t" |
|---|
| 239 | | "punpcklbw %%xmm1, %%xmm5 \n\t" |
|---|
| 240 | | "movntdq %%xmm5, %1 \n\t" /*ov2[x4]*/ |
|---|
| 241 | | "punpckhbw %%xmm1, %%xmm4 \n\t" |
|---|
| 242 | | "movntdq %%xmm4, 16+1*%1 \n\t" /*ov2[x4+1]*/ |
|---|
| 243 | | "movdqa %%xmm6, %%xmm5 \n\t" |
|---|
| 244 | | "punpcklbw %%xmm3, %%xmm5 \n\t" |
|---|
| 245 | | "movntdq %%xmm5, 32+1*%1 \n\t" /*ov2[x4+2]*/ |
|---|
| 246 | | "punpckhbw %%xmm3, %%xmm6 \n\t" |
|---|
| 247 | | "movntdq %%xmm6, 48+1*%1 \n\t" /*ov2[x4+3]*/ |
|---|
| 248 | | : "=m" (ov[x4]), "=m" (ov2[x4]) |
|---|
| 249 | | : "m" (yv[x2]), "m" (yv2[x2]), "m" (uv[x]), "m" (vv[x]) |
|---|
| 250 | | ); |
|---|
| 251 | | #else |
|---|