| | 68 | |
|---|
| | 69 | static void noinline Y420toY422_lastrow(UInt8 *o, const UInt8 *yc, const UInt8 *uc, const UInt8 *vc, unsigned halfWidth) |
|---|
| | 70 | { |
|---|
| | 71 | unsigned x; |
|---|
| | 72 | for(x=0; x < halfWidth; x++) |
|---|
| | 73 | { |
|---|
| | 74 | unsigned x4 = x*4, x2 = x*2; |
|---|
| | 75 | o[x4] = uc[x]; |
|---|
| | 76 | o[x4+1] = yc[x2]; |
|---|
| | 77 | o[x4+2] = vc[x]; |
|---|
| | 78 | o[x4+3] = yc[x2+1]; |
|---|
| | 79 | } |
|---|
| | 80 | } |
|---|
| | 81 | |
|---|
/*
 * Call Y420toY422_lastrow() when `height` is odd.
 *
 * The body is wrapped in do { } while (0) so the macro behaves as exactly one
 * statement: used as the unbraced body of an `if`, a following `else` still
 * pairs with the caller's `if` rather than the one inside the macro.
 * Every argument is parenthesized so that an expression such as `a | b`
 * passed as `height` is evaluated in full before the `& 1` parity test.
 */
#define HandleLastRow(o, yc, uc, vc, halfWidth, height) \
	do { \
		if (unlikely((height) & 1)) \
			Y420toY422_lastrow((o), (yc), (uc), (vc), (halfWidth)); \
	} while (0)
|---|
| | 215 | #if 0 |
|---|
| | 216 | asm volatile( |
|---|
| | 217 | "\n0: \n\t" |
|---|
| | 218 | "movdqa (%2), %%xmm0 \n\t" |
|---|
| | 219 | "movdqa 16(%2), %%xmm2 \n\t" |
|---|
| | 220 | "movdqa (%3), %%xmm1 \n\t" |
|---|
| | 221 | "movdqa 16(%3), %%xmm3 \n\t" |
|---|
| | 222 | "movdqu (%4), %%xmm4 \n\t" |
|---|
| | 223 | "movdqu (%5), %%xmm5 \n\t" |
|---|
| | 224 | "addl $32, %2 \n\t" |
|---|
| | 225 | "addl $32, %3 \n\t" |
|---|
| | 226 | "addl $16, %4 \n\t" |
|---|
| | 227 | "addl $16, %5 \n\t" |
|---|
| | 228 | "movdqa %%xmm4, %%xmm6 \n\t" |
|---|
| | 229 | "punpcklbw %%xmm5, %%xmm4 \n\t" /*chroma_l*/ |
|---|
| | 230 | "punpckhbw %%xmm5, %%xmm6 \n\t" /*chroma_h*/ |
|---|
| | 231 | "movdqa %%xmm4, %%xmm5 \n\t" |
|---|
| | 232 | "punpcklbw %%xmm0, %%xmm5 \n\t" |
|---|
| | 233 | "movntdq %%xmm5, (%0) \n\t" /*ov[x4]*/ |
|---|
| | 234 | "movdqa %%xmm4, %%xmm5 \n\t" |
|---|
| | 235 | "punpckhbw %%xmm0, %%xmm5 \n\t" |
|---|
| | 236 | "movntdq %%xmm5, 16(%0) \n\t" /*ov[x4+1]*/ |
|---|
| | 237 | "movdqa %%xmm6, %%xmm5 \n\t" |
|---|
| | 238 | "punpcklbw %%xmm2, %%xmm5 \n\t" |
|---|
| | 239 | "movntdq %%xmm5, 32(%0) \n\t" /*ov[x4+2]*/ |
|---|
| | 240 | "movdqa %%xmm6, %%xmm5 \n\t" |
|---|
| | 241 | "punpckhbw %%xmm2, %%xmm5 \n\t" |
|---|
| | 242 | "movntdq %%xmm5, 48(%0) \n\t" /*ov[x4+3]*/ |
|---|
| | 243 | "addl $64, %0 \n\t" |
|---|
| | 244 | "movdqa %%xmm4, %%xmm5 \n\t" |
|---|
| | 245 | "punpcklbw %%xmm1, %%xmm5 \n\t" |
|---|
| | 246 | "movntdq %%xmm5, (%1) \n\t" /*ov2[x4]*/ |
|---|
| | 247 | "punpckhbw %%xmm1, %%xmm4 \n\t" |
|---|
| | 248 | "movntdq %%xmm4, 16(%1) \n\t" /*ov2[x4+1]*/ |
|---|
| | 249 | "movdqa %%xmm6, %%xmm5 \n\t" |
|---|
| | 250 | "punpcklbw %%xmm3, %%xmm5 \n\t" |
|---|
| | 251 | "movntdq %%xmm5, 32(%1) \n\t" /*ov2[x4+2]*/ |
|---|
| | 252 | "punpckhbw %%xmm3, %%xmm6 \n\t" |
|---|
| | 253 | "movntdq %%xmm6, 48(%1) \n\t" /*ov2[x4+3]*/ |
|---|
| | 254 | "addl $64, %1 \n\t" |
|---|
| | 255 | "dec %6 \n\t" |
|---|
| | 256 | "jnz 0b \n\t" |
|---|
| | 257 | : "+r" (ov), "+r" (ov2), |
|---|
| | 258 | "+r" (yv), "+r" (yv2), "+r" (uv), "+r" (vv) |
|---|
| | 259 | : "r" (vWidth) |
|---|
| | 260 | ); |
|---|
| | 261 | #else |
|---|
| 223 | | } |
|---|
| 224 | | |
|---|
| 225 | | |
|---|
| 226 | | static void __attribute__((noinline)) Y420toY422_x86_scalar(UInt8 * o, unsigned outRB, unsigned width, unsigned height, AVFrame * picture) |
|---|
| | 298 | |
|---|
| | 299 | _mm_sfence(); |
|---|
| | 300 | |
|---|
| | 301 | HandleLastRow(o, yc, uc, vc, halfwidth, height); |
|---|
| | 302 | } |
|---|
| | 303 | |
|---|
| | 304 | |
|---|
| | 305 | static void noinline Y420toY422_x86_scalar(UInt8 * o, unsigned outRB, unsigned width, unsigned height, AVFrame * picture) |
|---|
| 257 | | if ((yc | picture->linesize[0]) % 16 == 0) { |
|---|
| 258 | | Y420toY422_sse2(o, outRB, width, height, picture); |
|---|
| 259 | | _mm_sfence(); |
|---|
| 260 | | } else Y420toY422_x86_scalar(o, outRB, width, height, picture); |
|---|
| | 338 | if (!unlikely((yc | picture->linesize[0]) % 16)) Y420toY422_sse2(o, outRB, width, height, picture); |
|---|
| | 339 | else Y420toY422_x86_scalar(o, outRB, width, height, picture); |
|---|
| | 342 | |
|---|
| | 343 | void YA420toV408(UInt8* o, unsigned outRB, unsigned width, unsigned height, AVFrame * picture) |
|---|
| | 344 | { |
|---|
| | 345 | UInt8 *yc = picture->data[0], *u = picture->data[1], *v = picture->data[2], *a = picture->data[3]; |
|---|
| | 346 | unsigned rY = picture->linesize[0], rU = picture->linesize[1], rV = picture->linesize[2], rA = picture->linesize[3], y, x; |
|---|
| | 347 | |
|---|
| | 348 | for (y = 0; y < height; y++) { |
|---|
| | 349 | for (x = 0; x < width; x++) { |
|---|
| | 350 | o[x*4] = u[x/2]; |
|---|
| | 351 | o[x*4+1] = yc[x]; |
|---|
| | 352 | o[x*4+2] = v[x/2]; |
|---|
| | 353 | o[x*4+3] = a[x]; |
|---|
| | 354 | } |
|---|
| | 355 | |
|---|
| | 356 | o += outRB; |
|---|
| | 357 | yc += rY; |
|---|
| | 358 | a += rA; |
|---|
| | 359 | if (y & 1) { |
|---|
| | 360 | u += rU; |
|---|
| | 361 | v += rV; |
|---|
| | 362 | } |
|---|
| | 363 | } |
|---|
| | 364 | } |
|---|