Changeset 279

Show
Ignore:
Timestamp:
01/10/07 15:10:37 (2 years ago)
Author:
astrange
Message:

SSE2 accelerated y420 -> y422 conversion on x86. (All Intel Macs support SSE2.)
Started a (currently non-functional) AltiVec conversion too.
Split colorspace conversion into its own file.

Files:

Legend:

Unmodified
Added
Removed
Modified
Copied
Moved
  • trunk/FFusionCodec.c

    r278 r279  
    3131#include <Carbon/Carbon.h> 
    3232#include <QuickTime/QuickTime.h> 
     33#include <Accelerate/Accelerate.h> 
    3334 
    3435#include "FFusionCodec.h" 
     
    3637#include "avcodec.h" 
    3738#include "Codecprintf.h" 
     39#include "ColorConversions.h" 
    3840 
    3941void inline swapFrame(AVFrame * *a, AVFrame * *b) 
     
    103105 
    104106static OSErr FFusionDecompress(AVCodecContext *context, UInt8 *dataPtr, ICMDataProcRecordPtr dataProc, long width, long height, AVFrame *picture, long length, int useFirstFrameHack, int failSilently); 
    105 static void FastY420(UInt8 *baseAddr, AVFrame *picture); 
    106 static void SlowY420(UInt8* baseAddr, int outRB, int width, int height, AVFrame * picture); 
    107 static void EndianSwapRow32(UInt32 *d, UInt32 *s, size_t n); 
    108 static void BGR24toRGB24(UInt8 *baseAddr, long rowBump, long width, long height, AVFrame *picture); 
    109107static int FFusionGetBuffer(AVCodecContext *s, AVFrame *pic); 
    110108static void FFusionReleaseBuffer(AVCodecContext *s, AVFrame *pic); 
     
    934932        else if (myDrp->pixelFormat == k32ARGBPixelFormat && glob->avContext->pix_fmt == PIX_FMT_RGB32) 
    935933        { 
    936                 Ptr dest = drp->baseAddr; 
    937                 Ptr src = (Ptr) picture->data[0]; 
    938                 for (i = 0; i < myDrp->height; i++) { 
    939 #ifdef __BIG_ENDIAN__ 
    940                         memcpy(dest, src, FFMIN(drp->rowBytes, picture->linesize[0])); 
    941 #else 
    942                         EndianSwapRow32(dest, src, myDrp->width); 
    943 #endif 
    944                         dest += drp->rowBytes; 
    945                         src += picture->linesize[0]; 
    946                 } 
     934                RGB32toRGB32((UInt8 *)drp->baseAddr, drp->rowBytes, myDrp->width, myDrp->height, picture); 
    947935        } 
    948936        else 
     
    13061294    return err; 
    13071295} 
    1308  
    1309 //----------------------------------------------------------------- 
    1310 // FastY420 
    1311 //----------------------------------------------------------------- 
    1312 // Returns y420 data directly to QuickTime which then converts 
    1313 // in RGB for display 
    1314 //----------------------------------------------------------------- 
    1315  
    1316 static void FastY420(UInt8 *baseAddr, AVFrame *picture) 
    1317 { 
    1318     PlanarPixmapInfoYUV420 *planar; 
    1319          
    1320         /*From Docs: PixMap baseAddr points to a big-endian PlanarPixmapInfoYUV420 struct; see ImageCodec.i. */ 
    1321     planar = (PlanarPixmapInfoYUV420 *) baseAddr; 
    1322      
    1323     // if ya can't set da poiners, set da offsets 
    1324     planar->componentInfoY.offset = EndianU32_NtoB(picture->data[0] - baseAddr); 
    1325     planar->componentInfoCb.offset =  EndianU32_NtoB(picture->data[1] - baseAddr); 
    1326     planar->componentInfoCr.offset =  EndianU32_NtoB(picture->data[2] - baseAddr); 
    1327      
    1328     // for the 16/32 add look at EDGE in mpegvideo.c 
    1329     planar->componentInfoY.rowBytes = EndianU32_NtoB(picture->linesize[0]); 
    1330     planar->componentInfoCb.rowBytes = EndianU32_NtoB(picture->linesize[1]); 
    1331     planar->componentInfoCr.rowBytes = EndianU32_NtoB(picture->linesize[2]); 
    1332 } 
    1333  
    1334 //----------------------------------------------------------------- 
    1335 // FFusionSlowDecompress 
    1336 //----------------------------------------------------------------- 
    1337 // We have to return 2yuv values because 
    1338 // QT version has no built-in y420 component. 
    1339 // Since we do the conversion ourselves it is not really optimized.... 
    1340 // The function should never be called since many people now 
    1341 // have a decent OS/QT version. 
    1342 //----------------------------------------------------------------- 
    1343  
    1344 #ifdef __BIG_ENDIAN__ 
    1345 //hand-unrolled code is a bad idea on modern CPUs. luckily, this does not run on modern CPUs, only G3s. 
    1346 //also, big-endian only 
    1347 static void  
    1348 SlowY420(UInt8* baseAddr, int outRB, int width, int height, AVFrame * picture) 
    1349 { 
    1350         int             y = height >> 1; 
    1351         int             halfWidth = width >> 1, halfHalfWidth = halfWidth >> 1; 
    1352         UInt8          *inY = picture->data[0], *inU = picture->data[1], *inV = picture->data[2]; 
    1353         int             rB = picture->linesize[0], rbU = picture->linesize[1], rbV = picture->linesize[2]; 
    1354          
    1355         while (y--) { 
    1356                 UInt32         *ldst = (UInt32 *) baseAddr, *ldstr2 = (UInt32 *) (baseAddr + outRB); 
    1357                 UInt32         *lsrc = (UInt32 *) inY, *lsrcr2 = (UInt32 *) (inY + rB); 
    1358                 UInt16         *sU = (UInt16 *) inU, *sV = (UInt16 *) inV; 
    1359                 ptrdiff_t               off; 
    1360                  
    1361                 for (off = 0; off < halfHalfWidth; off++) { 
    1362                         UInt16          chrU = sU[off], chrV = sV[off]; 
    1363                         UInt32          row1luma = lsrc[off], row2luma = lsrcr2[off]; 
    1364                         UInt32          chromas1 = (chrU & 0xff00) << 16 | (chrV & 0xff00), 
    1365                                 chromas2 = (chrU & 0xff) << 24 | (chrV & 0xff) << 8; 
    1366                         int             off2 = off * 2; 
    1367                          
    1368                         ldst[off2] = chromas1 | (row1luma & 0xff000000) >> 8 | (row1luma & 0xff0000) >> 16; 
    1369                         ldstr2[off2] = chromas1 | (row2luma & 0xff000000) >> 8 | (row2luma & 0xff0000) >> 16; 
    1370                         off2++; 
    1371                         ldst[off2] = chromas2 | (row1luma & 0xff00) << 8 | row1luma & 0xff; 
    1372                         ldstr2[off2] = chromas2 | (row2luma & 0xff00) << 8 | row2luma & 0xff; 
    1373                 } 
    1374                  
    1375                 if (halfWidth % 4) { 
    1376                         UInt16         *ssrc = (UInt16 *) inY, *ssrcr2 = (UInt16 *) (inY + rB); 
    1377                          
    1378                         ptrdiff_t       off = halfWidth - 2; 
    1379                         UInt32          chromas = inV[off] << 8 | (inU[off] << 24); 
    1380                         UInt16          row1luma = ssrc[off], row2luma = ssrcr2[off]; 
    1381                          
    1382                         ldst[off] = chromas | row1luma & 0xff | (row1luma & 0xff00) << 8; 
    1383                         ldstr2[off] = chromas | row2luma & 0xff | (row2luma & 0xff00) << 8; 
    1384                 } 
    1385                 inY += rB * 2; 
    1386                 inU += rbU; 
    1387                 inV += rbV; 
    1388                 baseAddr += outRB * 2; 
    1389         } 
    1390 } 
    1391 #else 
    1392 static void  
    1393 SlowY420(UInt8* o, int outRB, int width, int height, AVFrame * picture) 
    1394 { 
    1395         UInt8          *yc = picture->data[0], *u = picture->data[1], *v = picture->data[2]; 
    1396         int             rY = picture->linesize[0], rU = picture->linesize[1], rV = picture->linesize[2], y = 0, x, x2; 
    1397          
    1398         for (; y < height; y++) { 
    1399                 for (x = 0, x2 = 0; x < width; x += 2, x2 += 4) { 
    1400                         int             hx = x >> 1; 
    1401                         o[x2] = u[hx]; 
    1402                         o[x2 + 1] = yc[x]; 
    1403                         o[x2 + 2] = v[hx]; 
    1404                         o[x2 + 3] = yc[x + 1]; 
    1405                 } 
    1406                  
    1407                 o += outRB; 
    1408                 yc += rY; 
    1409                 if (y % 2) { 
    1410                         u += rU; 
    1411                         v += rV; 
    1412                 } 
    1413         } 
    1414 } 
    1415 #endif 
    1416  
    1417 static void BGR24toRGB24(UInt8 *baseAddr, long rowBump, long width, long height, AVFrame *picture) 
    1418 { 
    1419         unsigned int i, j; 
    1420         UInt8 *srcPtr = picture->data[0]; 
    1421  
    1422         for (i = 0; i < height; ++i) 
    1423         { 
    1424                 for (j = 0; j < width * 3; j += 3) 
    1425                 { 
    1426                         baseAddr[j] = srcPtr[j+2]; 
    1427                         baseAddr[j+1] = srcPtr[j+1]; 
    1428                         baseAddr[j+2] = srcPtr[j]; 
    1429                 } 
    1430                 baseAddr += rowBump; 
    1431                 srcPtr += picture->linesize[0]; 
    1432         } 
    1433 } 
    1434  
    1435 static void EndianSwapRow32(UInt32 *d, UInt32 *s, size_t n) 
    1436 { 
    1437         while (n--) {*d++ = EndianU32_NtoB(*s); s++;} 
    1438 } 
  • trunk/Perian.xcodeproj/project.pbxproj

    r276 r279  
    102102                11C85FE70A64315500DF3D73 /* Perian-Info.plist in Resources */ = {isa = PBXBuildFile; fileRef = 11A709DA0A3CFCB6002058D4 /* Perian-Info.plist */; }; 
    103103                11F28DAD0B52EB75000AF78C /* SUUpdateAlert.m in Sources */ = {isa = PBXBuildFile; fileRef = 11F28DAC0B52EB75000AF78C /* SUUpdateAlert.m */; }; 
     104                3D4A7A990B5533BC004C5D6A /* ColorConversions.c in Sources */ = {isa = PBXBuildFile; fileRef = 3D4A7A980B5533BC004C5D6A /* ColorConversions.c */; }; 
    104105                6116E5510B43C27B0020F1CE /* ACBaseCodec.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 6116E5370B43C27B0020F1CE /* ACBaseCodec.cpp */; }; 
    105106                6116E5520B43C27B0020F1CE /* ACCodec.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 6116E5390B43C27B0020F1CE /* ACCodec.cpp */; }; 
     
    416417                11F28DAB0B52EB75000AF78C /* SUUpdateAlert.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = SUUpdateAlert.h; sourceTree = "<group>"; }; 
    417418                11F28DAC0B52EB75000AF78C /* SUUpdateAlert.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = SUUpdateAlert.m; sourceTree = "<group>"; }; 
     419                3D4A7A970B5533BC004C5D6A /* ColorConversions.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = ColorConversions.h; sourceTree = "<group>"; }; 
     420                3D4A7A980B5533BC004C5D6A /* ColorConversions.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = ColorConversions.c; sourceTree = "<group>"; }; 
    418421                6116E5370B43C27B0020F1CE /* ACBaseCodec.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; path = ACBaseCodec.cpp; sourceTree = "<group>"; }; 
    419422                6116E5380B43C27B0020F1CE /* ACBaseCodec.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; path = ACBaseCodec.h; sourceTree = "<group>"; }; 
     
    591594                F560DF0203D622D001ABA332 /* FFusionCodec.r */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.rez; path = FFusionCodec.r; sourceTree = "<group>"; }; 
    592595                F57755F40B51EB1800C7D833 /* CoreFoundation.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = CoreFoundation.framework; path = /System/Library/Frameworks/CoreFoundation.framework; sourceTree = "<absolute>"; }; 
    593                 F5CFD1B40B50009000616865 /* PerianPanePListGenerator */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = PerianPanePListGenerator; sourceTree = BUILT_PRODUCTS_DIR; }; 
     596                F5CFD1B40B50009000616865 /* PerianPanePListGenerator */ = {isa = PBXFileReference; includeInIndex = 0; lastKnownFileType = "compiled.mach-o.executable"; path = PerianPanePListGenerator; sourceTree = BUILT_PRODUCTS_DIR; }; 
    594597                F5CFD1BC0B5000CE00616865 /* GBPerianPanePListGenerator.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = GBPerianPanePListGenerator.h; sourceTree = "<group>"; }; 
    595598                F5CFD1BD0B5000CE00616865 /* GBPerianPanePListGenerator.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = GBPerianPanePListGenerator.m; sourceTree = "<group>"; }; 
     
    10511054                                F560DEFE03D61C4601ABA332 /* FFusionCodecDispatch.h */, 
    10521055                                F560DF0003D61D0101ABA332 /* FFusionCodec.c */, 
     1056                                3D4A7A970B5533BC004C5D6A /* ColorConversions.h */, 
     1057                                3D4A7A980B5533BC004C5D6A /* ColorConversions.c */, 
    10531058                        ); 
    10541059                        name = "Video Codec"; 
     
    10601065                                F5CFD2D60B5012A100616865 /* A52Codec.component */, 
    10611066                                F5CFD2D80B5012A100616865 /* AC3MovieImport.component */, 
    1062                                 F5CFD2DA0B5012A100616865 /* A52Preferences.app */, 
     1067                                F5CFD2DA0B5012A100616865 /* Preferences.app */, 
    10631068                        ); 
    10641069                        name = Products; 
     
    13001305                        sourceTree = BUILT_PRODUCTS_DIR; 
    13011306                }; 
    1302                 F5CFD2DA0B5012A100616865 /* A52Preferences.app */ = { 
     1307                F5CFD2DA0B5012A100616865 /* Preferences.app */ = { 
    13031308                        isa = PBXReferenceProxy; 
    13041309                        fileType = wrapper.application; 
    1305                         path = A52Preferences.app; 
     1310                        path = Preferences.app; 
    13061311                        remoteRef = F5CFD2D90B5012A100616865 /* PBXContainerItemProxy */; 
    13071312                        sourceTree = BUILT_PRODUCTS_DIR; 
     
    14691474                                61FD41330B4F6F0800BEEFEA /* MatroskaImportPrivate.cpp in Sources */, 
    14701475                                F52286260B50625E00F8CF7D /* bitstream_info.c in Sources */, 
     1476                                3D4A7A990B5533BC004C5D6A /* ColorConversions.c in Sources */, 
    14711477                        ); 
    14721478                        runOnlyForDeploymentPostprocessing = 0; 
     
    18031809                                FRAMEWORK_SEARCH_PATHS_QUOTED_1 = "\"$(SYSTEM_DEVELOPER_DIR)/SDKs/MacOSX10.4u.sdk/System/Library/Frameworks\""; 
    18041810                                FRAMEWORK_SEARCH_PATHS_QUOTED_2 = "\"$(SYSTEM_DEVELOPER_DIR)/SDKs/MacOSX10.4u.sdk/System/Library/Frameworks\""; 
     1811                                GCC_ALTIVEC_EXTENSIONS = YES; 
    18051812                                GCC_ENABLE_FIX_AND_CONTINUE = NO; 
    18061813                                GCC_GENERATE_DEBUGGING_SYMBOLS = NO; 
     
    19221929                        buildSettings = { 
    19231930                                DEBUG_INFORMATION_FORMAT = dwarf; 
     1931                                GCC_ALTIVEC_EXTENSIONS = YES; 
    19241932                        }; 
    19251933                        name = Development;