Changeset 406
- Timestamp:
- 04/11/07 15:25:20 (1 year ago)
- Files:
-
- trunk/Categories.h (modified) (1 diff)
- trunk/Categories.m (modified) (3 diffs)
- trunk/Perian.xcodeproj/project.pbxproj (modified) (9 diffs)
- trunk/SSADocument.m (modified) (3 diffs)
- trunk/SubImport.mm (modified) (1 diff)
- trunk/UniversalDetector (added)
- trunk/UniversalDetector/DetectorTest.m (added)
- trunk/UniversalDetector/English.lproj (added)
- trunk/UniversalDetector/English.lproj/InfoPlist.strings (added)
- trunk/UniversalDetector/Info.plist (added)
- trunk/UniversalDetector/UniversalDetector.h (added)
- trunk/UniversalDetector/UniversalDetector.mm (added)
- trunk/UniversalDetector/UniversalDetector.xcodeproj (added)
- trunk/UniversalDetector/UniversalDetector.xcodeproj/project.pbxproj (added)
- trunk/UniversalDetector/UniversalDetector_Prefix.pch (added)
- trunk/UniversalDetector/kludge (added)
- trunk/UniversalDetector/kludge/kludge.c (added)
- trunk/UniversalDetector/kludge/nsError.h (added)
- trunk/UniversalDetector/kludge/nscore.h (added)
- trunk/UniversalDetector/kludge/prcpucfg.h (added)
- trunk/UniversalDetector/kludge/prmem.h (added)
- trunk/UniversalDetector/kludge/protypes.h (added)
- trunk/UniversalDetector/kludge/prtypes.h (added)
- trunk/UniversalDetector/scan.pl (added)
- trunk/UniversalDetector/universalchardet (added)
- trunk/UniversalDetector/universalchardet/Makefile.in (added)
- trunk/UniversalDetector/universalchardet/doc (added)
- trunk/UniversalDetector/universalchardet/doc/ChardetInterface.htm (added)
- trunk/UniversalDetector/universalchardet/doc/UniversalCharsetDetection.doc (added)
- trunk/UniversalDetector/universalchardet/src (added)
- trunk/UniversalDetector/universalchardet/src/Big5Freq.tab (added)
- trunk/UniversalDetector/universalchardet/src/CharDistribution.cpp (added)
- trunk/UniversalDetector/universalchardet/src/CharDistribution.h (added)
- trunk/UniversalDetector/universalchardet/src/EUCKRFreq.tab (added)
- trunk/UniversalDetector/universalchardet/src/EUCTWFreq.tab (added)
- trunk/UniversalDetector/universalchardet/src/GB2312Freq.tab (added)
- trunk/UniversalDetector/universalchardet/src/JISFreq.tab (added)
- trunk/UniversalDetector/universalchardet/src/JpCntx.cpp (added)
- trunk/UniversalDetector/universalchardet/src/JpCntx.h (added)
- trunk/UniversalDetector/universalchardet/src/LangBulgarianModel.cpp (added)
- trunk/UniversalDetector/universalchardet/src/LangCyrillicModel.cpp (added)
- trunk/UniversalDetector/universalchardet/src/LangGreekModel.cpp (added)
- trunk/UniversalDetector/universalchardet/src/LangHebrewModel.cpp (added)
- trunk/UniversalDetector/universalchardet/src/LangHungarianModel.cpp (added)
- trunk/UniversalDetector/universalchardet/src/LangThaiModel.cpp (added)
- trunk/UniversalDetector/universalchardet/src/Makefile.in (added)
- trunk/UniversalDetector/universalchardet/src/nsBig5Prober.cpp (added)
- trunk/UniversalDetector/universalchardet/src/nsBig5Prober.h (added)
- trunk/UniversalDetector/universalchardet/src/nsCharSetProber.cpp (added)
- trunk/UniversalDetector/universalchardet/src/nsCharSetProber.h (added)
- trunk/UniversalDetector/universalchardet/src/nsCodingStateMachine.h (added)
- trunk/UniversalDetector/universalchardet/src/nsEUCJPProber.cpp (added)
- trunk/UniversalDetector/universalchardet/src/nsEUCJPProber.h (added)
- trunk/UniversalDetector/universalchardet/src/nsEUCKRProber.cpp (added)
- trunk/UniversalDetector/universalchardet/src/nsEUCKRProber.h (added)
- trunk/UniversalDetector/universalchardet/src/nsEUCTWProber.cpp (added)
- trunk/UniversalDetector/universalchardet/src/nsEUCTWProber.h (added)
- trunk/UniversalDetector/universalchardet/src/nsEscCharsetProber.cpp (added)
- trunk/UniversalDetector/universalchardet/src/nsEscCharsetProber.h (added)
- trunk/UniversalDetector/universalchardet/src/nsEscSM.cpp (added)
- trunk/UniversalDetector/universalchardet/src/nsGB2312Prober.cpp (added)
- trunk/UniversalDetector/universalchardet/src/nsGB2312Prober.h (added)
- trunk/UniversalDetector/universalchardet/src/nsHebrewProber.cpp (added)
- trunk/UniversalDetector/universalchardet/src/nsHebrewProber.h (added)
- trunk/UniversalDetector/universalchardet/src/nsLatin1Prober.cpp (added)
- trunk/UniversalDetector/universalchardet/src/nsLatin1Prober.h (added)
- trunk/UniversalDetector/universalchardet/src/nsMBCSGroupProber.cpp (added)
- trunk/UniversalDetector/universalchardet/src/nsMBCSGroupProber.h (added)
- trunk/UniversalDetector/universalchardet/src/nsMBCSSM.cpp (added)
- trunk/UniversalDetector/universalchardet/src/nsPkgInt.h (added)
- trunk/UniversalDetector/universalchardet/src/nsSBCSGroupProber.cpp (added)
- trunk/UniversalDetector/universalchardet/src/nsSBCSGroupProber.h (added)
- trunk/UniversalDetector/universalchardet/src/nsSBCharSetProber.cpp (added)
- trunk/UniversalDetector/universalchardet/src/nsSBCharSetProber.h (added)
- trunk/UniversalDetector/universalchardet/src/nsSJISProber.cpp (added)
- trunk/UniversalDetector/universalchardet/src/nsSJISProber.h (added)
- trunk/UniversalDetector/universalchardet/src/nsUTF8Prober.cpp (added)
- trunk/UniversalDetector/universalchardet/src/nsUTF8Prober.h (added)
- trunk/UniversalDetector/universalchardet/src/nsUdetXPCOMWrapper.cpp (added)
- trunk/UniversalDetector/universalchardet/src/nsUdetXPCOMWrapper.h (added)
- trunk/UniversalDetector/universalchardet/src/nsUniversalCharDetDll.h (added)
- trunk/UniversalDetector/universalchardet/src/nsUniversalCharDetModule.cpp (added)
- trunk/UniversalDetector/universalchardet/src/nsUniversalDetector.cpp (added)
- trunk/UniversalDetector/universalchardet/src/nsUniversalDetector.h (added)
- trunk/UniversalDetector/universalchardet/src/universalchardet.pkg (added)
- trunk/UniversalDetector/universalchardet/tests (added)
- trunk/UniversalDetector/universalchardet/tests/Makefile.in (added)
- trunk/UniversalDetector/universalchardet/tests/UniversalChardetTest.cpp (added)
Legend:
- Unmodified
- Added
- Removed
- Modified
- Copied
- Moved
trunk/Categories.h
r335 r406 11 11 @interface NSCharacterSet (STAdditions) 12 12 + (NSCharacterSet *)newlineCharacterSet; 13 + (NSCharacterSet *)whitespaceAndBomCharacterSet; 13 14 @end 14 15 trunk/Categories.m
r394 r406 8 8 9 9 #import "Categories.h" 10 #import "UniversalDetector.h" 11 #import "Codecprintf.h" 10 12 11 13 @implementation NSCharacterSet(STUtilities) … … 14 16 const unichar chars[] = {'\r','\n',0x0085,0x2028,0x2029}; 15 17 return [NSCharacterSet characterSetWithCharactersInString:[NSString stringWithCharacters:chars length:5]]; 18 } 19 20 + (NSCharacterSet *)whitespaceAndBomCharacterSet 21 { 22 const unichar bom = 0xfeff; 23 NSMutableCharacterSet *cs = [[NSMutableCharacterSet alloc] init]; 24 25 [cs addCharactersInString:[NSString stringWithCharacters:&bom length:1]]; 26 27 [cs formUnionWithCharacterSet:[NSCharacterSet whitespaceCharacterSet]]; 28 29 return [cs autorelease]; 16 30 } 17 31 @end … … 73 87 { 74 88 NSData *data = [NSData dataWithContentsOfMappedFile:file]; 75 NSStringEncoding encodings[] = {NSUTF8StringEncoding, NSUnicodeStringEncoding, NSWindowsCP1252StringEncoding, NSWindowsCP1251StringEncoding};76 NSString * str = nil;77 int i;89 UniversalDetector *ud = [[UniversalDetector alloc] init]; 90 NSString *res; 91 CFStringEncoding enc; 78 92 79 for (i = 0; i < sizeof(encodings) / sizeof(NSStringEncoding); i++) { 80 str = [[NSString alloc] initWithData:data encoding:encodings[i]]; 93 [ud analyzeData:data]; 94 95 enc = [ud encoding]; 96 97 if ([ud confidence] < .7) 98 Codecprintf(NULL,"Guessed encoding \"%s\" for \"%s\", but not sure (confidence %f%%).\n",[[ud MIMECharset] UTF8String],[file UTF8String],[ud confidence]*100.); 81 99 82 if (str) return [str autorelease]; 83 } 100 res = [[[NSString alloc] initWithData:data encoding:enc] autorelease]; 84 101 85 NSLog(@"Perian: unable to determine character encoding of %@",file); 86 return nil; 102 if (!res) Codecprintf(NULL,"Failed to load file as guessed encoding %s.\n",[[ud MIMECharset] UTF8String]); 103 [ud release]; 104 105 return res; 87 106 } 88 107 @end trunk/Perian.xcodeproj/project.pbxproj
r377 r406 106 106 3D211A230B6B1A660051299D /* Categories.m in Sources */ = {isa = PBXBuildFile; fileRef = 3D211A220B6B1A660051299D /* Categories.m */; }; 107 107 3D211A300B6B1AD80051299D /* SSATagParsing.m.rl in Sources */ = {isa = PBXBuildFile; fileRef = 3D211A2F0B6B1AD80051299D /* SSATagParsing.m.rl */; }; 108 3D41BEB40BCD3B510069E7C2 /* libuniversaldetector.a in Frameworks */ = {isa = PBXBuildFile; fileRef = 3D41BE9A0BCD3B330069E7C2 /* libuniversaldetector.a */; }; 108 109 3D4A7A990B5533BC004C5D6A /* ColorConversions.c in Sources */ = {isa = PBXBuildFile; fileRef = 3D4A7A980B5533BC004C5D6A /* ColorConversions.c */; }; 109 110 3DAD32DA0B6DB26100DA0A72 /* StdIOCallback.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 61CB114D0ACDF3C2007994BD /* StdIOCallback.cpp */; }; … … 270 271 remoteInfo = "ffmpeg Static Libraries"; 271 272 }; 273 3D41BE950BCD3B330069E7C2 /* PBXContainerItemProxy */ = { 274 isa = PBXContainerItemProxy; 275 containerPortal = 3D41BE8C0BCD3B330069E7C2 /* UniversalDetector.xcodeproj */; 276 proxyType = 2; 277 remoteGlobalIDString = 8DC2EF5B0486A6940098B216; 278 remoteInfo = UniversalDetector; 279 }; 280 3D41BE970BCD3B330069E7C2 /* PBXContainerItemProxy */ = { 281 isa = PBXContainerItemProxy; 282 containerPortal = 3D41BE8C0BCD3B330069E7C2 /* UniversalDetector.xcodeproj */; 283 proxyType = 2; 284 remoteGlobalIDString = 1B0DDCAB0A2D0AD10009B697; 285 remoteInfo = DetectorTest; 286 }; 287 3D41BE990BCD3B330069E7C2 /* PBXContainerItemProxy */ = { 288 isa = PBXContainerItemProxy; 289 containerPortal = 3D41BE8C0BCD3B330069E7C2 /* UniversalDetector.xcodeproj */; 290 proxyType = 2; 291 remoteGlobalIDString = 3DBF4F570BCD396800DA401A; 292 remoteInfo = universaldetector; 293 }; 272 294 61CB120F0ACE0FB1007994BD /* PBXContainerItemProxy */ = { 273 295 isa = PBXContainerItemProxy; … … 452 474 3D211A2E0B6B1AD80051299D /* SSATagParsing.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; path = SSATagParsing.h; sourceTree = 
"<group>"; }; 453 475 3D211A2F0B6B1AD80051299D /* SSATagParsing.m.rl */ = {isa = PBXFileReference; explicitFileType = sourcecode.c.objc; fileEncoding = 4; path = SSATagParsing.m.rl; sourceTree = "<group>"; }; 476 3D41BE8C0BCD3B330069E7C2 /* UniversalDetector.xcodeproj */ = {isa = PBXFileReference; lastKnownFileType = "wrapper.pb-project"; name = UniversalDetector.xcodeproj; path = UniversalDetector/UniversalDetector.xcodeproj; sourceTree = "<group>"; }; 477 3D41BEBC0BCD63510069E7C2 /* UniversalDetector.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; name = UniversalDetector.h; path = UniversalDetector/UniversalDetector.h; sourceTree = "<group>"; }; 454 478 3D4A7A970B5533BC004C5D6A /* ColorConversions.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = ColorConversions.h; sourceTree = "<group>"; }; 455 479 3D4A7A980B5533BC004C5D6A /* ColorConversions.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = ColorConversions.c; sourceTree = "<group>"; }; … … 663 687 8F483B5E0A6426C1002CCA73 /* AudioToolbox.framework in Frameworks */, 664 688 3D211A1D0B6B18880051299D /* Foundation.framework in Frameworks */, 689 3D41BEB40BCD3B510069E7C2 /* libuniversaldetector.a in Frameworks */, 665 690 ); 666 691 runOnlyForDeploymentPostprocessing = 0; … … 778 803 3D211A210B6B1A660051299D /* Categories.h */, 779 804 3D211A220B6B1A660051299D /* Categories.m */, 805 3D41BEBC0BCD63510069E7C2 /* UniversalDetector.h */, 780 806 ); 781 807 name = Source; … … 829 855 sourceTree = "<group>"; 830 856 }; 857 3D41BE8D0BCD3B330069E7C2 /* Products */ = { 858 isa = PBXGroup; 859 children = ( 860 3D41BE960BCD3B330069E7C2 /* UniversalDetector.framework */, 861 3D41BE980BCD3B330069E7C2 /* DetectorTest */, 862 3D41BE9A0BCD3B330069E7C2 /* libuniversaldetector.a */, 863 ); 864 name = Products; 865 sourceTree = "<group>"; 866 }; 831 867 6116E5360B43C27B0020F1CE /* ACPublic */ = { 832 868 
isa = PBXGroup; … … 877 913 isa = PBXGroup; 878 914 children = ( 915 3D41BE8C0BCD3B330069E7C2 /* UniversalDetector.xcodeproj */, 879 916 3D211A2E0B6B1AD80051299D /* SSATagParsing.h */, 880 917 3D211A2F0B6B1AD80051299D /* SSATagParsing.m.rl */, … … 1323 1360 ProjectRef = F5CFD2C90B5012A100616865 /* A52Codec.xcodeproj */; 1324 1361 }, 1362 { 1363 ProductGroup = 3D41BE8D0BCD3B330069E7C2 /* Products */; 1364 ProjectRef = 3D41BE8C0BCD3B330069E7C2 /* UniversalDetector.xcodeproj */; 1365 }, 1325 1366 ); 1326 1367 targets = ( … … 1341 1382 1342 1383 /* Begin PBXReferenceProxy section */ 1384 3D41BE960BCD3B330069E7C2 /* UniversalDetector.framework */ = { 1385 isa = PBXReferenceProxy; 1386 fileType = wrapper.framework; 1387 path = UniversalDetector.framework; 1388 remoteRef = 3D41BE950BCD3B330069E7C2 /* PBXContainerItemProxy */; 1389 sourceTree = BUILT_PRODUCTS_DIR; 1390 }; 1391 3D41BE980BCD3B330069E7C2 /* DetectorTest */ = { 1392 isa = PBXReferenceProxy; 1393 fileType = "compiled.mach-o.executable"; 1394 path = DetectorTest; 1395 remoteRef = 3D41BE970BCD3B330069E7C2 /* PBXContainerItemProxy */; 1396 sourceTree = BUILT_PRODUCTS_DIR; 1397 }; 1398 3D41BE9A0BCD3B330069E7C2 /* libuniversaldetector.a */ = { 1399 isa = PBXReferenceProxy; 1400 fileType = archive.ar; 1401 path = libuniversaldetector.a; 1402 remoteRef = 3D41BE990BCD3B330069E7C2 /* PBXContainerItemProxy */; 1403 sourceTree = BUILT_PRODUCTS_DIR; 1404 }; 1343 1405 F5CFD2D60B5012A100616865 /* A52Codec.component */ = { 1344 1406 isa = PBXReferenceProxy; trunk/SSADocument.m
r394 r406 323 323 NSMutableDictionary *headers, *styleDict; 324 324 NSMutableArray *doclines; 325 NSCharacterSet *ws = [NSCharacterSet whitespaceCharacterSet]; 325 NSCharacterSet *ws = [NSCharacterSet whitespaceAndBomCharacterSet]; 326 326 unichar cai; 327 327 int formatc; … … 332 332 doclines = [NSMutableArray array]; 333 333 334 if (![(NSString*)[lenum nextObject] isEqualToString:@"[Script Info]"]) return -1; 334 ns = [(NSString*)[lenum nextObject] stringByTrimmingCharactersInSet:ws]; 335 if (![ns isEqualToString:@"[Script Info]"]) return -1; 335 336 while (1) { 336 337 ns = (NSString*)[lenum nextObject]; … … 508 509 509 510 FSRefMakePath(theDirectory, path, PATH_MAX); 510 nspath = [NSString stringWithUTF8String:(char*)path]; 511 nspath = [[NSString stringWithUTF8String:(char*)path] stringByAppendingPathComponent:(NSString*)filename]; 511 512 free(path); 512 513 513 if ([ssa loadFile:[nspath stringByAppendingPathComponent:(NSString*)filename]]) { 514 NSLog(@"Perian: unable to load SSA file %@",nspath); 514 if ([ssa loadFile:nspath]) { 515 Codecprintf(NULL,"Unable to load SSA file \"%s\"\n",[nspath UTF8String]); 515 516 err = -1; 516 517 goto bail; trunk/SubImport.mm
r394 r406 188 188 NSString *l = [[NSString alloc] initWithBytes:subTextOffset length:subOffset - subTextOffset encoding:NSUTF8StringEncoding]; 189 189 sl = [[SubLine alloc] initWithLine:l start:startTime end:endTime]; 190 191 190 [l autorelease]; 192 191 [sl autorelease];
