Changeset 495
- Timestamp: 05/05/07 13:52:50 (3 years ago)
- Files:
  - trunk/Categories.m (modified) (1 diff)
  - trunk/UniversalDetector/universalchardet/src/nsCharSetProber.h (modified) (1 diff)
  - trunk/UniversalDetector/universalchardet/src/nsSBCSGroupProber.cpp (modified) (1 diff)
Legend:
- Unmodified
- Added
- Removed
- Modified
- Copied
- Moved
trunk/Categories.m
r406 r495
  88   88   NSData *data = [NSData dataWithContentsOfMappedFile:file];
  89   89   UniversalDetector *ud = [[UniversalDetector alloc] init];
  90      - NSString *res;
  91      - CFStringEncoding enc;
       90 + NSString *res = nil;
       91 + NSStringEncoding enc;
       92 + float conf;
       93 + NSString *enc_str;
  92   94
  93   95   [ud analyzeData:data];
  94   96
  95   97   enc = [ud encoding];
       98 + conf = [ud confidence];
       99 + enc_str = [ud MIMECharset];
  96  100
  97      - if ([ud confidence] < .7)
  98      -     Codecprintf(NULL,"Guessed encoding \"%s\" for \"%s\", but not sure (confidence %f%%).\n",[[ud MIMECharset] UTF8String],[file UTF8String],[ud confidence]*100.);
  99      -
      101 + if (conf < .5) {
      102 +     if ([enc_str isEqualToString:@"windows-1251"]) { // this may or may not be a good idea...
      103 +         enc = NSWindowsCP1250StringEncoding; // UD is bad at guessing latin2, so if it has a poor match for 1251 we change it to this
      104 +         Codecprintf(NULL,"Guessed encoding \"%s\" for \"%s\", but confidence only %f%%. Trying windows-1250.\n",[enc_str UTF8String],[file UTF8String],conf*100.);
      105 +     } else if (![enc_str isEqualToString:@"US-ASCII"]) Codecprintf(NULL,"Guessed encoding \"%s\" for \"%s\", but not sure (confidence %f%%).\n",[enc_str UTF8String],[file UTF8String],conf*100.);
      106 + }
      107 +
 100  108   res = [[[NSString alloc] initWithData:data encoding:enc] autorelease];
 101  109
 102      - if (!res) Codecprintf(NULL,"Failed to load file as guessed encoding %s.\n",[[ud MIMECharset] UTF8String]);
      110 + if (!res) Codecprintf(NULL,"Failed to load file as guessed encoding %s.\n",[enc_str UTF8String]);
 103  111   [ud release];
 104  112
 105  113   return res;
 106  114   }
trunk/UniversalDetector/universalchardet/src/nsCharSetProber.h
r406 r495
  41   41   #include "nscore.h"
  42   42
  43      - //#define DEBUG_chardet // Uncomment this for debug dump.
       43 + #define DEBUG_chardet // Uncomment this for debug dump.
  44   44
  45   45   typedef enum {
trunk/UniversalDetector/universalchardet/src/nsSBCSGroupProber.cpp
r406 r495
 218  218   mProbers[i]->DumpStatus();
 219  219   }
 220      - printf(" SBCS Group found best match [%s] confidence %f.\r\n",
      220 + if (mProbers[mBestGuess]) printf(" SBCS Group found best match [%s] confidence %f.\r\n",
 221  221   mProbers[mBestGuess]->GetCharSetName(), cf);
 222  222   }
