| 1 | // |
|---|
| 2 | // SubUtilities.m |
|---|
| 3 | // SSARender2 |
|---|
| 4 | // |
|---|
| 5 | // Created by Alexander Strange on 7/28/07. |
|---|
| 6 | // Copyright 2007 __MyCompanyName__. All rights reserved. |
|---|
| 7 | // |
|---|
| 8 | |
|---|
| 9 | #import "SubUtilities.h" |
|---|
| 10 | #import "UniversalDetector.h" |
|---|
| 11 | #import "Codecprintf.h" |
|---|
| 12 | |
|---|
// Splits str on every occurrence of split and returns the pieces with
// leading and trailing whitespace (as defined by
// +[NSCharacterSet whitespaceCharacterSet]) trimmed from each one.
//
// Returns an empty array when str is nil (messaging nil yields no pieces).
NSArray *STSplitStringIgnoringWhitespace(NSString *str, NSString *split)
{
	NSArray *pieces = [str componentsSeparatedByString:split];
	NSCharacterSet *wcs = [NSCharacterSet whitespaceCharacterSet];
	size_t num = [pieces count], i;
	// Collect into a heap-backed array instead of a stack VLA: a VLA is
	// undefined for num == 0 and can overflow the stack for huge inputs.
	NSMutableArray *trimmed = [NSMutableArray arrayWithCapacity:num];

	for (i = 0; i < num; i++) {
		NSString *piece = [pieces objectAtIndex:i];
		[trimmed addObject:[piece stringByTrimmingCharactersInSet:wcs]];
	}

	return trimmed;
}
|---|
| 25 | |
|---|
// Splits str on split into fields, stopping after count - 1 separators so
// that the last field receives the unsplit remainder of the string.
//
// - Matching is case sensitive and no characters are skipped.
// - A field with no characters is returned as an empty string.
// - If the string runs out before count fields were produced, one trailing
//   empty string is appended after the fields that were found.
// - count is unsigned, so passing 0 wraps around on the first decrement and
//   the string is split on every separator.
NSArray *STSplitStringWithCount(NSString *str, NSString *split, size_t count)
{
	NSMutableArray *ar = [NSMutableArray arrayWithCapacity:count];
	NSScanner *sc = [NSScanner scannerWithString:str];
	NSString *scv;

	[sc setCharactersToBeSkipped:nil];
	[sc setCaseSensitive:TRUE];

	while (count != 1) {
		count--;
		// Reset before every scan: scanUpToString: leaves its out-parameter
		// untouched when nothing is scanned.
		scv = nil;
		[sc scanUpToString:split intoString:&scv];
		[sc scanString:split intoString:nil];
		if (!scv) scv = [NSString string];
		[ar addObject:scv];
		if ([sc isAtEnd]) break;
	}

	// Take whatever is left explicitly. Previously a stale scv from the loop
	// was appended a second time when the scanner was already at the end.
	scv = [str substringFromIndex:[sc scanLocation]];
	if (!scv) scv = [NSString string];
	[ar addObject:scv];

	return ar;
}
|---|
| 50 | |
|---|
// Returns a mutable copy of str in which every "\r\n" pair and every
// remaining lone "\r" has been replaced by a single "\n".
NSMutableString *STStandardizeStringNewlines(NSString *str)
{
	NSMutableString *normalized = [NSMutableString stringWithString:str];
	NSRange whole = NSMakeRange(0, [normalized length]);

	// CRLF first, so the pair collapses to one newline rather than two.
	[normalized replaceOccurrencesOfString:@"\r\n" withString:@"\n" options:0 range:whole];

	// The string may have shrunk; recompute the range before the second pass.
	whole.length = [normalized length];
	[normalized replaceOccurrencesOfString:@"\r" withString:@"\n" options:0 range:whole];

	return normalized;
}
|---|
| 58 | |
|---|
// Sorts array in place with mergesort(3), which is a stable sort.
//
// compare is called with pointers to the array's elements (that is, with
// pointers to id values), following the qsort(3) comparator convention.
// The array is left unchanged if it has fewer than two elements, or if
// memory for the temporary buffer or the sort itself cannot be obtained.
void STSortMutableArrayStably(NSMutableArray *array, int (*compare)(const void *, const void *))
{
	size_t count = [array count];
	id *objs;

	// Nothing to reorder; also avoids a zero-sized buffer.
	if (count < 2) return;

	// Heap buffer rather than a stack VLA so large arrays cannot overflow the stack.
	objs = (id *)malloc(count * sizeof(id));
	if (!objs) return;

	[array getObjects:objs range:NSMakeRange(0, count)];

	// mergesort() returns 0 on success; only commit a completed sort.
	if (mergesort(objs, count, sizeof(id), compare) == 0)
		[array setArray:[NSArray arrayWithObjects:objs count:count]];

	free(objs);
}
|---|
| 68 | |
|---|
// Per-byte weights, indexed by byte value (256 entries, 16 per row).
// Used only by DifferentiateLatin12 below, which sums them over a buffer.
static const short frequencies[] = {
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	674, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 1026, 29, -1258, 539, -930, -652, -815, -487, -2526, -2161, 146, -956, -914, 1149, -102,
	293, -2675, -923, -597, 339, 110, 247, 9, 0, 1024, 1239, 0, 0, 0, 0, 0,
	0, 1980, 1472, 1733, -304, -4086, 273, 582, 333, 2479, 1193, 5014, -1039, 1964, -2025, 1083,
	-154, -5000, -1725, -4843, -366, -1850, -191, 1356, -2262, 1648, 1475, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, -458, 0, 0, 0, 0, 300, 0, 0, 300, 601, 0,
	0, 0, -2247, 0, 0, 0, 0, 0, 0, 0, 3667, 0, 0, 3491, 3567, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1993, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 1472, 0, 0, 0, 5000, 0, 601, 0, 1993, 0,
	0, 1083, 0, 672, -458, 0, 0, -458, 1409, 0, 0, 0, 0, 0, 1645, 425,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 601, -1123,
	-1912, 4259, 2573, 8866, 55, 0, 0, -2247, -831, -3788, -3043, 0, 0, 3412, 2921, 1251,
	0, 0, 1377, 520, 1344, 0, -1123, 0, 0, -1213, 2208, -458, -794, 2636, 3824, 0};

// Sums the frequencies[] weight of every byte in data and returns YES when
// the total is zero or negative. The caller in this file treats YES as
// "seems to actually be latin1".
//
// data   - bytes to examine; not dereferenced when length <= 0.
// length - number of bytes in data. Zero or negative yields YES without
//          reading anything.
static BOOL DifferentiateLatin12(const unsigned char *data, int length)
{
	// generated from french/german (latin1) and hungarian/romanian (latin2)

	// Wide accumulator: |weight| can reach 8866, so an int total could
	// overflow on a long enough buffer.
	long long frcount = 0;

	// "> 0" instead of a bare post-decrement: a negative length (e.g. a
	// truncated NSUInteger) must not walk off the end of the buffer.
	while (length-- > 0) {
		frcount += frequencies[*data++];
	}

	return frcount <= 0;
}
|---|
| 99 | |
|---|
// Loads the file at path as text, guessing its encoding with UniversalDetector.
//
// When the detector reports "windows-1250", DifferentiateLatin12 is consulted
// and the guess is switched to windows-1252 if it says so. If decoding
// with the chosen one of those two encodings fails, the other one is tried.
// A diagnostic is printed via Codecprintf when the confidence is below 0.6,
// when the windows-1250/1252 heuristic was involved, or when decoding fails.
//
// Returns an autoreleased string, or nil if the file could not be read or
// could not be decoded.
extern NSString *STLoadFileWithUnknownEncoding(NSString *path)
{
	NSData *data = [NSData dataWithContentsOfMappedFile:path];
	UniversalDetector *ud;
	NSString *res = nil;
	NSStringEncoding enc;
	float conf;
	NSString *enc_str;
	BOOL latin2;

	// Unreadable or missing file: there is nothing to analyze or decode.
	if (!data) {
		Codecprintf(NULL,"Failed to read file \"%s\".\n",[path UTF8String]);
		return nil;
	}

	ud = [[UniversalDetector alloc] init];
	[ud analyzeData:data];

	enc = [ud encoding];
	conf = [ud confidence];
	enc_str = [ud MIMECharset];
	latin2 = [enc_str isEqualToString:@"windows-1250"];

	if (latin2) {
		if (DifferentiateLatin12([data bytes], (int)[data length])) { // seems to actually be latin1
			enc = NSWindowsCP1252StringEncoding;
			enc_str = @"windows-1252";
		}
	}

	if (conf < .6 || latin2) {
		Codecprintf(NULL,"Guessed encoding \"%s\" for \"%s\", but not sure (confidence %f%%).\n",[enc_str UTF8String],[path UTF8String],conf*100.);
	}

	res = [[[NSString alloc] initWithData:data encoding:enc] autorelease];

	if (!res) {
		if (latin2) {
			// Flip to whichever of windows-1250/1252 was not just tried.
			Codecprintf(NULL,"Encoding %s failed, retrying.\n",[enc_str UTF8String]);
			enc = (enc == NSWindowsCP1252StringEncoding) ? NSWindowsCP1250StringEncoding : NSWindowsCP1252StringEncoding;
			res = [[[NSString alloc] initWithData:data encoding:enc] autorelease];
			if (!res) Codecprintf(NULL,"Both of latin1/2 failed.\n");
		} else Codecprintf(NULL,"Failed to load file as guessed encoding %s.\n",[enc_str UTF8String]);
	}

	// Released last: enc_str came from the detector and is used in the messages above.
	[ud release];

	return res;
}
|---|
| 142 | |
|---|
// Returns a newly created mutable copy of the current user's home directory
// path. Following the CoreFoundation "Create"/"Copy" convention, the caller
// owns the result and must CFRelease() it.
CFMutableStringRef CopyHomeDirectory()
{
	CFMutableStringRef homeCopy;

	// Local pool so the autoreleased NSString from NSHomeDirectory() is
	// cleaned up before returning.
	NSAutoreleasePool *localPool = [[NSAutoreleasePool alloc] init];

	// A maximum length of 0 means the copy has no length limit.
	homeCopy = CFStringCreateMutableCopy(kCFAllocatorDefault, 0, (CFStringRef)NSHomeDirectory());

	[localPool release];

	return homeCopy;
}
|---|