| 1 |
// |
|---|
| 2 |
// SubUtilities.m |
|---|
| 3 |
// SSARender2 |
|---|
| 4 |
// |
|---|
| 5 |
// Created by Alexander Strange on 7/28/07. |
|---|
| 6 |
// Copyright 2007 __MyCompanyName__. All rights reserved. |
|---|
| 7 |
// |
|---|
| 8 |
|
|---|
| 9 |
#import "SubUtilities.h" |
|---|
| 10 |
#import "UniversalDetector.h" |
|---|
| 11 |
#import "Codecprintf.h" |
|---|
| 12 |
|
|---|
| 13 |
// Splits `str` at every occurrence of `split` and trims the characters in
// +[NSCharacterSet whitespaceCharacterSet] from both ends of each piece.
//
// Returns a new array with one trimmed string per piece, in their original
// order. A nil `str` produces an empty array.
NSArray *STSplitStringIgnoringWhitespace(NSString *str, NSString *split)
{
    NSArray *pieces = [str componentsSeparatedByString:split];
    NSCharacterSet *whitespace = [NSCharacterSet whitespaceCharacterSet];
    size_t num = [pieces count], i;

    // Collect into a heap-backed array instead of a stack VLA sized by the
    // input: a zero-length VLA (nil `str`) is undefined behaviour and a very
    // large piece count could exhaust the stack.
    NSMutableArray *trimmed = [NSMutableArray arrayWithCapacity:num];

    for (i = 0; i < num; i++) {
        NSString *piece = [pieces objectAtIndex:i];
        [trimmed addObject:[piece stringByTrimmingCharactersInSet:whitespace]];
    }

    // Hand back an immutable array, as the original implementation did.
    return [NSArray arrayWithArray:trimmed];
}
|---|
| 25 |
|
|---|
| 26 |
// Splits `str` on `split` into at most `count` fields. The first count-1
// fields are delimited by `split`; the final field receives whatever remains
// of the string, including any further occurrences of `split`.
//
// If the string runs out before count-1 delimiters have been consumed, the
// fields found so far are followed by a single empty string.
//
// `count` is unsigned, so passing 0 wraps on the first decrement and the loop
// then only stops at the end of the string (i.e. it splits on every delimiter).
//
// Matching is case sensitive and no characters are skipped by the scanner.
NSArray *STSplitStringWithCount(NSString *str, NSString *split, size_t count)
{
    NSMutableArray *fields = [NSMutableArray arrayWithCapacity:count];
    NSScanner *scanner = [NSScanner scannerWithString:str];
    NSString *field = nil;

    [scanner setCharactersToBeSkipped:nil];
    [scanner setCaseSensitive:TRUE];

    while (count != 1) {
        count--;

        // A failed scan leaves the out-variable untouched, so clear it first;
        // an empty field (two adjacent delimiters) then shows up as nil.
        field = nil;
        [scanner scanUpToString:split intoString:&field];
        [scanner scanString:split intoString:nil];
        if (!field) field = [NSString string];
        [fields addObject:field];

        if ([scanner isAtEnd]) break;
    }

    // Clear the previous field before scanning the tail. Without this, leaving
    // the loop at end-of-string would append the last field a second time,
    // because the scan below fails and does not overwrite the variable.
    field = nil;

    // Scanning up to the empty string consumes the rest of the input.
    [scanner scanUpToString:@"" intoString:&field];
    if (!field) field = [NSString string];
    [fields addObject:field];

    return fields;
}
|---|
| 50 |
|
|---|
| 51 |
// Returns a mutable copy of `str` in which every "\r\n" pair and every
// remaining lone "\r" has been replaced by a single "\n".
NSMutableString *STStandardizeStringNewlines(NSString *str)
{
    // Order matters: CRLF pairs must be collapsed before bare CRs, otherwise
    // each pair would turn into two newlines.
    NSString *const foreignLineEndings[] = { @"\r\n", @"\r" };
    const size_t endingCount = sizeof(foreignLineEndings) / sizeof(foreignLineEndings[0]);
    NSMutableString *normalized = [NSMutableString stringWithString:str];
    size_t i;

    for (i = 0; i < endingCount; i++) {
        // Recompute the range each pass; the previous pass may have shortened the string.
        NSRange everything = NSMakeRange(0, [normalized length]);
        [normalized replaceOccurrencesOfString:foreignLineEndings[i]
                                    withString:@"\n"
                                       options:0
                                         range:everything];
    }

    return normalized;
}
|---|
| 58 |
|
|---|
| 59 |
// Sorts `array` in place using mergesort(3), which keeps elements that compare
// equal in their original relative order.
//
// `compare` is a qsort-style callback: each argument points at an array slot
// (i.e. it is an `id *`), not at the object itself.
//
// If the scratch buffer cannot be allocated or mergesort reports failure, the
// array is left exactly as it was.
void STSortMutableArrayStably(NSMutableArray *array, int (*compare)(const void *, const void *))
{
    size_t count = [array count];
    id *objs;

    // Nothing to reorder; also avoids a zero-sized buffer.
    if (count < 2) return;

    // Heap allocation instead of a stack VLA so large arrays cannot overflow
    // the stack.
    objs = malloc(count * sizeof(id));
    if (!objs) return;

    // The objects stay retained by `array` while they sit in the C buffer.
    [array getObjects:objs range:NSMakeRange(0, count)];

    if (mergesort(objs, count, sizeof(id), compare) == 0) {
        [array setArray:[NSArray arrayWithObjects:objs count:count]];
    }

    free(objs);
}
|---|
| 68 |
|
|---|
| 69 |
// Per-byte weights used by DifferentiateLatin12(), indexed by byte value
// (256 entries, 16 per row). Bytes with a positive weight push the total
// towards latin2, bytes with a negative weight towards latin1.
static const short frequencies[] = {
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    674, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 1026, 29, -1258, 539, -930, -652, -815, -487, -2526, -2161, 146, -956, -914, 1149, -102,
    293, -2675, -923, -597, 339, 110, 247, 9, 0, 1024, 1239, 0, 0, 0, 0, 0,
    0, 1980, 1472, 1733, -304, -4086, 273, 582, 333, 2479, 1193, 5014, -1039, 1964, -2025, 1083,
    -154, -5000, -1725, -4843, -366, -1850, -191, 1356, -2262, 1648, 1475, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, -458, 0, 0, 0, 0, 300, 0, 0, 300, 601, 0,
    0, 0, -2247, 0, 0, 0, 0, 0, 0, 0, 3667, 0, 0, 3491, 3567, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1993, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 1472, 0, 0, 0, 5000, 0, 601, 0, 1993, 0,
    0, 1083, 0, 672, -458, 0, 0, -458, 1409, 0, 0, 0, 0, 0, 1645, 425,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 601, -1123,
    -1912, 4259, 2573, 8866, 55, 0, 0, -2247, -831, -3788, -3043, 0, 0, 3412, 2921, 1251,
    0, 0, 1377, 520, 1344, 0, -1123, 0, 0, -1213, 2208, -458, -794, 2636, 3824, 0};

// Decides whether `length` bytes at `data` look more like latin1 than latin2
// by summing the per-byte weights above.
//
// Returns YES (treat as latin1) when the total is zero or negative, which
// includes empty input, and NO when the total is positive.
static BOOL DifferentiateLatin12(const unsigned char *data, int length)
{
    // generated from french/german (latin1) and hungarian/romanian (latin2)

    // 64-bit accumulator: the largest weight is 8866, so an int total could
    // overflow on inputs of a few hundred kilobytes.
    long long frcount = 0;

    // `> 0` rather than a bare truth test: a negative length (e.g. an unsigned
    // byte count truncated to int by the caller) must not walk off the buffer.
    while (length-- > 0) {
        frcount += frequencies[*data++];
    }

    return frcount <= 0;
}
} |
|---|
| 99 |
|
|---|
| 100 |
// Loads the file at `path` as text, guessing its encoding with
// UniversalDetector.
//
// When the detector reports windows-1250, the byte-frequency heuristic
// DifferentiateLatin12() gets a second opinion and may switch the guess to
// windows-1252. If decoding with the chosen one of those two fails, the other
// is tried as well. Low-confidence guesses and failures are logged through
// Codecprintf.
//
// Returns an autoreleased string, or nil if the file could not be read or
// could not be decoded.
extern NSString *STLoadFileWithUnknownEncoding(NSString *path)
{
    NSData *data = [NSData dataWithContentsOfMappedFile:path];
    UniversalDetector *ud;
    NSString *res = nil;
    NSStringEncoding enc;
    float conf;
    NSString *enc_str;
    BOOL latin2;

    // Missing or unreadable file: there is nothing to analyse or decode.
    if (!data) {
        Codecprintf(NULL,"Failed to read file \"%s\".\n",[path UTF8String]);
        return nil;
    }

    ud = [[UniversalDetector alloc] init];
    [ud analyzeData:data];

    enc = [ud encoding];
    conf = [ud confidence];
    enc_str = [ud MIMECharset];
    latin2 = [enc_str isEqualToString:@"windows-1250"];

    if (latin2) {
        // DifferentiateLatin12 takes an int; clamp explicitly so a huge file
        // cannot be truncated to a negative length.
        size_t byte_count = [data length];
        int sample_len = (byte_count > (size_t)INT_MAX) ? INT_MAX : (int)byte_count;

        if (DifferentiateLatin12([data bytes], sample_len)) { // seems to actually be latin1
            enc = NSWindowsCP1252StringEncoding;
            enc_str = @"windows-1252";
        }
    }

    if (conf < .6 || latin2) {
        Codecprintf(NULL,"Guessed encoding \"%s\" for \"%s\", but not sure (confidence %f%%).\n",[enc_str UTF8String],[path UTF8String],conf*100.);
    }

    res = [[[NSString alloc] initWithData:data encoding:enc] autorelease];

    if (!res) {
        if (latin2) {
            // The latin1/latin2 call was only a heuristic, so try the other one.
            Codecprintf(NULL,"Encoding %s failed, retrying.\n",[enc_str UTF8String]);
            enc = (enc == NSWindowsCP1252StringEncoding) ? NSWindowsCP1250StringEncoding : NSWindowsCP1252StringEncoding;
            res = [[[NSString alloc] initWithData:data encoding:enc] autorelease];
            if (!res) Codecprintf(NULL,"Both of latin1/2 failed.\n");
        } else Codecprintf(NULL,"Failed to load file as guessed encoding %s.\n",[enc_str UTF8String]);
    }
    [ud release];

    return res;
}
|---|
| 142 |
|
|---|
| 143 |
// Returns a mutable CoreFoundation copy of the current user's home directory
// path. Follows the CF "Copy" naming rule: the caller owns the result and is
// responsible for CFRelease()ing it.
CFMutableStringRef CopyHomeDirectory()
{
    CFMutableStringRef homeCopy;

    // Local pool so the function can be called from code that has no
    // autorelease pool in place; the CF copy outlives it.
    NSAutoreleasePool *scratchPool = [[NSAutoreleasePool alloc] init];

    homeCopy = CFStringCreateMutableCopy(kCFAllocatorDefault, 0, (CFStringRef)NSHomeDirectory());

    [scratchPool release];

    return homeCopy;
}
|---|