source: trunk/Subtitles/SubUtilities.m @ 1048

Revision 1048, 5.9 KB checked in by astrange, 5 years ago (diff)

Fix r1046 for GC.

Forgot that the NSData could be deallocated
while the data was in use. Requires a gross
hack to extend its live range on the stack.

Line 
1/*
2 * SubUtilities.m
3 * Created by Alexander Strange on 7/28/07.
4 *
5 * This file is part of Perian.
6 *
7 * This library is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
11 *
12 * This library is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15 * Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 */
21
22#import "SubUtilities.h"
23#import "UniversalDetector.h"
24#import "Codecprintf.h"
25
// Splits str at every occurrence of split and strips leading and trailing
// characters belonging to +[NSCharacterSet whitespaceCharacterSet] from each
// resulting piece.
//
// Returns an autoreleased array with the trimmed pieces in their original
// order. A nil str produces an empty array.
NSArray *STSplitStringIgnoringWhitespace(NSString *str, NSString *split)
{
        NSArray *pieces = [str componentsSeparatedByString:split];
        NSCharacterSet *wcs = [NSCharacterSet whitespaceCharacterSet];
        size_t num = [pieces count], i;
        // Collect into a heap-backed array rather than a stack VLA: a VLA of
        // zero elements (nil str) is undefined in C99, and a very long input
        // would otherwise be sized directly onto the stack.
        NSMutableArray *trimmed = [NSMutableArray arrayWithCapacity:num];

        for (i = 0; i < num; i++) {
                NSString *piece = [pieces objectAtIndex:i];
                [trimmed addObject:[piece stringByTrimmingCharactersInSet:wcs]];
        }

        return trimmed;
}
38
// Splits str on split into a limited number of fields.
//
// The loop peels off one field per pass, decrementing count each time, and
// stops once count reaches 1 or the scanner hits the end of the string. A
// final scan then collects whatever is left as the last field. Fields for
// which nothing was scanned are stored as empty strings. No characters are
// skipped and matching is case sensitive.
//
// Returns an autoreleased array of the fields.
//
// NOTE(review): when the scanner reaches the end inside the loop, the field
// variable is not cleared before the final scan, so the last field looks like
// it gets appended a second time (assuming a failed scan leaves the out
// parameter untouched). Confirm whether callers rely on that before changing.
NSArray *STSplitStringWithCount(NSString *str, NSString *split, size_t count)
{
        NSMutableArray *fields = [NSMutableArray arrayWithCapacity:count];
        NSScanner *scanner = [NSScanner scannerWithString:str];
        NSString *field = nil;

        [scanner setCharactersToBeSkipped:nil];
        [scanner setCaseSensitive:TRUE];

        // The field is reset only when another pass follows; leaving the loop
        // through the break keeps its current value.
        for (; count != 1; field = nil) {
                count--;

                [scanner scanUpToString:split intoString:&field];
                [scanner scanString:split intoString:nil];

                if (field == nil)
                        field = [NSString string];
                [fields addObject:field];

                if ([scanner isAtEnd])
                        break;
        }

        // Scanning up to the empty string is used here to pick up the rest of the input.
        [scanner scanUpToString:@"" intoString:&field];
        if (field == nil)
                field = [NSString string];
        [fields addObject:field];

        return fields;
}
63
// Returns an autoreleased mutable copy of str in which every "\r\n" pair and
// every remaining lone "\r" has been replaced by "\n". Returns nil for nil.
NSMutableString *STStandardizeStringNewlines(NSString *str)
{
        NSMutableString *normalized = nil;

        if (str != nil) {
                normalized = [NSMutableString stringWithString:str];

                // Collapse CRLF pairs first so the second pass only sees bare CRs
                // and does not turn one line break into two.
                [normalized replaceOccurrencesOfString:@"\r\n"
                                            withString:@"\n"
                                               options:0
                                                 range:NSMakeRange(0, [normalized length])];
                [normalized replaceOccurrencesOfString:@"\r"
                                            withString:@"\n"
                                               options:0
                                                 range:NSMakeRange(0, [normalized length])];
        }

        return normalized;
}
73
// Sorts array in place with mergesort(3), which keeps elements that compare
// equal in their existing relative order.
//
// compare follows the qsort(3) convention. It is handed pointers to the
// array slots (i.e. pointers to id), not the objects themselves.
//
// Arrays with fewer than two elements are left untouched, as is the array
// when mergesort() reports failure.
void STSortMutableArrayStably(NSMutableArray *array, int (*compare)(const void *, const void *))
{
        size_t count = [array count];

        // Nothing to reorder; this also keeps the VLA below from being
        // declared with zero elements, which C99 leaves undefined.
        if (count < 2)
                return;

        id objs[count];

        [array getObjects:objs];

        // mergesort() returns non-zero when it cannot run (e.g. it failed to
        // allocate its scratch buffer); keep the original order in that case.
        if (mergesort(objs, count, sizeof(id), compare) != 0)
                return;

        [array setArray:[NSArray arrayWithObjects:objs count:count]];
}
83
// Per-byte weights used by STDifferentiateLatin12(), indexed by byte value
// (256 entries, 16 per row). Generated from french/german (latin1) and
// hungarian/romanian (latin2) text: positive weights push the total towards
// Latin-2, negative weights towards Latin-1.
static const short frequencies[] = {
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        674, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 1026, 29, -1258, 539, -930, -652, -815, -487, -2526, -2161, 146, -956, -914, 1149, -102,
        293, -2675, -923, -597, 339, 110, 247, 9, 0, 1024, 1239, 0, 0, 0, 0, 0,
        0, 1980, 1472, 1733, -304, -4086, 273, 582, 333, 2479, 1193, 5014, -1039, 1964, -2025, 1083,
        -154, -5000, -1725, -4843, -366, -1850, -191, 1356, -2262, 1648, 1475, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, -458, 0, 0, 0, 0, 300, 0, 0, 300, 601, 0,
        0, 0, -2247, 0, 0, 0, 0, 0, 0, 0, 3667, 0, 0, 3491, 3567, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1993, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 1472, 0, 0, 0, 5000, 0, 601, 0, 1993, 0,
        0, 1083, 0, 672, -458, 0, 0, -458, 1409, 0, 0, 0, 0, 0, 1645, 425,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 601, -1123,
        -1912, 4259, 2573, 8866, 55, 0, 0, -2247, -831, -3788, -3043, 0, 0, 3412, 2921, 1251,
        0, 0, 1377, 520, 1344, 0, -1123, 0, 0, -1213, 2208, -458, -794, 2636, 3824, 0};

// Decides whether a buffer that was detected as Latin-2 looks more like
// Latin-1 instead.
//
// data   - bytes to examine; NULL is treated like an empty buffer.
// length - number of bytes at data; values <= 0 are treated as empty.
//
// Sums the frequencies[] weight of every byte and returns YES when the total
// is not positive (the buffer seems to be Latin-1), NO otherwise. An empty
// buffer therefore returns YES.
BOOL STDifferentiateLatin12(const unsigned char *data, int length)
{
        // Wide accumulator: at up to 8866 per byte an int total could overflow
        // on inputs of a few hundred kilobytes.
        long long frcount = 0;

        if (data == NULL)
                length = 0;

        // "> 0" rather than a bare truth test so a negative length cannot walk
        // off the end of the buffer.
        while (length-- > 0) {
                frcount += frequencies[*data++];
        }

        return frcount <= 0;
}
114
// Loads the file at path and decodes it into a string using the encoding
// guessed by UniversalDetector.
//
// When the guess is Windows-1250 (Latin-2), STDifferentiateLatin12() gets a
// chance to switch it to Windows-1252 (Latin-1). If decoding with the chosen
// one of those two fails, the other is tried as well. A diagnostic is logged
// through Codecprintf when the guess is uncertain or decoding fails.
//
// Returns an autoreleased string, or nil when the file could not be read or
// could not be decoded.
NSString *STLoadFileWithUnknownEncoding(NSString *path)
{
        NSData *data = [NSData dataWithContentsOfMappedFile:path];
        UniversalDetector *ud;
        NSString *res = nil;
        NSStringEncoding enc;
        float conf;
        NSString *enc_str;
        BOOL latin2;

        // Nothing to detect or decode if the file could not be mapped.
        if (!data) {
                Codecprintf(NULL,"Failed to read \"%s\".\n",[path UTF8String]);
                return nil;
        }

        ud = [[UniversalDetector alloc] init];
        [ud analyzeData:data];

        enc = [ud encoding];
        conf = [ud confidence];
        enc_str = [ud MIMECharset];
        latin2 = enc == NSWindowsCP1250StringEncoding;

        if (latin2) {
                if (STDifferentiateLatin12([data bytes], [data length])) { // seems to actually be latin1
                        enc = NSWindowsCP1252StringEncoding;
                        enc_str = @"windows-1252";
                }
        }

        if (conf < .6 || latin2) {
                Codecprintf(NULL,"Guessed encoding \"%s\" for \"%s\", but not sure (confidence %f%%).\n",[enc_str UTF8String],[path UTF8String],conf*100.);
        }

        res = [[[NSString alloc] initWithData:data encoding:enc] autorelease];

        if (!res) {
                if (latin2) {
                        // Retry with whichever of the two Latin encodings was not used above.
                        Codecprintf(NULL,"Encoding %s failed, retrying.\n",[enc_str UTF8String]);
                        enc = (enc == NSWindowsCP1252StringEncoding) ? NSWindowsCP1250StringEncoding : NSWindowsCP1252StringEncoding;
                        res = [[[NSString alloc] initWithData:data encoding:enc] autorelease];
                        if (!res) Codecprintf(NULL,"Both of latin1/2 failed.\n");
                } else Codecprintf(NULL,"Failed to load file as guessed encoding %s.\n",[enc_str UTF8String]);
        }
        [ud release];

        return res;
}
157
// Returns a pointer to the UTF-16 code units of str.
//
// str   - string to expose; nil yields NULL.
// datap - out parameter, always written. Set to nil when the returned
//         pointer comes straight from CFStringGetCharactersPtr(). Otherwise
//         set to a retained NSData that backs the returned pointer; the
//         caller has to keep it around while using the pointer and release
//         it afterwards.
//
// The returned pointer addresses [str length] code units; a byte order mark
// at the start of the encoded data is skipped.
const unichar *STUnicodeForString(NSString *str, NSData **datap)
{
        const unichar *p;

        *datap = nil;

        // CoreFoundation string functions must not be handed a NULL string.
        if (!str)
                return NULL;

        p = CFStringGetCharactersPtr((CFStringRef)str);

        if (!p) {
                NSData *data = [[str dataUsingEncoding:NSUnicodeStringEncoding] retain];

                p = [data bytes];

                //dataUsingEncoding: adds a BOM
                //skip it so the string length will match the input string
                //(only look at the first unit when the data actually holds one)
                if ([data length] >= sizeof(unichar) && *p == 0xfeff)
                        p++;

                *datap = data;
        }

        return p;
}
179
// Returns a new mutable CoreFoundation string holding the path reported by
// NSHomeDirectory(). The caller owns the result and must CFRelease() it.
//
// A local autorelease pool wraps the Foundation call, so the function does
// not depend on the caller having one in place.
CFMutableStringRef CopyHomeDirectory()
{
        CFMutableStringRef result;
        NSAutoreleasePool *localPool = [[NSAutoreleasePool alloc] init];

        // Copy before draining: the NSString is only guaranteed to live as
        // long as the pool does.
        result = CFStringCreateMutableCopy(kCFAllocatorDefault, 0, (CFStringRef)NSHomeDirectory());

        [localPool release];

        return result;
}
Note: See TracBrowser for help on using the repository browser.