File: | CFString_BDSKExtensions.m |
Location: | line 333, column 5 |
Description: | dead store |
1 | // |
2 | // CFString_BDSKExtensions.m |
3 | // Bibdesk |
4 | // |
5 | // Created by Adam Maxwell on 01/02/06. |
6 | /* |
7 | This software is Copyright (c) 2006-2008 |
8 | Adam Maxwell. All rights reserved. |
9 | |
10 | Redistribution and use in source and binary forms, with or without |
11 | modification, are permitted provided that the following conditions |
12 | are met: |
13 | |
14 | - Redistributions of source code must retain the above copyright |
15 | notice, this list of conditions and the following disclaimer. |
16 | |
17 | - Redistributions in binary form must reproduce the above copyright |
18 | notice, this list of conditions and the following disclaimer in |
19 | the documentation and/or other materials provided with the |
20 | distribution. |
21 | |
22 | - Neither the name of Adam Maxwell nor the names of any |
23 | contributors may be used to endorse or promote products derived |
24 | from this software without specific prior written permission. |
25 | |
26 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
27 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
28 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
29 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
30 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
31 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
32 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
33 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
34 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
35 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
36 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
37 | */ |
38 | |
39 | #import "CFString_BDSKExtensions.h" |
40 | #import <OmniFoundation/OFPreference.h> |
41 | |
// Cache for the stop words, so that user defaults do not have to be consulted every time
// __BDDeleteArticlesForSorting() is called (which is fairly often).
typedef struct __BDSKStopWordCache {
    CFArrayRef stopWords;     // value copied from preferences; NULL when nothing is stored
    CFIndex numberOfWords;    // CFArrayGetCount(stopWords), or 0 when stopWords is NULL
} _BDSKStopWordCache;
48 | |
// The single cache instance; NULL until a cache has been allocated for it.
static _BDSKStopWordCache *stopWordCache = NULL;
50 | |
/*
 Reloads the stop word cache from the BDSKIgnoredSortTermsKey preference of the current application.
 The signature is that of a CFNotificationCallback; none of the arguments are used, so the function
 may also be called directly with NULL for every argument.

 The previously cached array is released first.  The preference is only accepted when it actually is
 a CFArray; a missing value or a value of any other property list type leaves the cache empty
 (stopWords == NULL, numberOfWords == 0) instead of being handed to CFArrayGetCount().
 */
static void
stopWordNotificationCallback(CFNotificationCenterRef center, void *observer, CFStringRef name, const void *object, CFDictionaryRef userInfo)
{
    // drop whatever was cached before
    if (stopWordCache->stopWords != NULL) {
        CFRelease(stopWordCache->stopWords);
        stopWordCache->stopWords = NULL;
    }
    stopWordCache->numberOfWords = 0;

    CFPropertyListRef value = CFPreferencesCopyAppValue((CFStringRef)BDSKIgnoredSortTermsKey, kCFPreferencesCurrentApplication);
    if (value == NULL)
        return;

    // preferences can hold any property list type; only an array is usable as a stop word list
    if (CFGetTypeID(value) != CFArrayGetTypeID()) {
        CFRelease(value);
        return;
    }

    // ownership of the copied value is transferred to the cache
    stopWordCache->stopWords = (CFArrayRef)value;
    stopWordCache->numberOfWords = CFArrayGetCount(stopWordCache->stopWords);
}
62 | |
// Constructor: allocates the stop word cache, fills it from the current preferences, and registers
// for BDSKIgnoredSortTermsChangedNotification on the local notification center so it is refreshed later.
__attribute__((constructor))
static void initializeStopwordCache(void)
{
    _BDSKStopWordCache *cache = NSZoneMalloc(NULL, sizeof(_BDSKStopWordCache));
    cache->stopWords = NULL;
    cache->numberOfWords = 0;
    stopWordCache = cache;

    // initial load; the callback ignores its arguments
    stopWordNotificationCallback(NULL, NULL, NULL, NULL, NULL);

    CFNotificationCenterRef localCenter = CFNotificationCenterGetLocalCenter();
    CFNotificationCenterAddObserver(localCenter, stopWordCache, stopWordNotificationCallback, CFSTR("BDSKIgnoredSortTermsChangedNotification"), NULL, CFNotificationSuspensionBehaviorDeliverImmediately);
}
72 | |
// Destructor: unregisters the cache from the local notification center, releases the cached
// array (if any) and frees the cache structure itself.
__attribute__((destructor))
static void destroyStopwordCache(void)
{
    CFNotificationCenterRef localCenter = CFNotificationCenterGetLocalCenter();
    CFNotificationCenterRemoveObserver(localCenter, stopWordCache, CFSTR("BDSKIgnoredSortTermsChangedNotification"), NULL);

    CFArrayRef cachedWords = stopWordCache->stopWords;
    if (cachedWords != NULL)
        CFRelease(cachedWords);

    NSZoneFree(NULL, stopWordCache);
}
80 | |
// Returns the cached stop word array (not retained for the caller); may be NULL.
static inline CFArrayRef __BDSKGetStopwords(void)
{
    return stopWordCache->stopWords;
}
// Returns the cached number of stop words.
static inline CFIndex __BDSKGetStopwordCount(void)
{
    return stopWordCache->numberOfWords;
}
83 | |
84 | #pragma mark - |
85 | |
// Number of UniChars in the on-stack scratch buffers used by the functions in this file.
#define STACK_BUFFER_SIZE 256

// Character sets shared by the character predicates in this file.
static CFCharacterSetRef whitespaceCharacterSet = NULL;
static CFCharacterSetRef whitespaceAndNewlineCharacterSet = NULL;
static CFCharacterSetRef punctuationCharacterSet = NULL;
91 | |
// Constructor: retains the predefined CoreFoundation character sets used by the predicates in this file.
__attribute__((constructor))
static void initializeStaticCharacterSets(void)
{
    CFCharacterSetRef predefined;

    predefined = CFCharacterSetGetPredefined(kCFCharacterSetWhitespace);
    whitespaceCharacterSet = CFRetain(predefined);

    predefined = CFCharacterSetGetPredefined(kCFCharacterSetWhitespaceAndNewline);
    whitespaceAndNewlineCharacterSet = CFRetain(predefined);

    predefined = CFCharacterSetGetPredefined(kCFCharacterSetPunctuation);
    punctuationCharacterSet = CFRetain(predefined);
}
99 | |
// Returns whether c is a member of whitespaceCharacterSet.
static inline
BOOL __BDCharacterIsWhitespace(UniChar c)
{
    // minor optimization: characters in 0x21...0x7E are answered without consulting the set,
    // since ASCII characters are the most common in TeX
    if (c >= 0x0021 && c <= 0x007E)
        return NO;

    return CFCharacterSetIsCharacterMember(whitespaceCharacterSet, c);
}
106 | |
// Returns whether c is a member of whitespaceAndNewlineCharacterSet.
static inline
BOOL __BDCharacterIsWhitespaceOrNewline(UniChar c)
{
    // minor optimization: characters in 0x21...0x7E are answered without consulting the set,
    // since ASCII characters are the most common in TeX
    if (c >= 0x0021 && c <= 0x007E)
        return NO;

    return CFCharacterSetIsCharacterMember(whitespaceAndNewlineCharacterSet, c);
}
113 | |
// Returns whether c is a member of punctuationCharacterSet.
static inline
BOOL __BDCharacterIsPunctuation(UniChar c)
{
    BOOL isMember = CFCharacterSetIsCharacterMember(punctuationCharacterSet, c);
    return isMember;
}
119 | |
// Returns TRUE when any of the first `length` characters of `string` satisfies
// __BDCharacterIsWhitespace().  Characters are examined from the end towards the start.
static inline
Boolean __BDStringContainsWhitespace(CFStringRef string, CFIndex length)
{
    CFIndex idx;
    const UniChar *characters = CFStringGetCharactersPtr(string);

    if (characters == NULL) {
        // no direct access to the characters; go through an inline buffer instead
        CFStringInlineBuffer inlineBuffer;
        CFStringInitInlineBuffer(string, &inlineBuffer, CFRangeMake(0, length));

        for (idx = length - 1; idx >= 0; idx--) {
            if (__BDCharacterIsWhitespace(CFStringGetCharacterFromInlineBuffer(&inlineBuffer, idx)))
                return TRUE;
        }
        return FALSE;
    }

    for (idx = length - 1; idx >= 0; idx--) {
        if (__BDCharacterIsWhitespace(characters[idx]))
            return TRUE;
    }
    return FALSE;
}
139 | |
/*
 Creates a copy of aString in which every run of whitespace characters (as determined by
 __BDCharacterIsWhitespace) is replaced by a single space, and leading and trailing whitespace
 is removed.

 allocator: used for the scratch buffer and the result; when NULL, the allocator of aString is used.
 Returns a string the caller must release.  An empty input returns the constant empty string, an
 input without any whitespace returns aString itself (retained), and an input consisting only of
 whitespace returns an empty string.
 */
static inline
CFStringRef __BDStringCreateByCollapsingAndTrimmingWhitespace(CFAllocatorRef allocator, CFStringRef aString)
{
    CFIndex length = CFStringGetLength(aString);

    if (length == 0)
        return CFRetain(CFSTR(""));

    // improves efficiency somewhat when adding autocomplete strings, since we can completely avoid allocation
    if (__BDStringContainsWhitespace(aString, length) == FALSE)
        return CFRetain(aString);

    // set up the buffer to fetch the characters
    CFStringInlineBuffer inlineBuffer;
    CFStringInitInlineBuffer(aString, &inlineBuffer, CFRangeMake(0, length));

    allocator = (allocator == NULL) ? CFGetAllocator(aString) : allocator;

    // the output is never longer than the input, so `length` characters always suffice
    UniChar stackBuffer[STACK_BUFFER_SIZE];
    UniChar *buffer = stackBuffer;
    if (length >= STACK_BUFFER_SIZE)
        buffer = (UniChar *)CFAllocatorAllocate(allocator, (length + 1) * sizeof(UniChar), 0);

    NSCAssert1(buffer != NULL, @"failed to allocate memory for string of length %ld", (long)length);

    // YES while the last character written was not whitespace, i.e. while a following
    // whitespace character still has to be represented by a single space
    BOOL spaceAllowed = NO;
    CFIndex bufCnt = 0;
    CFIndex cnt;
    for (cnt = 0; cnt < length; cnt++) {
        UniChar ch = CFStringGetCharacterFromInlineBuffer(&inlineBuffer, cnt);
        if (!__BDCharacterIsWhitespace(ch)) {
            spaceAllowed = YES;
            buffer[bufCnt++] = ch; // not whitespace, so we want to keep it
        } else if (spaceAllowed) {
            buffer[bufCnt++] = ' '; // first whitespace of a run: add a single space
            spaceAllowed = NO;
        }
    }

    // trailing whitespace was collapsed to one space, so disregard it; bufCnt is 0 when the
    // string contained nothing but whitespace, in which case there is no last character to inspect
    if (bufCnt > 0 && buffer[bufCnt - 1] == ' ')
        bufCnt--;

    CFStringRef retStr = CFStringCreateWithCharacters(allocator, buffer, bufCnt);
    if (buffer != stackBuffer) CFAllocatorDeallocate(allocator, buffer);
    return retStr;
}
192 | |
// Returns TRUE when any of the first `length` characters of `string` satisfies
// __BDCharacterIsWhitespaceOrNewline().  Characters are examined from the end towards the start.
static inline
Boolean __BDStringContainsWhitespaceOrNewline(CFStringRef string, CFIndex length)
{
    CFIndex idx;
    const UniChar *characters = CFStringGetCharactersPtr(string);

    if (characters == NULL) {
        // no direct access to the characters; go through an inline buffer instead
        CFStringInlineBuffer inlineBuffer;
        CFStringInitInlineBuffer(string, &inlineBuffer, CFRangeMake(0, length));

        for (idx = length - 1; idx >= 0; idx--) {
            if (__BDCharacterIsWhitespaceOrNewline(CFStringGetCharacterFromInlineBuffer(&inlineBuffer, idx)))
                return TRUE;
        }
        return FALSE;
    }

    for (idx = length - 1; idx >= 0; idx--) {
        if (__BDCharacterIsWhitespaceOrNewline(characters[idx]))
            return TRUE;
    }
    return FALSE;
}
212 | |
/*
 Creates a copy of aString in which every run of whitespace or newline characters (as determined by
 __BDCharacterIsWhitespaceOrNewline) is replaced by a single space, and leading and trailing
 whitespace/newlines are removed.

 allocator: used for the scratch buffer and the result; when NULL, the allocator of aString is used.
 Returns a string the caller must release.  An empty input returns the constant empty string, an
 input without any whitespace or newline returns aString itself (retained), and an input consisting
 only of whitespace/newlines returns an empty string.
 */
static inline
CFStringRef __BDStringCreateByCollapsingAndTrimmingWhitespaceAndNewlines(CFAllocatorRef allocator, CFStringRef aString)
{
    CFIndex length = CFStringGetLength(aString);

    if (length == 0)
        return CFRetain(CFSTR(""));

    // improves efficiency somewhat when adding autocomplete strings, since we can completely avoid allocation
    if (__BDStringContainsWhitespaceOrNewline(aString, length) == FALSE)
        return CFRetain(aString);

    // set up the buffer to fetch the characters
    CFStringInlineBuffer inlineBuffer;
    CFStringInitInlineBuffer(aString, &inlineBuffer, CFRangeMake(0, length));

    allocator = (allocator == NULL) ? CFGetAllocator(aString) : allocator;

    // the output is never longer than the input, so `length` characters always suffice
    UniChar stackBuffer[STACK_BUFFER_SIZE];
    UniChar *buffer = stackBuffer;
    if (length >= STACK_BUFFER_SIZE)
        buffer = (UniChar *)CFAllocatorAllocate(allocator, length * sizeof(UniChar), 0);

    NSCAssert1(buffer != NULL, @"failed to allocate memory for string of length %ld", (long)length);

    // YES while the last character written was not whitespace, i.e. while a following
    // whitespace character still has to be represented by a single space
    BOOL spaceAllowed = NO;
    CFIndex bufCnt = 0;
    CFIndex cnt;
    for (cnt = 0; cnt < length; cnt++) {
        UniChar ch = CFStringGetCharacterFromInlineBuffer(&inlineBuffer, cnt);
        if (!__BDCharacterIsWhitespaceOrNewline(ch)) {
            spaceAllowed = YES;
            buffer[bufCnt++] = ch; // not whitespace, so we want to keep it
        } else if (spaceAllowed) {
            buffer[bufCnt++] = ' '; // first whitespace of a run: add a single space
            spaceAllowed = NO;
        }
    }

    // trailing whitespace was collapsed to one space, so disregard it; bufCnt is 0 when the
    // string contained nothing but whitespace, in which case there is no last character to inspect
    if (bufCnt > 0 && buffer[bufCnt - 1] == ' ')
        bufCnt--;

    CFStringRef retStr = CFStringCreateWithCharacters(allocator, buffer, bufCnt);
    if (buffer != stackBuffer) CFAllocatorDeallocate(allocator, buffer);
    return retStr;
}
266 | |
// Returns whether c is dropped for sorting purposes: a backquote, a dollar sign,
// a backslash, or any character for which __BDCharacterIsPunctuation() is true.
static inline Boolean
__BDShouldRemoveUniChar(UniChar c)
{
    if (c == '`' || c == '$' || c == '\\')
        return 1;
    return __BDCharacterIsPunctuation(c);
}
269 | |
// private function for removing some tex special characters from a string
// (only those I consider relevant to sorting): every character for which
// __BDShouldRemoveUniChar() is true is deleted from texString in place.
// Does nothing when BDIsEmptyString(texString) is true.
static inline
void __BDDeleteTeXCharactersForSorting(CFMutableStringRef texString)
{
    if (BDIsEmptyString(texString))
        return;

    CFAllocatorRef allocator = CFGetAllocator(texString);
    CFIndex length = CFStringGetLength(texString);

    // characters are read from an immutable snapshot while the mutable string is edited
    CFStringRef snapshot = CFStringCreateCopy(allocator, texString);
    CFStringInlineBuffer inlineBuffer;
    CFStringInitInlineBuffer(snapshot, &inlineBuffer, CFRangeMake(0, length));

    // readIdx walks the snapshot; keepIdx is the matching position in the string being edited,
    // which only advances past characters that are kept
    CFIndex keepIdx = 0;
    CFIndex readIdx;
    for (readIdx = 0; readIdx < length; readIdx++) {
        UniChar ch = CFStringGetCharacterFromInlineBuffer(&inlineBuffer, readIdx);
        if (__BDShouldRemoveUniChar(ch))
            CFStringDelete(texString, CFRangeMake(keepIdx, 1));
        else
            keepIdx++;
    }

    CFRelease(snapshot); // dispose of our temporary copy
}
304 | |
/*
 Removes one leading stop word (typically an article) from mutableString, using the cached list of
 stop words from preferences.  Does nothing when mutableString is nil or the list is empty.

 Leading characters for which __BDShouldRemoveUniChar() is true are skipped (not deleted) before
 looking for a stop word, so the match is anchored at the first character that is relevant for
 sorting.  A stop word is only removed when it is followed by a whitespace or punctuation
 character; that following character is deleted along with it.  At most one stop word is removed.
 */
static inline
void __BDDeleteArticlesForSorting(CFMutableStringRef mutableString)
{
    if (mutableString == nil)
        return;

    CFIndex count = __BDSKGetStopwordCount();
    if (count == 0)
        return;

    // remove certain terms for sorting, according to preferences
    CFArrayRef articlesToRemove = __BDSKGetStopwords();

    // get the max string length of any of the stop words; we don't want to search any farther than necessary
    CFIndex maxRemoveLength = 0;
    CFIndex idx;
    for (idx = 0; idx < count; idx++) {
        CFIndex articleLength = CFStringGetLength((CFStringRef)CFArrayGetValueAtIndex(articlesToRemove, idx));
        if (articleLength > maxRemoveLength)
            maxRemoveLength = articleLength;
    }

    // skip leading characters that are ignored for sorting
    CFIndex start = 0, length = CFStringGetLength(mutableString);
    while (start < length && __BDShouldRemoveUniChar(CFStringGetCharacterAtIndex(mutableString, start)))
        start++;

    // the anchored search starts at the first relevant character, not at index 0
    CFRange searchRange = CFRangeMake(start, MIN(length - start, maxRemoveLength));
    CFRange articleRange;

    idx = count;
    while (idx--) {
        Boolean found = CFStringFindWithOptions(mutableString, (CFStringRef)CFArrayGetValueAtIndex(articlesToRemove, idx), searchRange, kCFCompareAnchored | kCFCompareCaseInsensitive, &articleRange);
        if (found == FALSE)
            continue;

        // make sure the next character is whitespace or punctuation before deleting, after checking bounds
        CFIndex nextIdx = articleRange.location + articleRange.length;
        if (nextIdx >= length)
            continue;

        UniChar nextChar = CFStringGetCharacterAtIndex(mutableString, nextIdx);
        if (__BDCharacterIsWhitespace(nextChar) || __BDCharacterIsPunctuation(nextChar)) {
            articleRange.length++; // also delete the separator following the stop word
            CFStringDelete(mutableString, articleRange);
            break;
        }
    }
}
348 | |
// Deletes every range reported by -rangeOfTeXCommandInRange: from mutableString, in place.
static inline
void __BDDeleteTeXCommandsForSorting(CFMutableStringRef mutableString)
{
    // this will go into an endless loop if the string is nil, but /only/ if the function is declared inline
    if (mutableString == nil)
        return;

    NSMutableString *texString = (NSMutableString *)mutableString;
    NSRange searchRange = NSMakeRange(0, CFStringGetLength(mutableString));

    // This will find and remove the commands such as \textit{some word} that can confuse the sort order;
    // unfortunately, we can't remove things like {\textit some word}, since it could also be something
    // like {\LaTeX is great}, so this is a compromise
    for (;;) {
        NSRange cmdRange = [texString rangeOfTeXCommandInRange:searchRange];
        if (cmdRange.location == NSNotFound)
            break;

        // delete the command, then continue searching from where it used to start
        [texString deleteCharactersInRange:cmdRange];
        unsigned startLoc = cmdRange.location;
        searchRange.location = startLoc;
        searchRange.length = [texString length] - startLoc;
    }
}
371 | |
/*
 Computes a 32-bit hash of the UTF-16 characters of aString.

 Paul Hsieh's SuperFastHash
 http://www.azillionmonkeys.com/qed/hash.html
 Implementation from Apple's WebCore/khtml/xml/dom_stringimpl.cpp, designed
 to hash UTF-16 characters.

 The result is never 0.
 */
static inline
uint32_t __BDFastHash(CFStringRef aString)
{
    // Golden ratio - arbitrary start value to avoid mapping all 0's to all 0's
    // or anything like that.
    unsigned goldenRatio = 0x9e3779b9U;

    unsigned charCount = CFStringGetLength(aString);
    uint32_t hash = goldenRatio;

    const UniChar *chars = CFStringGetCharactersPtr(aString);
    UniChar stackBuffer[STACK_BUFFER_SIZE];
    UniChar *scratch = NULL;
    CFAllocatorRef allocator = NULL;

    if (chars == NULL) {
        // no direct access to the characters: copy them to the stack buffer when they fit,
        // otherwise to a buffer from the string's allocator
        if (charCount <= STACK_BUFFER_SIZE) {
            scratch = stackBuffer;
        } else {
            allocator = CFGetAllocator(aString);
            scratch = (UniChar *)CFAllocatorAllocate(allocator, charCount * sizeof(UniChar), 0);
            NSCAssert(scratch != NULL, @"unable to allocate memory");
        }
        CFStringGetCharacters(aString, CFRangeMake(0, charCount), scratch);
        chars = scratch;
    }

    int hasOddCharacter = charCount & 1;
    unsigned pairCount = charCount >> 1;

    // Main loop: consumes two characters per iteration
    while (pairCount > 0) {
        hash += chars[0];
        uint32_t mixed = (chars[1] << 11) ^ hash;
        hash = (hash << 16) ^ mixed;
        chars += 2;
        hash += hash >> 11;
        pairCount--;
    }

    // Handle end case: one character left over when the length is odd
    if (hasOddCharacter) {
        hash += chars[0];
        hash ^= hash << 11;
        hash += hash >> 17;
    }

    if (scratch != stackBuffer) CFAllocatorDeallocate(allocator, scratch);

    // Force "avalanching" of final 127 bits
    hash ^= hash << 3;
    hash += hash >> 5;
    hash ^= hash << 2;
    hash += hash >> 15;
    hash ^= hash << 10;

    // this avoids ever returning a hash code of 0, since that is used to
    // signal "hash not computed yet", using a value that is likely to be
    // effectively the same as 0 when the low bits are masked
    if (hash == 0)
        hash = 0x80000000;

    return hash;
}
442 | |
/*
 Creates a copy of aString in which every whitespace character (per __BDCharacterIsWhitespace) is
 replaced by a space and every newline is replaced by '\n'.  A "\r\n" pair yields a single '\n';
 any other character for which BDIsNewlineCharacter() is true yields '\n' as well.  Runs are not
 collapsed.  Returns a string the caller must release; an empty input returns the constant empty string.
 */
static inline
CFStringRef __BDStringCreateByNormalizingWhitespaceAndNewlines(CFAllocatorRef allocator, CFStringRef aString)
{
    CFIndex length = CFStringGetLength(aString);

    if (length == 0)
        return CFRetain(CFSTR(""));

    // set up the buffer to fetch the characters
    CFStringInlineBuffer inlineBuffer;
    CFStringInitInlineBuffer(aString, &inlineBuffer, CFRangeMake(0, length));

    UniChar stackBuffer[STACK_BUFFER_SIZE];
    UniChar *buffer = stackBuffer;
    if (length >= STACK_BUFFER_SIZE)
        buffer = (UniChar *)CFAllocatorAllocate(allocator, (length + 1) * sizeof(UniChar), 0);

    NSCAssert1(buffer != NULL, @"failed to allocate memory for string of length %d", length);

    int bufCnt = 0;
    BOOL previousWasCarriageReturn = NO;
    CFIndex cnt;

    for (cnt = 0; cnt < length; cnt++) {
        UniChar ch = CFStringGetCharacterFromInlineBuffer(&inlineBuffer, cnt);
        BOOL isCarriageReturn = NO;

        if (__BDCharacterIsWhitespace(ch)) {
            buffer[bufCnt++] = ' '; // replace with a single space
        } else if ('\r' == ch) {
            // we can have \r\n, which should appear as a single \n
            isCarriageReturn = YES;
            buffer[bufCnt++] = '\n';
        } else if ('\n' == ch) {
            // already written when the previous character was \r
            if (!previousWasCarriageReturn)
                buffer[bufCnt++] = '\n';
        } else if (BDIsNewlineCharacter(ch)) {
            buffer[bufCnt++] = '\n';
        } else {
            buffer[bufCnt++] = ch;
        }

        previousWasCarriageReturn = isCarriageReturn;
    }

    CFStringRef retStr = CFStringCreateWithCharacters(allocator, buffer, bufCnt);
    if (buffer != stackBuffer) CFAllocatorDeallocate(allocator, buffer);
    return retStr;
}
496 | |
// Deletes, in place, every character of theString that is a member of charSet.
static inline void
__BDDeleteCharactersInCharacterSet(CFMutableStringRef theString, CFCharacterSetRef charSet)
{
    CFIndex length = CFStringGetLength(theString);

    // characters are read from an immutable snapshot while the mutable string is edited
    CFStringRef snapshot = CFStringCreateCopy(kCFAllocatorDefault, theString);
    CFStringInlineBuffer inlineBuffer;
    CFStringInitInlineBuffer(snapshot, &inlineBuffer, CFRangeMake(0, length));

    // readIdx walks the snapshot; keepIdx is the matching position in the string being edited,
    // which only advances past characters that are kept
    CFIndex keepIdx = 0;
    CFIndex readIdx;
    for (readIdx = 0; readIdx < length; readIdx++) {
        UniChar ch = CFStringGetCharacterFromInlineBuffer(&inlineBuffer, readIdx);
        if (CFCharacterSetIsCharacterMember(charSet, ch))
            CFStringDelete(theString, CFRangeMake(keepIdx, 1));
        else
            keepIdx++;
    }

    CFRelease(snapshot); // dispose of our temporary copy
}
522 | |
/* This is very similar to CFStringTrimWhitespace from CF-368.1.  It takes a buffer of unichars and removes the whitespace characters from each end, leaving the remaining characters at the start of the original buffer (the pointer is unchanged).  The length passed is the buffer length; the length returned is the new length, which is 0 when the buffer holds only whitespace. */
static inline CFIndex __BDCharactersTrimmingWhitespace(UniChar *chars, CFIndex length)
{
    // index of the first character that is not whitespace
    CFIndex firstKept = 0;
    while (firstKept < length && __BDCharacterIsWhitespace(chars[firstKept]))
        firstKept++;

    // whitespace only
    if (firstKept >= length)
        return 0;

    // index of the last character that is not whitespace
    CFIndex lastKept = length - 1;
    while (lastKept >= 0 && __BDCharacterIsWhitespace(chars[lastKept]))
        lastKept--;

    CFIndex trimmedLength = lastKept - firstKept + 1;

    // @@ CFStringTrimWhitespace uses memmove(chars, chars + newStartIndex * sizeof(UniChar), length * sizeof(UniChar)), but that doesn't work in my testing here.
    memmove(chars, chars + firstKept, trimmedLength * sizeof(UniChar));

    return trimmedLength;
}
549 | |
550 | #pragma mark API |
551 | |
// Copied from CFString.c (CF368.25) with the addition of a single parameter for specifying comparison options (e.g. case-insensitive).
// Returns a new array (to be released by the caller) holding the substrings of `string` that lie
// between occurrences of `separatorString`; when no occurrence is found the array holds `string` itself.
CFArrayRef BDStringCreateArrayBySeparatingStringsWithOptions(CFAllocatorRef allocator, CFStringRef string, CFStringRef separatorString, CFOptionFlags compareOptions)
{
    CFIndex length = CFStringGetLength(string);
    CFArrayRef separatorRanges = CFStringCreateArrayWithFindResults(allocator, string, separatorString, CFRangeMake(0, length), compareOptions);

    // no separator found: the result is the whole string
    if (separatorRanges == NULL)
        return CFArrayCreate(allocator, (const void **)&string, 1, &kCFTypeArrayCallBacks);

    CFIndex count = CFArrayGetCount(separatorRanges);
    CFMutableArrayRef array = CFArrayCreateMutable(allocator, count + 2, &kCFTypeArrayCallBacks);
    CFStringRef substring;
    CFIndex startIndex = 0;
    CFIndex idx;

    // one substring in front of each separator
    for (idx = 0; idx < count; idx++) {
        const CFRange *separator = CFArrayGetValueAtIndex(separatorRanges, idx);
        CFRange pieceRange = CFRangeMake(startIndex, separator->location - startIndex);
        substring = CFStringCreateWithSubstring(allocator, string, pieceRange);
        CFArrayAppendValue(array, substring);
        CFRelease(substring);
        startIndex = separator->location + separator->length;
    }

    // and whatever follows the last separator
    substring = CFStringCreateWithSubstring(allocator, string, CFRangeMake(startIndex, length - startIndex));
    CFArrayAppendValue(array, substring);
    CFRelease(substring);

    CFRelease(separatorRanges);

    return array;
}
586 | |
// Splits `string` at every character that is a member of `charSet` and returns the non-empty pieces
// in a new array (to be released by the caller).  When `trim` is true, each piece is passed through
// __BDCharactersTrimmingWhitespace() before the component string is created.  A NULL allocator
// is replaced by the default allocator.
CFArrayRef BDStringCreateComponentsSeparatedByCharacterSetTrimWhitespace(CFAllocatorRef allocator, CFStringRef string, CFCharacterSetRef charSet, Boolean trim)
{
    CFIndex length = CFStringGetLength(string);
    CFStringInlineBuffer inlineBuffer;
    CFStringInitInlineBuffer(string, &inlineBuffer, CFRangeMake(0, length));

    if (allocator == NULL)
        allocator = CFAllocatorGetDefault();
    CFMutableArrayRef array = CFArrayCreateMutable(allocator, 0, &kCFTypeArrayCallBacks);

    // full length of string has to be large enough for the buffer
    UniChar stackBuffer[STACK_BUFFER_SIZE];
    UniChar *buffer = stackBuffer;
    if (length >= STACK_BUFFER_SIZE)
        buffer = (UniChar *)CFAllocatorAllocate(allocator, length * sizeof(UniChar), 0);

    NSCAssert1(buffer != NULL, @"Unable to allocate buffer for %@", string);

    CFIndex bufCnt = 0;
    CFStringRef component;
    CFIndex cnt;

    // scan characters into a buffer; when a character from the charSet is reached, stop and create a string
    for (cnt = 0; cnt < length; cnt++) {
        UniChar ch = CFStringGetCharacterFromInlineBuffer(&inlineBuffer, cnt);

        if (CFCharacterSetIsCharacterMember(charSet, ch) == FALSE) {
            buffer[bufCnt++] = ch;
            continue;
        }

        // separator reached; nothing to emit when no characters were collected
        if (bufCnt == 0)
            continue;

        if (trim)
            bufCnt = __BDCharactersTrimmingWhitespace(buffer, bufCnt);
        component = CFStringCreateWithCharacters(allocator, buffer, bufCnt);
        CFArrayAppendValue(array, component);
        CFRelease(component);
        bufCnt = 0;
    }

    // get the final component from the buffer and create a string
    if (bufCnt != 0) {
        if (trim)
            bufCnt = __BDCharactersTrimmingWhitespace(buffer, bufCnt);
        component = CFStringCreateWithCharacters(allocator, buffer, bufCnt);
        CFArrayAppendValue(array, component);
        CFRelease(component);
    }

    if (buffer != stackBuffer) CFAllocatorDeallocate(allocator, buffer);

    return array;
}
639 | |
// Hashes the string `value` after lowercasing a copy of its characters.
// Returns 0 for a NULL value, otherwise the result of __BDFastHash() on the lowercased copy.
CFHashCode BDCaseInsensitiveStringHash(const void *value)
{
    if (value == NULL)
        return 0;

    CFAllocatorRef allocator = CFGetAllocator(value);
    CFIndex len = CFStringGetLength(value);

    // use a generous length, in case the lowercase changes the number of characters
    CFIndex capacity = len + 10;
    UniChar stackBuffer[STACK_BUFFER_SIZE];
    UniChar *buffer = stackBuffer;
    // allocator handed to the mutable string for the external buffer: none for the stack buffer
    CFAllocatorRef bufferAllocator = kCFAllocatorNull;
    if (capacity >= STACK_BUFFER_SIZE) {
        buffer = (UniChar *)CFAllocatorAllocate(allocator, capacity * sizeof(UniChar), 0);
        bufferAllocator = allocator;
    }
    CFStringGetCharacters(value, CFRangeMake(0, len), buffer);

    // If we create the string with external characters, CFStringGetCharactersPtr is guaranteed to succeed; since we're going to call CFStringGetCharacters anyway in fastHash if CFStringGetCharactersPtr fails, let's do it now when we lowercase the string
    CFMutableStringRef lowercased = CFStringCreateMutableWithExternalCharactersNoCopy(allocator, buffer, len, capacity, bufferAllocator);
    CFStringLowercase(lowercased, NULL);
    uint32_t hash = __BDFastHash(lowercased);

    // if we used the allocator, this should free the buffer for us
    CFRelease(lowercased);
    return hash;
}
665 | |
666 | |
// Public entry point; forwards to __BDStringCreateByCollapsingAndTrimmingWhitespace().
CFStringRef BDStringCreateByCollapsingAndTrimmingWhitespace(CFAllocatorRef allocator, CFStringRef string)
{
    return __BDStringCreateByCollapsingAndTrimmingWhitespace(allocator, string);
}
// Public entry point; forwards to __BDStringCreateByCollapsingAndTrimmingWhitespaceAndNewlines().
CFStringRef BDStringCreateByCollapsingAndTrimmingWhitespaceAndNewlines(CFAllocatorRef allocator, CFStringRef string)
{
    return __BDStringCreateByCollapsingAndTrimmingWhitespaceAndNewlines(allocator, string);
}
// Public entry point; forwards to __BDStringCreateByNormalizingWhitespaceAndNewlines().
CFStringRef BDStringCreateByNormalizingWhitespaceAndNewlines(CFAllocatorRef allocator, CFStringRef string)
{
    return __BDStringCreateByNormalizingWhitespaceAndNewlines(allocator, string);
}
670 | |
// useful when you want the range of a single character without messing with character sets, or just to know if a character exists in a string (pass NULL for resultRange if you don't care where the result is located)
//
// Searches searchRange of `string` for the first occurrence of `character`.  Returns TRUE and, when
// resultRange is not NULL, stores the one-character range of the match (in string coordinates);
// returns FALSE when the string or the search range is empty or the character does not occur.
// No character outside searchRange is ever examined.
Boolean BDStringFindCharacter(CFStringRef string, UniChar character, CFRange searchRange, CFRange *resultRange)
{
    // nothing to search; an empty range must not be probed at all
    if (CFStringGetLength(string) == 0 || searchRange.length <= 0)
        return FALSE;

    CFStringInlineBuffer inlineBuffer;
    CFStringInitInlineBuffer(string, &inlineBuffer, searchRange);

    // inline buffer indexes are relative to searchRange.location
    CFIndex cnt;
    for (cnt = 0; cnt < searchRange.length; cnt++) {
        if (CFStringGetCharacterFromInlineBuffer(&inlineBuffer, cnt) != character)
            continue;

        if (resultRange != NULL) {
            resultRange->location = searchRange.location + cnt;
            resultRange->length = 1;
        }
        return TRUE;
    }

    return FALSE;
}
692 | |
// Returns whether c is a member of +[NSCharacterSet newlineCharacterSet].
Boolean BDIsNewlineCharacter(UniChar c)
{
    // minor optimization: characters in 0x21...0x7E are answered without consulting the set,
    // since ASCII characters are the most common in TeX
    if (c >= 0x0021 && c <= 0x007E)
        return NO;

    CFCharacterSetRef newlineSet = (CFCharacterSetRef)[NSCharacterSet newlineCharacterSet];
    return CFCharacterSetIsCharacterMember(newlineSet, c);
}
698 | |
// Returns whether a form D normalized copy of `string` contains any character
// from the predefined non-base character set.
Boolean BDStringHasAccentedCharacters(CFStringRef string)
{
    CFAllocatorRef allocator = CFGetAllocator(string);
    CFMutableStringRef decomposed = CFStringCreateMutableCopy(allocator, CFStringGetLength(string), string);
    CFStringNormalize(decomposed, kCFStringNormalizationFormD);

    CFCharacterSetRef nonBaseSet = CFCharacterSetGetPredefined(kCFCharacterSetNonBase);
    CFRange wholeString = CFRangeMake(0, CFStringGetLength(decomposed));
    Boolean hasNonBase = CFStringFindCharacterFromSet(decomposed, nonBaseSet, wholeString, 0, NULL);

    CFRelease(decomposed);
    return hasNonBase;
}
707 | |
708 | #pragma mark Mutable Strings |
709 | |
// Removes TeX commands from mutableString first, then the TeX special characters.
void BDDeleteTeXForSorting(CFMutableStringRef mutableString)
{
    __BDDeleteTeXCommandsForSorting(mutableString);

    // get rid of braces and such...
    __BDDeleteTeXCharactersForSorting(mutableString);
}
// Public entry point; forwards to __BDDeleteArticlesForSorting().
void BDDeleteArticlesForSorting(CFMutableStringRef mutableString)
{
    __BDDeleteArticlesForSorting(mutableString);
}
// Public entry point; forwards to __BDDeleteCharactersInCharacterSet().
void BDDeleteCharactersInCharacterSet(CFMutableStringRef mutableString, CFCharacterSetRef charSet)
{
    __BDDeleteCharactersInCharacterSet(mutableString, charSet);
}