Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(178)

Side by Side Diff: Source/WebCore/platform/mac/WebCoreNSURLExtras.mm

Issue 13713003: Remove all of WebCore/platform/mac which is not mentioned in WebCore.gypi. (Closed) Base URL: svn://svn.chromium.org/blink/trunk
Patch Set: Added back a couple needed headers Created 7 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
(Empty)
1 /*
2 * Copyright (C) 2005, 2007 Apple Inc. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 *
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 3. Neither the name of Apple Computer, Inc. ("Apple") nor the names of
14 * its contributors may be used to endorse or promote products derived
15 * from this software without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY
18 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
20 * DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY
21 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
22 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
23 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
24 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29 #import "config.h"
30 #import "WebCoreObjCExtras.h"
31 #import "WebCoreNSStringExtras.h"
32 #import "WebCoreNSURLExtras.h"
33 #import "WebCoreSystemInterface.h"
34 #import <wtf/RetainPtr.h>
35 #import <wtf/Vector.h>
36 #import <unicode/uchar.h>
37 #import <unicode/uidna.h>
38 #import <unicode/uscript.h>
39
// Capacity of the stack buffers used for IDN conversion; it must be large enough
// to hold an IDN-encoded host name. Longer host names are simply left unconverted,
// which is almost certainly OK.
#define HOST_NAME_BUFFER_LENGTH 2048
// Initial size, in bytes, of the heap buffer used when copying out a URL's bytes.
#define URL_BYTES_BUFFER_LENGTH 2048

// Callback invoked once for each host name range found inside a URL string.
typedef void (* StringRangeApplierFunction)(NSString *string, NSRange range, void *context);

// One-time guard and bitmap (one bit per ICU script code) for the IDN script white list.
static pthread_once_t IDNScriptWhiteListFileRead = PTHREAD_ONCE_INIT;
static uint32_t IDNScriptWhiteList[(USCRIPT_CODE_LIMIT + 31) / 32];
49
50 namespace WebCore {
51
// Returns YES for code points that must not be shown unescaped because they could be
// mistaken for something else: non-printable characters, whitespace, default-ignorable
// code points, and an explicit list of lookalikes (including lock-related emoji).
//
// The explicit list also draws on Mozilla's blacklist
// (http://kb.mozillazine.org/Network.IDN.blacklist_chars), restricted to the
// characters that ICU can encode.
static inline BOOL isLookalikeCharacter(int charCode)
{
    if (!u_isprint(charCode))
        return YES;
    if (u_isUWhiteSpace(charCode))
        return YES;
    if (u_hasBinaryProperty(charCode, UCHAR_DEFAULT_IGNORABLE_CODE_POINT))
        return YES;

    static const int lookalikeCodePoints[] = {
        0x00ED, /* LATIN SMALL LETTER I WITH ACUTE */
        0x01C3, /* LATIN LETTER RETROFLEX CLICK */
        0x0251, /* LATIN SMALL LETTER ALPHA */
        0x0261, /* LATIN SMALL LETTER SCRIPT G */
        0x0335, /* COMBINING SHORT STROKE OVERLAY */
        0x0337, /* COMBINING SHORT SOLIDUS OVERLAY */
        0x0338, /* COMBINING LONG SOLIDUS OVERLAY */
        0x05B4, /* HEBREW POINT HIRIQ */
        0x05BC, /* HEBREW POINT DAGESH OR MAPIQ */
        0x05C3, /* HEBREW PUNCTUATION SOF PASUQ */
        0x05F4, /* HEBREW PUNCTUATION GERSHAYIM */
        0x0660, /* ARABIC INDIC DIGIT ZERO */
        0x06D4, /* ARABIC FULL STOP */
        0x06F0, /* EXTENDED ARABIC INDIC DIGIT ZERO */
        0x2027, /* HYPHENATION POINT */
        0x2039, /* SINGLE LEFT-POINTING ANGLE QUOTATION MARK */
        0x203A, /* SINGLE RIGHT-POINTING ANGLE QUOTATION MARK */
        0x2044, /* FRACTION SLASH */
        0x2215, /* DIVISION SLASH */
        0x2216, /* SET MINUS */
        0x233F, /* APL FUNCTIONAL SYMBOL SLASH BAR */
        0x23AE, /* INTEGRAL EXTENSION */
        0x244A, /* OCR DOUBLE BACKSLASH */
        0x2571, /* BOX DRAWINGS LIGHT DIAGONAL UPPER RIGHT TO LOWER LEFT */
        0x2572, /* BOX DRAWINGS LIGHT DIAGONAL UPPER LEFT TO LOWER RIGHT */
        0x29F8, /* BIG SOLIDUS */
        0x29f6, /* SOLIDUS WITH OVERBAR */
        0x2AFB, /* TRIPLE SOLIDUS BINARY RELATION */
        0x2AFD, /* DOUBLE SOLIDUS OPERATOR */
        0x3008, /* LEFT ANGLE BRACKET */
        0x3014, /* LEFT TORTOISE SHELL BRACKET */
        0x3015, /* RIGHT TORTOISE SHELL BRACKET */
        0x3033, /* VERTICAL KANA REPEAT MARK UPPER HALF */
        0x3035, /* VERTICAL KANA REPEAT MARK LOWER HALF */
        0x321D, /* PARENTHESIZED KOREAN CHARACTER OJEON */
        0x321E, /* PARENTHESIZED KOREAN CHARACTER O HU */
        0x33DF, /* SQUARE A OVER M */
        0xFE14, /* PRESENTATION FORM FOR VERTICAL SEMICOLON */
        0xFE15, /* PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK */
        0xFE3F, /* PRESENTATION FORM FOR VERTICAL LEFT ANGLE BRACKET */
        0xFE5D, /* SMALL LEFT TORTOISE SHELL BRACKET */
        0xFE5E, /* SMALL RIGHT TORTOISE SHELL BRACKET */
        0x1F50F, /* LOCK WITH INK PEN */
        0x1F510, /* CLOSED LOCK WITH KEY */
        0x1F511, /* KEY */
        0x1F512, /* LOCK */
        0x1F513, /* OPEN LOCK */
    };

    const size_t count = sizeof(lookalikeCodePoints) / sizeof(lookalikeCodePoints[0]);
    for (size_t k = 0; k < count; ++k) {
        if (lookalikeCodePoints[k] == charCode)
            return YES;
    }
    return NO;
}
117
// Reads a script white list file and sets, in IDNScriptWhiteList, the bit of every
// script it names.
//
// The file is read one whitespace-separated word at a time; a '#' starts a comment
// that runs to the end of the line. Words that do not map to a script code in
// [0, USCRIPT_CODE_LIMIT) are ignored.
//
// Returns NO if filename is nil or the file cannot be opened, YES otherwise.
static BOOL readIDNScriptWhiteListFile(NSString *filename)
{
    if (!filename)
        return NO;

    FILE *file = fopen([filename fileSystemRepresentation], "r");
    if (!file)
        return NO;

    for (;;) {
        // Skip a comment if present.
        if (fscanf(file, " #%*[^\n\r]%*[\n\r]") == EOF)
            break;

        // Read a script name if present. Only the first 32 characters of a word are
        // kept; the rest of an over-long word is consumed and discarded.
        char word[33];
        int result = fscanf(file, " %32[^# \t\n\r]%*[^# \t\n\r] ", word);
        if (result == EOF)
            break;
        if (result != 1)
            continue;

        // Got a word: map it to a script code and record it in the bitmap.
        int32_t script = u_getPropertyValueEnum(UCHAR_SCRIPT, word);
        if (script < 0 || script >= USCRIPT_CODE_LIMIT)
            continue;

        // Unsigned literal: shifting a signed 1 into bit 31 is not well defined.
        IDNScriptWhiteList[script / 32] |= 1U << (script % 32);
    }
    fclose(file);
    return YES;
}
153
// Loads the IDN script white list. Each Library directory (all domains) is tried in
// order and the first IDNScriptWhiteList.txt that can be read wins; otherwise the copy
// inside the com.apple.WebKit bundle is used. Crashes if no list can be read at all.
static void readIDNScriptWhiteList(void)
{
    NSArray *libraryDirectories = NSSearchPathForDirectoriesInDomains(NSLibraryDirectory, NSAllDomainsMask, YES);
    for (NSString *directory in libraryDirectories) {
        NSString *candidate = [directory stringByAppendingPathComponent:@"IDNScriptWhiteList.txt"];
        if (readIDNScriptWhiteListFile(candidate))
            return;
    }

    // Fall back on the white list shipped inside the bundle.
    NSBundle *webKitBundle = [NSBundle bundleWithIdentifier:@"com.apple.WebKit"];
    NSString *bundledPath = [webKitBundle pathForResource:@"IDNScriptWhiteList" ofType:@"txt"];
    if (readIDNScriptWhiteListFile(bundledPath))
        return;

    CRASH();
}
170
// Returns YES only if every code point in the UTF-16 buffer belongs to a white-listed
// script and is not a lookalike character. The white list is loaded on first use.
// Returns NO if ICU reports an error or an out-of-range script code.
static BOOL allCharactersInIDNScriptWhiteList(const UChar *buffer, int32_t length)
{
    pthread_once(&IDNScriptWhiteListFileRead, readIDNScriptWhiteList);

    int32_t i = 0;
    while (i < length) {
        UChar32 c;
        U16_NEXT(buffer, i, length, c);

        UErrorCode error = U_ZERO_ERROR;
        UScriptCode script = uscript_getScript(c, &error);
        if (error != U_ZERO_ERROR) {
            LOG_ERROR("got ICU error while trying to look at scripts: %d", error);
            return NO;
        }
        if (script < 0) {
            LOG_ERROR("got negative number for script code from ICU: %d", script);
            return NO;
        }
        if (script >= USCRIPT_CODE_LIMIT)
            return NO;

        // Unsigned literal: shifting a signed 1 into bit 31 is not well defined.
        size_t index = script / 32;
        uint32_t mask = 1U << (script % 32);
        if (!(IDNScriptWhiteList[index] & mask))
            return NO;

        if (isLookalikeCharacter(c))
            return NO;
    }
    return YES;
}
202
// Returns YES if the host name ends in a top level domain that has special rules and
// every character of the label directly below that domain satisfies them. Only the
// Cyrillic ".рф" domain (U+0440 U+0444) is recognized. Returns NO for an empty buffer
// and for any other top level domain.
static BOOL allCharactersAllowedByTLDRules(const UChar* buffer, int32_t length)
{
    // Nothing to inspect; this also keeps buffer[length - 1] below in bounds.
    if (!buffer || length <= 0)
        return NO;

    // Skip trailing dot for root domain.
    if (buffer[length - 1] == '.')
        length--;

    if (length > 3 && buffer[length - 3] == '.'
        && buffer[length - 2] == 0x0440 // CYRILLIC SMALL LETTER ER
        && buffer[length - 1] == 0x0444) // CYRILLIC SMALL LETTER EF
    {
        // Rules defined by <http://www.cctld.ru/ru/docs/rulesrf.php>. This code only checks requirements that matter for presentation purposes.
        // Walk backwards from the character before ".рф" down to and including index 0,
        // so that the first character of the host name is checked as well.
        for (int32_t i = length - 4; i >= 0; --i) {
            UChar ch = buffer[i];

            // Only modern Russian letters, digits and dashes are allowed.
            if ((ch >= 0x0430 && ch <= 0x044f) || ch == 0x0451 || (ch >= '0' && ch <= '9') || ch == '-')
                continue;

            // Only check top level domain. Lower level registrars may have different rules.
            if (ch == '.')
                break;

            return NO;
        }
        return YES;
    }

    // Not a known top level domain with special rules.
    return NO;
}
233
// Converts the host name in `range` of `string` to ASCII (encode == YES) or to Unicode
// (encode == NO) using ICU's IDNA functions.
//
// A nil result means no mapping is necessary (or possible).
// With makeString == NO, a non-nil result only signals that mapping is necessary.
// With makeString == YES, a non-nil result is the mapped host name.
static NSString *mapHostNameWithRange(NSString *string, NSRange range, BOOL encode, BOOL makeString)
{
    // Host names that do not fit the stack buffers, and empty strings, are left alone.
    if (range.length > HOST_NAME_BUFFER_LENGTH || ![string length])
        return nil;

    // Before encoding, resolve percent escapes so that IDNA sees the real characters.
    if (encode) {
        NSRange percent = [string rangeOfString:@"%" options:NSLiteralSearch range:range];
        if (percent.location != NSNotFound) {
            NSString *escaped = [string substringWithRange:range];
            NSString *unescaped = WebCoreCFAutorelease(CFURLCreateStringByReplacingPercentEscapes(NULL, (CFStringRef)escaped, CFSTR("")));
            if (unescaped) {
                string = unescaped;
                range = NSMakeRange(0, [unescaped length]);
            }
        }
    }

    UChar source[HOST_NAME_BUFFER_LENGTH];
    UChar mapped[HOST_NAME_BUFFER_LENGTH];

    int sourceLength = range.length;
    [string getCharacters:source range:range];

    UErrorCode status = U_ZERO_ERROR;
    int32_t mappedLength;
    if (encode)
        mappedLength = uidna_IDNToASCII(source, sourceLength, mapped, HOST_NAME_BUFFER_LENGTH, UIDNA_ALLOW_UNASSIGNED, NULL, &status);
    else
        mappedLength = uidna_IDNToUnicode(source, sourceLength, mapped, HOST_NAME_BUFFER_LENGTH, UIDNA_ALLOW_UNASSIGNED, NULL, &status);
    if (status != U_ZERO_ERROR)
        return nil;

    // Conversion produced the same text: nothing to map.
    BOOL unchanged = mappedLength == sourceLength && !memcmp(source, mapped, sourceLength * sizeof(UChar));
    if (unchanged)
        return nil;

    // A decoded name is only accepted if the script white list or the TLD rules allow it.
    if (!encode) {
        BOOL displayable = allCharactersInIDNScriptWhiteList(mapped, mappedLength) || allCharactersAllowedByTLDRules(mapped, mappedLength);
        if (!displayable)
            return nil;
    }

    if (!makeString)
        return string;
    return [NSString stringWithCharacters:mapped length:mappedLength];
}
273
// Returns YES if the host name in `range` would change when converted to Unicode.
BOOL hostNameNeedsDecodingWithRange(NSString *string, NSRange range)
{
    NSString *marker = mapHostNameWithRange(string, range, NO, NO);
    return marker ? YES : NO;
}
278
// Returns YES if the host name in `range` would change when converted to ASCII.
BOOL hostNameNeedsEncodingWithRange(NSString *string, NSRange range)
{
    NSString *marker = mapHostNameWithRange(string, range, YES, NO);
    return marker ? YES : NO;
}
283
// Returns the Unicode form of the host name in `range`, or nil if no mapping is needed.
NSString *decodeHostNameWithRange(NSString *string, NSRange range)
{
    NSString *decoded = mapHostNameWithRange(string, range, NO, YES);
    return decoded;
}
288
// Returns the ASCII form of the host name in `range`, or nil if no mapping is needed.
NSString *encodeHostNameWithRange(NSString *string, NSRange range)
{
    NSString *encoded = mapHostNameWithRange(string, range, YES, YES);
    return encoded;
}
293
// Returns the Unicode form of a whole host name, or the input itself when no mapping
// is needed.
NSString *decodeHostName(NSString *string)
{
    NSRange wholeString = NSMakeRange(0, [string length]);
    NSString *decoded = mapHostNameWithRange(string, wholeString, NO, YES);
    if (decoded)
        return decoded;
    return string;
}
299
// Returns the ASCII form of a whole host name, or the input itself when no mapping
// is needed.
NSString *encodeHostName(NSString *string)
{
    NSRange wholeString = NSMakeRange(0, [string length]);
    NSString *encoded = mapHostNameWithRange(string, wholeString, YES, YES);
    if (encoded)
        return encoded;
    return string;
}
305
// Appends `range` to the array that `context` points at when the host name in that
// range needs encoding (encode == YES) or decoding (encode == NO). `context` is an
// NSMutableArray ** whose array is created on first use; the caller owns it.
static void collectRangesThatNeedMapping(NSString *string, NSRange range, void *context, BOOL encode)
{
    BOOL needsMapping;
    if (encode)
        needsMapping = hostNameNeedsEncodingWithRange(string, range);
    else
        needsMapping = hostNameNeedsDecodingWithRange(string, range);

    if (needsMapping) {
        NSMutableArray **ranges = (NSMutableArray **)context;
        if (*ranges == nil)
            *ranges = [[NSMutableArray alloc] init];
        [*ranges addObject:[NSValue valueWithRange:range]];
    }
}
318
// StringRangeApplierFunction adapter that collects ranges needing encoding.
static void collectRangesThatNeedEncoding(NSString *string, NSRange range, void *context)
{
    collectRangesThatNeedMapping(string, range, context, YES);
}
323
// StringRangeApplierFunction adapter that collects ranges needing decoding.
static void collectRangesThatNeedDecoding(NSString *string, NSRange range, void *context)
{
    collectRangesThatNeedMapping(string, range, context, NO);
}
328
// Takes a Core Foundation retain on `charset` and hands the same object back, so the
// call can be used inline when initializing a static.
static inline NSCharacterSet *retain(NSCharacterSet *charset)
{
    return (NSCharacterSet *)CFRetain(charset);
}
334
// Calls `f` once for every host name range found in a mailto: URL string.
//
// In a mailto: URL, host names come after a '@' character and end with a '>' or ','
// or '?' character. Quoted strings are skipped so that characters inside them are not
// mistaken for delimiters. Scanning stops at the first unquoted '?', since everything
// after it is past the part of the URL that contains host names.
static void applyHostNameFunctionToMailToURLString(NSString *string, StringRangeApplierFunction f, void *context)
{
    static NSCharacterSet *hostNameOrStringStartCharacters = retain([NSCharacterSet characterSetWithCharactersInString:@"\"@?"]);
    static NSCharacterSet *hostNameEndCharacters = retain([NSCharacterSet characterSetWithCharactersInString:@">,?"]);
    static NSCharacterSet *quotedStringCharacters = retain([NSCharacterSet characterSetWithCharactersInString:@"\"\\"]);

    const unsigned totalLength = [string length];
    NSRange unscanned = NSMakeRange(0, totalLength);

    for (;;) {
        // Find the next '@', '?' or opening quote.
        NSRange marker = [string rangeOfCharacterFromSet:hostNameOrStringStartCharacters options:0 range:unscanned];
        if (marker.location == NSNotFound)
            return;

        unichar markerCharacter = [string characterAtIndex:marker.location];
        unscanned.location = NSMaxRange(marker);
        unscanned.length = totalLength - unscanned.location;

        if (markerCharacter == '?')
            return;

        if (markerCharacter == '@') {
            // The host name runs up to the next terminator, or to the end of the string.
            unsigned hostStart = unscanned.location;
            NSRange terminator = [string rangeOfCharacterFromSet:hostNameEndCharacters options:0 range:unscanned];
            BOOL reachedEnd = terminator.location == NSNotFound;
            NSUInteger hostEnd = reachedEnd ? totalLength : terminator.location;
            if (!reachedEnd) {
                // Resume scanning at the terminator itself so that a '?' is still seen.
                unscanned.location = hostEnd;
                unscanned.length = totalLength - unscanned.location;
            }

            f(string, NSMakeRange(hostStart, hostEnd - hostStart), context);

            if (reachedEnd)
                return;
            continue;
        }

        // Otherwise we are at an opening quote: skip to the matching closing quote.
        ASSERT(markerCharacter == '"');
        for (;;) {
            NSRange special = [string rangeOfCharacterFromSet:quotedStringCharacters options:0 range:unscanned];
            if (special.location == NSNotFound)
                return;

            unichar specialCharacter = [string characterAtIndex:special.location];
            unscanned.location = NSMaxRange(special);
            unscanned.length = totalLength - unscanned.location;

            // A closing quote ends the quoted string; go back to looking for host names.
            if (specialCharacter == '"')
                break;

            // A backslash escapes the next character, so step over it.
            ASSERT(specialCharacter == '\\');
            if (!unscanned.length)
                return;

            unscanned.location += 1;
            unscanned.length -= 1;
        }
    }
}
407
// Locates the host name(s) in a URL string and calls `f` for each range.
//
// This is a hand-rolled scan rather than real URL parsing because it has to run
// before %-escaping is applied, and because it is the only code here that understands
// mailto: URLs.
static void applyHostNameFunctionToURLString(NSString *string, StringRangeApplierFunction f, void *context)
{
    if (hasCaseInsensitivePrefix(string, @"mailto:")) {
        applyHostNameFunctionToMailToURLString(string, f, context);
        return;
    }

    // In a hierarchical URL the host name follows "://", preceded only by scheme
    // characters. It ends at the end of the string or at ':', '/', '?' or '#'.
    // If the authority contains '@', the host is the part after it.
    NSRange schemeSeparator = [string rangeOfString:@"://"];
    if (schemeSeparator.location == NSNotFound)
        return;

    // Reject the string if anything before "://" is not a valid scheme character.
    static NSCharacterSet *nonSchemeCharacters = retain([[NSCharacterSet characterSetWithCharactersInString:@"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+-."] invertedSet]);
    NSRange schemeRange = NSMakeRange(0, schemeSeparator.location);
    NSRange invalidSchemeCharacter = [string rangeOfCharacterFromSet:nonSchemeCharacters options:0 range:schemeRange];
    if (invalidSchemeCharacter.location != NSNotFound)
        return;

    unsigned totalLength = [string length];

    static NSCharacterSet *hostTerminators = retain([NSCharacterSet characterSetWithCharactersInString:@":/?#"]);

    // The authority starts right after the separator.
    unsigned authorityStart = NSMaxRange(schemeSeparator);

    // The host ends at the first terminator, or at the end of the string.
    NSRange terminator = [string rangeOfCharacterFromSet:hostTerminators options:0 range:NSMakeRange(authorityStart, totalLength - authorityStart)];
    unsigned hostEnd = totalLength;
    if (terminator.location != NSNotFound)
        hostEnd = terminator.location;

    // Skip user info, if any: the host starts after the '@'.
    NSRange userInfoEnd = [string rangeOfString:@"@" options:0 range:NSMakeRange(authorityStart, hostEnd - authorityStart)];
    unsigned hostStart = authorityStart;
    if (userInfoEnd.location != NSNotFound)
        hostStart = NSMaxRange(userInfoEnd);

    f(string, NSMakeRange(hostStart, hostEnd - hostStart), context);
}
451
// Returns `string` with every host name that needs it converted to ASCII (encode == YES)
// or to Unicode (encode == NO). Returns the input itself when nothing has to change,
// which is the case this function is optimized for.
static NSString *mapHostNames(NSString *string, BOOL encode)
{
    // An all-ASCII string has nothing to encode.
    if (encode && [string canBeConvertedToEncoding:NSASCIIStringEncoding])
        return string;

    // Collect the ranges that actually need mapping; the array is only created on demand.
    NSMutableArray *rangesToMap = nil;
    StringRangeApplierFunction collector = encode ? collectRangesThatNeedEncoding : collectRangesThatNeedDecoding;
    applyHostNameFunctionToURLString(string, collector, &rangesToMap);
    if (!rangesToMap)
        return string;

    // Replace from the last range to the first so earlier ranges keep their positions.
    NSMutableString *mapped = [string mutableCopy];
    for (unsigned remaining = [rangesToMap count]; remaining > 0; --remaining) {
        NSRange hostRange = [[rangesToMap objectAtIndex:remaining - 1] rangeValue];
        NSString *replacement;
        if (encode)
            replacement = encodeHostNameWithRange(string, hostRange);
        else
            replacement = decodeHostNameWithRange(string, hostRange);
        [mapped replaceCharactersInRange:hostRange withString:replacement];
    }
    [rangesToMap release];
    return [mapped autorelease];
}
477
// Returns YES if `c` is an ASCII hexadecimal digit (0-9, A-F or a-f).
static BOOL isHexDigit(char c)
{
    if (c >= '0' && c <= '9')
        return YES;
    if (c >= 'A' && c <= 'F')
        return YES;
    if (c >= 'a' && c <= 'f')
        return YES;
    return NO;
}
482
// Returns the uppercase hexadecimal digit for a value in [0, 15].
// Any value outside that range yields '0'.
static char hexDigit(int i)
{
    // 15 is the largest value one hex digit can represent; 16 must be rejected too.
    if (i < 0 || i > 15)
        return '0';

    if (i >= 10)
        return static_cast<char>('A' + (i - 10));
    return static_cast<char>('0' + i);
}
490
// Returns the numeric value (0-15) of an ASCII hexadecimal digit.
// Logs an error and returns 0 for any other character.
static int hexDigitValue(char c)
{
    const bool isDecimal = c >= '0' && c <= '9';
    const bool isUpperHex = c >= 'A' && c <= 'F';
    const bool isLowerHex = c >= 'a' && c <= 'f';

    if (isDecimal)
        return c - '0';
    if (isUpperHex)
        return 10 + (c - 'A');
    if (isLowerHex)
        return 10 + (c - 'a');

    LOG_ERROR("illegal hex digit");
    return 0;
}
505
// Returns an autoreleased mutable copy of `string` after CFStringTrimWhitespace has
// been applied to it.
static NSString *stringByTrimmingWhitespace(NSString *string)
{
    NSMutableString *working = [string mutableCopy];
    CFStringTrimWhitespace((CFMutableStringRef)working);
    return [working autorelease];
}
512
// Returns a URL made from the bytes of `URL` that precede the given component, minus
// one more byte (the separator in front of the component). Returns `URL` unchanged if
// the component is absent or the truncated bytes cannot be turned into a URL, and nil
// if `URL` is nil.
NSURL *URLByTruncatingOneCharacterBeforeComponent(NSURL *URL, CFIndex component)
{
    if (!URL)
        return nil;

    CFURLRef cfURL = (CFURLRef)URL;
    CFRange componentRange = CFURLGetByteRangeForComponent(cfURL, static_cast<CFURLComponentType>(component), NULL);
    if (componentRange.location == kCFNotFound)
        return URL;

    // Try a stack buffer first; fall back to the heap when the URL does not fit.
    UInt8 stackBytes[2048];
    UInt8 *bytes = stackBytes;
    CFIndex byteCount = CFURLGetBytes(cfURL, stackBytes, 2048);
    if (byteCount == -1) {
        byteCount = CFURLGetBytes(cfURL, NULL, 0);
        bytes = static_cast<UInt8*>(malloc(byteCount));
        CFURLGetBytes(cfURL, bytes, byteCount);
    }

    // Interpret the bytes as UTF-8, then as ISO Latin 1 if that fails.
    const CFIndex truncatedLength = componentRange.location - 1;
    NSURL *truncated = (NSURL *)CFMakeCollectable(CFURLCreateWithBytes(NULL, bytes, truncatedLength, kCFStringEncodingUTF8, NULL));
    if (!truncated)
        truncated = (NSURL *)CFMakeCollectable(CFURLCreateWithBytes(NULL, bytes, truncatedLength, kCFStringEncodingISOLatin1, NULL));

    if (bytes != stackBytes)
        free(bytes);

    if (!truncated)
        return URL;
    return [truncated autorelease];
}
539
// Returns `URL` cut off one character before its resource specifier component.
static NSURL *URLByRemovingResourceSpecifier(NSURL *URL)
{
    NSURL *stripped = URLByTruncatingOneCharacterBeforeComponent(URL, kCFURLComponentResourceSpecifier);
    return stripped;
}
544
// Builds an absolute NSURL from raw URL bytes, resolved against `baseURL`.
// Returns nil for nil data, and for a path ('/'-prefixed bytes) with no base URL.
// Empty data yields the URL for the empty string.
NSURL *URLWithData(NSData *data, NSURL *baseURL)
{
    if (!data)
        return nil;

    size_t length = [data length];
    if (!length)
        return [NSURL URLWithString:@""];

    // work around <rdar://4470771>: CFURLCreateAbsoluteURLWithBytes(.., TRUE) doesn't remove non-path components.
    baseURL = URLByRemovingResourceSpecifier(baseURL);

    const UInt8 *bytes = static_cast<const UInt8*>([data bytes]);

    // CFURLCreateAbsoluteURLWithBytes would complain to console if we passed a path to it.
    if (!baseURL && bytes[0] == '/')
        return nil;

    // NOTE: UTF-8 is tried first because it is the encoding used when computing strings
    // for URL components (e.g. calls to NSURL -path). That call is not tolerant of
    // illegal UTF-8 sequences, which could be a malformed string or bytes in another
    // encoding such as shift-jis, so ISO Latin 1 is the fallback in those cases.
    NSURL *result = WebCoreCFAutorelease(CFURLCreateAbsoluteURLWithBytes(NULL, bytes, length, kCFStringEncodingUTF8, (CFURLRef)baseURL, YES));
    if (result)
        return result;

    return WebCoreCFAutorelease(CFURLCreateAbsoluteURLWithBytes(NULL, bytes, length, kCFStringEncodingISOLatin1, (CFURLRef)baseURL, YES));
}
574
// Turns text typed by the user into an NSURL relative to `URL`: surrounding whitespace
// is trimmed, host names are IDN-encoded, and every UTF-8 byte that is <= 0x20 or
// >= 0x7f is %-escaped. Returns nil for a nil string.
NSURL *URLWithUserTypedString(NSString *string, NSURL *URL)
{
    if (!string)
        return nil;

    string = mapHostNames(stringByTrimmingWhitespace(string), YES);

    NSData *userTypedData = [string dataUsingEncoding:NSUTF8StringEncoding];
    ASSERT(userTypedData);

    const UInt8* source = static_cast<const UInt8 *>([userTypedData bytes]);
    int sourceLength = [userTypedData length];
    if (!sourceLength)
        return [NSURL URLWithString:@""];

    // Worst case every byte becomes a three-character escape.
    char* escaped = static_cast<char *>(malloc(sourceLength * 3));
    int escapedLength = 0;
    for (int index = 0; index < sourceLength; index++) {
        UInt8 byte = source[index];
        bool needsEscape = byte <= 0x20 || byte >= 0x7f;
        if (!needsEscape) {
            escaped[escapedLength++] = byte;
            continue;
        }
        escaped[escapedLength++] = '%';
        escaped[escapedLength++] = hexDigit(byte >> 4);
        escaped[escapedLength++] = hexDigit(byte & 0xf);
    }

    // The NSData adopts the malloc'ed buffer.
    NSData *data = [NSData dataWithBytesNoCopy:escaped length:escapedLength];
    return URLWithData(data, URL);
}
609
// Returns YES when the query component of `URL`, measured together with its
// separators, is exactly one byte long.
static BOOL hasQuestionMarkOnlyQueryString(NSURL *URL)
{
    CFRange queryWithSeparators;
    CFURLGetByteRangeForComponent((CFURLRef)URL, kCFURLComponentQuery, &queryWithSeparators);

    BOOL found = queryWithSeparators.location != kCFNotFound;
    return (found && queryWithSeparators.length == 1) ? YES : NO;
}
619
// Pseudo component type meaning "the whole URL" for dataForURLComponentType().
#define completeURL (CFURLComponentType)-1

// Returns the bytes of one component of `URL` (or of the whole URL when componentType
// is completeURL), with every byte <= 0x20 or >= 0x7f %-escaped.
//
// A query component is returned with a leading '?' when it is non-empty or when the
// URL has a question-mark-only query string. Returns nil if the URL does not have the
// requested component.
NSData *dataForURLComponentType(NSURL *URL, CFIndex componentType)
{
    // A true compile-time constant, so the stack buffer is not a variable-length array.
    static const CFIndex stackBufferLength = 2048;

    // Resolve the component range before fetching any bytes, so that the "not found"
    // exit cannot leak a heap buffer.
    CFRange range = CFRangeMake(0, 0);
    if (componentType != completeURL) {
        range = CFURLGetByteRangeForComponent((CFURLRef)URL, static_cast<CFURLComponentType>(componentType), NULL);
        if (range.location == kCFNotFound)
            return nil;
    }

    // Fetch all URL bytes: stack buffer first, heap when the URL does not fit.
    UInt8 stackBuffer[stackBufferLength];
    UInt8 *allBytes = stackBuffer;
    CFIndex bytesFilled = CFURLGetBytes((CFURLRef)URL, allBytes, stackBufferLength);
    if (bytesFilled == -1) {
        CFIndex bytesToAllocate = CFURLGetBytes((CFURLRef)URL, NULL, 0);
        allBytes = static_cast<UInt8 *>(malloc(bytesToAllocate));
        bytesFilled = CFURLGetBytes((CFURLRef)URL, allBytes, bytesToAllocate);
    }

    if (componentType == completeURL)
        range = CFRangeMake(0, bytesFilled);

    NSMutableData *resultData = [NSMutableData data];
    // NOTE: add a leading '?' to non-zero length query strings.
    // NOTE: retain question-mark only query strings.
    if (componentType == kCFURLComponentQuery) {
        if (range.length > 0 || hasQuestionMarkOnlyQueryString(URL))
            [resultData appendBytes:"?" length:1];
    }

    const UInt8 *component = allBytes + range.location;
    for (CFIndex i = 0; i < range.length; i++) {
        unsigned char c = component[i];
        if (c <= 0x20 || c >= 0x7f) {
            char escaped[3];
            escaped[0] = '%';
            escaped[1] = hexDigit(c >> 4);
            escaped[2] = hexDigit(c & 0xf);
            [resultData appendBytes:escaped length:3];
        } else {
            char b[1];
            b[0] = c;
            [resultData appendBytes:b length:1];
        }
    }

    if (allBytes != stackBuffer)
        free(allBytes);

    return resultData;
}
676
// Returns a copy of `URL` with the given component, and the one character following
// it, removed from the URL's bytes. Returns `URL` unchanged if the component is absent
// or the remaining bytes cannot be turned into a URL.
static NSURL *URLByRemovingComponentAndSubsequentCharacter(NSURL *URL, CFURLComponentType component)
{
    CFRange range = CFURLGetByteRangeForComponent((CFURLRef)URL, component, 0);
    if (range.location == kCFNotFound)
        return URL;

    // Remove one subsequent character.
    range.length++;

    // Fetch all URL bytes: stack buffer first, heap when the URL does not fit.
    UInt8* urlBytes;
    UInt8 buffer[2048];
    CFIndex numBytes = CFURLGetBytes((CFURLRef)URL, buffer, 2048);
    if (numBytes == -1) {
        numBytes = CFURLGetBytes((CFURLRef)URL, NULL, 0);
        urlBytes = static_cast<UInt8*>(malloc(numBytes));
        CFURLGetBytes((CFURLRef)URL, urlBytes, numBytes);
    } else
        urlBytes = buffer;

    if (numBytes < range.location) {
        // Do not leak the heap buffer on this early exit.
        if (urlBytes != buffer)
            free(urlBytes);
        return URL;
    }
    if (numBytes < range.location + range.length)
        range.length = numBytes - range.location;

    // Close the gap: move only the bytes that follow the removed range. Both the
    // range's location and its length are subtracted, so the copy stays in bounds.
    const CFIndex tailLength = numBytes - (range.location + range.length);
    memmove(urlBytes + range.location, urlBytes + range.location + range.length, tailLength);

    // Interpret the bytes as UTF-8, then as ISO Latin 1 if that fails.
    NSURL *result = (NSURL *)CFMakeCollectable(CFURLCreateWithBytes(NULL, urlBytes, numBytes - range.length, kCFStringEncodingUTF8, NULL));
    if (!result)
        result = (NSURL *)CFMakeCollectable(CFURLCreateWithBytes(NULL, urlBytes, numBytes - range.length, kCFStringEncodingISOLatin1, NULL));

    if (urlBytes != buffer)
        free(urlBytes);

    return result ? [result autorelease] : URL;
}
712
// Returns `URL` without its user info component and the character that follows it.
NSURL *URLByRemovingUserInfo(NSURL *URL)
{
    NSURL *stripped = URLByRemovingComponentAndSubsequentCharacter(URL, kCFURLComponentUserInfo);
    return stripped;
}
717
// Returns the bytes of `URL` as NSData. When the URL has a base URL, the bytes are
// first resolved against it via URLWithData() and the function is applied again to
// the result.
NSData *originalURLData(NSURL *URL)
{
    CFURLRef cfURL = (CFURLRef)URL;

    // Start with a fixed-size heap buffer and grow it only if the URL does not fit.
    UInt8 *bytes = (UInt8 *)malloc(URL_BYTES_BUFFER_LENGTH);
    CFIndex filled = CFURLGetBytes(cfURL, bytes, URL_BYTES_BUFFER_LENGTH);
    if (filled == -1) {
        CFIndex needed = CFURLGetBytes(cfURL, NULL, 0);
        bytes = (UInt8 *)realloc(bytes, needed);
        filled = CFURLGetBytes(cfURL, bytes, needed);
        ASSERT(filled == needed);
    }

    // The NSData adopts the buffer and frees it when done.
    NSData *data = [NSData dataWithBytesNoCopy:bytes length:filled freeWhenDone:YES];

    NSURL *baseURL = (NSURL *)CFURLGetBaseURL(cfURL);
    if (!baseURL)
        return data;
    return originalURLData(URLWithData(data, baseURL));
}
737
// Creates a copy of `string` in which every lookalike character (see
// isLookalikeCharacter) is replaced by the %-escaped bytes of its UTF-8 encoding.
// All other characters are copied through. The caller owns the returned string.
static CFStringRef createStringWithEscapedUnsafeCharacters(CFStringRef string)
{
    CFIndex length = CFStringGetLength(string);
    Vector<UChar, 2048> sourceBuffer(length);
    CFStringGetCharacters(string, CFRangeMake(0, length), sourceBuffer.data());

    Vector<UChar, 2048> outBuffer;

    // U16_NEXT advances `position` past each code point it reads.
    for (CFIndex position = 0; position < length; ) {
        UChar32 codePoint;
        U16_NEXT(sourceBuffer, position, length, codePoint);

        if (!isLookalikeCharacter(codePoint)) {
            // Safe: copy the code point through as UTF-16.
            UChar utf16Units[2];
            CFIndex unitCount = 0;
            UBool failure = false;
            U16_APPEND(utf16Units, unitCount, 2, codePoint, failure);
            ASSERT(!failure);
            for (CFIndex k = 0; k < unitCount; ++k)
                outBuffer.append(utf16Units[k]);
            continue;
        }

        // Unsafe: emit "%XX" for each byte of the UTF-8 encoding.
        uint8_t utf8Bytes[4];
        CFIndex byteCount = 0;
        UBool failure = false;
        U8_APPEND(utf8Bytes, byteCount, 4, codePoint, failure);
        ASSERT(!failure);
        for (CFIndex k = 0; k < byteCount; ++k) {
            outBuffer.append('%');
            outBuffer.append(hexDigit(utf8Bytes[k] >> 4));
            outBuffer.append(hexDigit(utf8Bytes[k] & 0xf));
        }
    }

    return CFStringCreateWithCharacters(NULL, outBuffer.data(), outBuffer.size());
}
776
// Returns a string for `URL` that is suitable for showing to the user.
//
// Escape sequences for bytes greater than 0x7f are unescaped when the result is valid
// UTF-8 (otherwise those bytes are escaped again), host names are mapped (decoded when
// an "xn--" label was seen, encoded otherwise), the string is canonically precomposed,
// and lookalike characters are %-escaped.
NSString *userVisibleString(NSURL *URL)
{
    NSData *data = originalURLData(URL);
    const unsigned char *before = static_cast<const unsigned char*>([data bytes]);
    int length = [data length];

    bool needsHostNameDecoding = false;

    int bufferLength = (length * 3) + 1;
    char *after = static_cast<char *>(malloc(bufferLength)); // large enough to %-escape every character
    char *q = after;
    for (int i = 0; i < length; i++) {
        unsigned char c = before[i];
        // unescape escape sequences that indicate bytes greater than 0x7f
        if (c == '%' && i + 2 < length && isHexDigit(before[i + 1]) && isHexDigit(before[i + 2])) {
            unsigned char u = (hexDigitValue(before[i + 1]) << 4) | hexDigitValue(before[i + 2]);
            if (u > 0x7f) {
                // unescape
                *q++ = u;
            } else {
                // do not unescape
                *q++ = before[i];
                *q++ = before[i + 1];
                *q++ = before[i + 2];
            }
            i += 2;
        } else {
            *q++ = c;

            // Check for "xn--" in an efficient, non-case-sensitive, way.
            // The look-back is into the OUTPUT buffer, which can be shorter than the
            // input consumed so far (an unescaped %XX is three bytes in, one byte out),
            // so the guard has to be on the output length, not on the input index.
            if (c == '-' && !needsHostNameDecoding && q - after >= 4
                && (q[-4] | 0x20) == 'x' && (q[-3] | 0x20) == 'n' && q[-2] == '-')
                needsHostNameDecoding = true;
        }
    }
    *q = '\0';

    // Check string to see if it can be converted to display using UTF-8
    NSString *result = [NSString stringWithUTF8String:after];
    if (!result) {
        // Could not convert to UTF-8.
        // Convert characters greater than 0x7f to escape sequences.
        // Shift the current string to the end of the buffer, then copy bytes back to
        // the start of the buffer as we convert.
        int afterLength = q - after;
        char *read = after + bufferLength - afterLength - 1;
        memmove(read, after, afterLength + 1); // copies trailing '\0'
        char *write = after;
        while (*read) {
            unsigned char c = *read;
            if (c > 0x7f) {
                *write++ = '%';
                *write++ = hexDigit(c >> 4);
                *write++ = hexDigit(c & 0xf);
            } else
                *write++ = *read;
            read++;
        }
        *write = '\0';
        result = [NSString stringWithUTF8String:after];
    }

    free(after);

    result = mapHostNames(result, !needsHostNameDecoding);
    result = [result precomposedStringWithCanonicalMapping];
    return WebCoreCFAutorelease(createStringWithEscapedUnsafeCharacters((CFStringRef)result));
}
846
// Returns YES if `string` contains none of the things that would make
// userVisibleString() change it: a byte <= 0x20 or equal to 0x7f, a %-escape sequence
// for a byte greater than 0x7f, or an "xn--" sequence.
BOOL isUserVisibleURL(NSString *string)
{
    // Get the UTF-8 bytes, using a stack buffer when the string fits.
    char stackBuffer[1024];
    BOOL fitsOnStack = CFStringGetCString((CFStringRef)string, stackBuffer, 1023, kCFStringEncodingUTF8);
    const char *bytes = fitsOnStack ? stackBuffer : [string UTF8String];

    int length = strlen(bytes);

    for (int i = 0; i < length; i++) {
        unsigned char c = bytes[i];

        // Control characters, space, and delete would be escaped.
        if (c <= 0x20 || c == 0x7f)
            return NO;

        if (c == '%' && i + 2 < length && isHexDigit(bytes[i + 1]) && isHexDigit(bytes[i + 2])) {
            // An escape sequence for a byte above 0x7f would be unescaped.
            unsigned char decoded = (hexDigitValue(bytes[i + 1]) << 4) | hexDigitValue(bytes[i + 2]);
            if (decoded > 0x7f)
                return NO;
            i += 2;
            continue;
        }

        // Check for "xn--" in an efficient, non-case-sensitive, way.
        if (c == '-' && i >= 3 && (bytes[i - 3] | 0x20) == 'x' && (bytes[i - 2] | 0x20) == 'n' && bytes[i - 1] == '-')
            return NO;
    }

    return YES;
}
885
886 } // namespace WebCore
OLDNEW
« no previous file with comments | « Source/WebCore/platform/mac/WebCoreNSURLExtras.h ('k') | Source/WebCore/platform/mac/WebCoreObjCExtras.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698