OLD | NEW |
| (Empty) |
1 // Copyright 2011 the V8 project authors. All rights reserved. | |
2 // Redistribution and use in source and binary forms, with or without | |
3 // modification, are permitted provided that the following conditions are | |
4 // met: | |
5 // | |
6 // * Redistributions of source code must retain the above copyright | |
7 // notice, this list of conditions and the following disclaimer. | |
8 // * Redistributions in binary form must reproduce the above | |
9 // copyright notice, this list of conditions and the following | |
10 // disclaimer in the documentation and/or other materials provided | |
11 // with the distribution. | |
12 // * Neither the name of Google Inc. nor the names of its | |
13 // contributors may be used to endorse or promote products derived | |
14 // from this software without specific prior written permission. | |
15 // | |
16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
17 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
18 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
19 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
20 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
21 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
22 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
27 | |
28 // TODO(cira): Remove LanguageMatcher from v8 when ICU implements | |
29 // language matching API. | |
30 | |
31 #include "src/extensions/experimental/language-matcher.h" | |
32 | |
33 #include <string.h> | |
34 | |
35 #include "src/extensions/experimental/i18n-utils.h" | |
36 #include "unicode/datefmt.h" // For getAvailableLocales | |
37 #include "unicode/locid.h" | |
38 #include "unicode/uloc.h" | |
39 | |
40 namespace v8 { | |
41 namespace internal { | |
42 | |
// Scoring weights applied per locale subtag when comparing a requested locale
// against an available one. Each weight is added on a subtag match and
// subtracted on a mismatch, so a full match scores 75 + 20 + 5 = 100.
const unsigned int LanguageMatcher::kLanguageWeight = 75;
const unsigned int LanguageMatcher::kScriptWeight = 20;
const unsigned int LanguageMatcher::kRegionWeight = 5;
// Minimum score a candidate must reach to be accepted as a match.
const unsigned int LanguageMatcher::kThreshold = 50;
// Bonus per remaining list position, so entries earlier in a priority list
// score higher than later ones.
const unsigned int LanguageMatcher::kPositionBonus = 1;
// Locale id that a default-constructed LocaleIDMatch is initialized with.
const char* const LanguageMatcher::kDefaultLocale = "root";
49 | |
// File-local helpers, defined at the bottom of this file.
// Maps selected unsupported language subtags to a related supported one.
static const char* GetLanguageException(const char*);
// Converts a BCP47 locale id into ICU format; returns false on failure.
static bool BCP47ToICUFormat(const char*, char*);
// Returns 1 if the two subtags are equal, -1 otherwise.
static int CompareLocaleSubtags(const char*, const char*);
// Fills in the ICU and BCP47 ids of a match; returns false on failure.
static bool BuildLocaleName(const char*, const char*, LocaleIDMatch*);
54 | |
55 LocaleIDMatch::LocaleIDMatch() | |
56 : score(-1) { | |
57 I18NUtils::StrNCopy( | |
58 bcp47_id, ULOC_FULLNAME_CAPACITY, LanguageMatcher::kDefaultLocale); | |
59 | |
60 I18NUtils::StrNCopy( | |
61 icu_id, ULOC_FULLNAME_CAPACITY, LanguageMatcher::kDefaultLocale); | |
62 } | |
63 | |
64 LocaleIDMatch& LocaleIDMatch::operator=(const LocaleIDMatch& rhs) { | |
65 I18NUtils::StrNCopy(this->bcp47_id, ULOC_FULLNAME_CAPACITY, rhs.bcp47_id); | |
66 I18NUtils::StrNCopy(this->icu_id, ULOC_FULLNAME_CAPACITY, rhs.icu_id); | |
67 this->score = rhs.score; | |
68 | |
69 return *this; | |
70 } | |
71 | |
72 // static | |
73 void LanguageMatcher::GetBestMatchForPriorityList( | |
74 v8::Handle<v8::Array> locales, LocaleIDMatch* result) { | |
75 v8::HandleScope handle_scope; | |
76 | |
77 unsigned int position_bonus = locales->Length() * kPositionBonus; | |
78 | |
79 int max_score = 0; | |
80 LocaleIDMatch match; | |
81 for (unsigned int i = 0; i < locales->Length(); ++i) { | |
82 position_bonus -= kPositionBonus; | |
83 | |
84 v8::TryCatch try_catch; | |
85 v8::Local<v8::Value> locale_id = locales->Get(v8::Integer::New(i)); | |
86 | |
87 // Return default if exception is raised when reading parameter. | |
88 if (try_catch.HasCaught()) break; | |
89 | |
90 // JavaScript arrays can be heterogenous so check each item | |
91 // if it's a string. | |
92 if (!locale_id->IsString()) continue; | |
93 | |
94 if (!CompareToSupportedLocaleIDList(locale_id->ToString(), &match)) { | |
95 continue; | |
96 } | |
97 | |
98 // Skip items under threshold. | |
99 if (match.score < kThreshold) continue; | |
100 | |
101 match.score += position_bonus; | |
102 if (match.score > max_score) { | |
103 *result = match; | |
104 | |
105 max_score = match.score; | |
106 } | |
107 } | |
108 } | |
109 | |
110 // static | |
111 void LanguageMatcher::GetBestMatchForString( | |
112 v8::Handle<v8::String> locale, LocaleIDMatch* result) { | |
113 LocaleIDMatch match; | |
114 | |
115 if (CompareToSupportedLocaleIDList(locale, &match) && | |
116 match.score >= kThreshold) { | |
117 *result = match; | |
118 } | |
119 } | |
120 | |
121 // static | |
122 bool LanguageMatcher::CompareToSupportedLocaleIDList( | |
123 v8::Handle<v8::String> locale_id, LocaleIDMatch* result) { | |
124 static int32_t available_count = 0; | |
125 // Depending on how ICU data is built, locales returned by | |
126 // Locale::getAvailableLocale() are not guaranteed to support DateFormat, | |
127 // Collation and other services. We can call getAvailableLocale() of all the | |
128 // services we want to support and take the intersection of them all, but | |
129 // using DateFormat::getAvailableLocales() should suffice. | |
130 // TODO(cira): Maybe make this thread-safe? | |
131 static const icu::Locale* available_locales = | |
132 icu::DateFormat::getAvailableLocales(available_count); | |
133 | |
134 // Skip this locale_id if it's not in ASCII. | |
135 static LocaleIDMatch default_match; | |
136 v8::String::AsciiValue ascii_value(locale_id); | |
137 if (*ascii_value == NULL) return false; | |
138 | |
139 char locale[ULOC_FULLNAME_CAPACITY]; | |
140 if (!BCP47ToICUFormat(*ascii_value, locale)) return false; | |
141 | |
142 icu::Locale input_locale(locale); | |
143 | |
144 // Position of the best match locale in list of available locales. | |
145 int position = -1; | |
146 const char* language = GetLanguageException(input_locale.getLanguage()); | |
147 const char* script = input_locale.getScript(); | |
148 const char* region = input_locale.getCountry(); | |
149 for (int32_t i = 0; i < available_count; ++i) { | |
150 int current_score = 0; | |
151 int sign = | |
152 CompareLocaleSubtags(language, available_locales[i].getLanguage()); | |
153 current_score += sign * kLanguageWeight; | |
154 | |
155 sign = CompareLocaleSubtags(script, available_locales[i].getScript()); | |
156 current_score += sign * kScriptWeight; | |
157 | |
158 sign = CompareLocaleSubtags(region, available_locales[i].getCountry()); | |
159 current_score += sign * kRegionWeight; | |
160 | |
161 if (current_score >= kThreshold && current_score > result->score) { | |
162 result->score = current_score; | |
163 position = i; | |
164 } | |
165 } | |
166 | |
167 // Didn't find any good matches so use defaults. | |
168 if (position == -1) return false; | |
169 | |
170 return BuildLocaleName(available_locales[position].getBaseName(), | |
171 input_locale.getName(), result); | |
172 } | |
173 | |
// Some language subtags are not supported directly but have a closely related
// supported language; falling back to that relative beats falling back to the
// default locale. Returns the replacement subtag for those cases and the
// |language| pointer itself for everything else.
static const char* GetLanguageException(const char* language) {
  static const struct {
    const char* unsupported;
    const char* replacement;
  } kFallbacks[] = {
    { "sh", "sr" },   // Serbo-Croatian to Serbian.
    { "no", "nb" },   // Norwegian to Norwegian Bokmal.
    { "mo", "ro" },   // Moldavian to Romanian.
    { "tl", "fil" },  // Tagalog to Filipino.
  };

  const size_t fallback_count = sizeof(kFallbacks) / sizeof(kFallbacks[0]);
  for (size_t i = 0; i < fallback_count; ++i) {
    if (strcmp(language, kFallbacks[i].unsupported) == 0) {
      return kFallbacks[i].replacement;
    }
  }

  return language;
}
191 | |
192 // Converts user input from BCP47 locale id format to ICU compatible format. | |
193 // Returns false if uloc_forLanguageTag call fails or if extension is too long. | |
194 static bool BCP47ToICUFormat(const char* locale_id, char* result) { | |
195 UErrorCode status = U_ZERO_ERROR; | |
196 int32_t locale_size = 0; | |
197 | |
198 char locale[ULOC_FULLNAME_CAPACITY]; | |
199 I18NUtils::StrNCopy(locale, ULOC_FULLNAME_CAPACITY, locale_id); | |
200 | |
201 // uloc_forLanguageTag has a bug where long extension can crash the code. | |
202 // We need to check if extension part of language id conforms to the length. | |
203 // ICU bug: http://bugs.icu-project.org/trac/ticket/8519 | |
204 const char* extension = strstr(locale_id, "-u-"); | |
205 if (extension != NULL && | |
206 strlen(extension) > ULOC_KEYWORD_AND_VALUES_CAPACITY) { | |
207 // Truncate to get non-crashing string, but still preserve base language. | |
208 int base_length = strlen(locale_id) - strlen(extension); | |
209 locale[base_length] = '\0'; | |
210 } | |
211 | |
212 uloc_forLanguageTag(locale, result, ULOC_FULLNAME_CAPACITY, | |
213 &locale_size, &status); | |
214 return !U_FAILURE(status); | |
215 } | |
216 | |
// Compares two locale id subtags for exact (case-sensitive) equality.
// Returns 1 when they are identical and -1 when they differ, so the result
// can be used directly as a sign for a score weight.
static int CompareLocaleSubtags(const char* lsubtag, const char* rsubtag) {
  if (strcmp(lsubtag, rsubtag) != 0) {
    return -1;
  }
  return 1;
}
222 | |
223 // Builds a BCP47 compliant locale id from base name of matched locale and | |
224 // full user specified locale. | |
225 // Returns false if uloc_toLanguageTag failed to convert locale id. | |
226 // Example: | |
227 // base_name of matched locale (ICU ID): de_DE | |
228 // input_locale_name (ICU ID): de_AT@collation=phonebk | |
229 // result (ICU ID): de_DE@collation=phonebk | |
230 // result (BCP47 ID): de-DE-u-co-phonebk | |
231 static bool BuildLocaleName(const char* base_name, | |
232 const char* input_locale_name, | |
233 LocaleIDMatch* result) { | |
234 I18NUtils::StrNCopy(result->icu_id, ULOC_LANG_CAPACITY, base_name); | |
235 | |
236 // Get extensions (if any) from the original locale. | |
237 const char* extension = strchr(input_locale_name, ULOC_KEYWORD_SEPARATOR); | |
238 if (extension != NULL) { | |
239 I18NUtils::StrNCopy(result->icu_id + strlen(base_name), | |
240 ULOC_KEYWORD_AND_VALUES_CAPACITY, extension); | |
241 } else { | |
242 I18NUtils::StrNCopy(result->icu_id, ULOC_LANG_CAPACITY, base_name); | |
243 } | |
244 | |
245 // Convert ICU locale name into BCP47 format. | |
246 UErrorCode status = U_ZERO_ERROR; | |
247 uloc_toLanguageTag(result->icu_id, result->bcp47_id, | |
248 ULOC_FULLNAME_CAPACITY, false, &status); | |
249 return !U_FAILURE(status); | |
250 } | |
251 | |
252 } } // namespace v8::internal | |
OLD | NEW |