V8 API Reference, 7.2.502.16 (for Deno 0.2.4)
js-locale.cc
1 // Copyright 2018 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #ifndef V8_INTL_SUPPORT
6 #error Internationalization is expected to be enabled.
7 #endif // V8_INTL_SUPPORT
8 
9 #include "src/objects/js-locale.h"
10 
11 #include <map>
12 #include <memory>
13 #include <string>
14 #include <vector>
15 
16 #include "src/api.h"
17 #include "src/global-handles.h"
18 #include "src/heap/factory.h"
19 #include "src/isolate.h"
20 #include "src/objects-inl.h"
21 #include "src/objects/intl-objects.h"
22 #include "src/objects/js-locale-inl.h"
23 #include "unicode/char16ptr.h"
24 #include "unicode/locid.h"
25 #include "unicode/uloc.h"
26 #include "unicode/unistr.h"
27 
28 namespace v8 {
29 namespace internal {
30 
31 namespace {
32 
33 JSLocale::CaseFirst GetCaseFirst(const char* str) {
34  if (strcmp(str, "upper") == 0) return JSLocale::CaseFirst::UPPER;
35  if (strcmp(str, "lower") == 0) return JSLocale::CaseFirst::LOWER;
36  if (strcmp(str, "false") == 0) return JSLocale::CaseFirst::FALSE_VALUE;
37  UNREACHABLE();
38 }
39 
40 JSLocale::HourCycle GetHourCycle(const char* str) {
41  if (strcmp(str, "h11") == 0) return JSLocale::HourCycle::H11;
42  if (strcmp(str, "h12") == 0) return JSLocale::HourCycle::H12;
43  if (strcmp(str, "h23") == 0) return JSLocale::HourCycle::H23;
44  if (strcmp(str, "h24") == 0) return JSLocale::HourCycle::H24;
45  UNREACHABLE();
46 }
47 
48 JSLocale::Numeric GetNumeric(const char* str) {
49  return strcmp(str, "true") == 0 ? JSLocale::Numeric::TRUE_VALUE
50  : JSLocale::Numeric::FALSE_VALUE;
51 }
52 
// Describes one entry of the Intl.Locale options bag and the Unicode
// extension keyword it is written to. Used as a plain aggregate, so the field
// order is part of the contract.
struct OptionData {
  // Property name looked up on the options object (e.g. "hourCycle").
  const char* name;
  // Two-letter Unicode extension key the option maps to (e.g. "hc").
  const char* key;
  // List of values handed to Intl::GetStringOption for this option; not owned.
  const std::vector<const char*>* possible_values;
  // True when the option is read with Intl::GetBoolOption rather than
  // Intl::GetStringOption.
  bool is_bool_value;
};
59 
60 // Inserts tags from options into locale string.
61 Maybe<bool> InsertOptionsIntoLocale(Isolate* isolate,
62  Handle<JSReceiver> options,
63  char* icu_locale) {
64  CHECK(isolate);
65  CHECK(icu_locale);
66 
67  const std::vector<const char*> hour_cycle_values = {"h11", "h12", "h23",
68  "h24"};
69  const std::vector<const char*> case_first_values = {"upper", "lower",
70  "false"};
71  const std::vector<const char*> empty_values = {};
72  const std::array<OptionData, 6> kOptionToUnicodeTagMap = {
73  {{"calendar", "ca", &empty_values, false},
74  {"collation", "co", &empty_values, false},
75  {"hourCycle", "hc", &hour_cycle_values, false},
76  {"caseFirst", "kf", &case_first_values, false},
77  {"numeric", "kn", &empty_values, true},
78  {"numberingSystem", "nu", &empty_values, false}}};
79 
80  // TODO(cira): Pass in values as per the spec to make this to be
81  // spec compliant.
82 
83  for (const auto& option_to_bcp47 : kOptionToUnicodeTagMap) {
84  std::unique_ptr<char[]> value_str = nullptr;
85  bool value_bool = false;
86  Maybe<bool> maybe_found =
87  option_to_bcp47.is_bool_value
88  ? Intl::GetBoolOption(isolate, options, option_to_bcp47.name,
89  "locale", &value_bool)
90  : Intl::GetStringOption(isolate, options, option_to_bcp47.name,
91  *(option_to_bcp47.possible_values),
92  "locale", &value_str);
93  MAYBE_RETURN(maybe_found, Nothing<bool>());
94 
95  // TODO(cira): Use fallback value if value is not found to make
96  // this spec compliant.
97  if (!maybe_found.FromJust()) continue;
98 
99  if (option_to_bcp47.is_bool_value) {
100  value_str = value_bool ? isolate->factory()->true_string()->ToCString()
101  : isolate->factory()->false_string()->ToCString();
102  }
103  DCHECK_NOT_NULL(value_str.get());
104 
105  // Convert bcp47 key and value into legacy ICU format so we can use
106  // uloc_setKeywordValue.
107  const char* key = uloc_toLegacyKey(option_to_bcp47.key);
108  DCHECK_NOT_NULL(key);
109 
110  // Overwrite existing, or insert new key-value to the locale string.
111  const char* value = uloc_toLegacyType(key, value_str.get());
112  UErrorCode status = U_ZERO_ERROR;
113  if (value) {
114  // TODO(cira): ICU puts artificial limit on locale length, while BCP47
115  // doesn't. Switch to C++ API when it's ready.
116  // Related ICU bug - https://ssl.icu-project.org/trac/ticket/13417.
117  uloc_setKeywordValue(key, value, icu_locale, ULOC_FULLNAME_CAPACITY,
118  &status);
119  if (U_FAILURE(status) || status == U_STRING_NOT_TERMINATED_WARNING) {
120  return Just(false);
121  }
122  } else {
123  return Just(false);
124  }
125  }
126 
127  return Just(true);
128 }
129 
130 // Fills in the JSLocale object slots with Unicode tag/values.
131 bool PopulateLocaleWithUnicodeTags(Isolate* isolate, const char* icu_locale,
132  Handle<JSLocale> locale_holder) {
133  CHECK(isolate);
134  CHECK(icu_locale);
135 
136  Factory* factory = isolate->factory();
137 
138  UErrorCode status = U_ZERO_ERROR;
139  UEnumeration* keywords = uloc_openKeywords(icu_locale, &status);
140  if (!keywords) return true;
141 
142  char value[ULOC_FULLNAME_CAPACITY];
143  while (const char* keyword = uenum_next(keywords, nullptr, &status)) {
144  uloc_getKeywordValue(icu_locale, keyword, value, ULOC_FULLNAME_CAPACITY,
145  &status);
146  if (U_FAILURE(status)) {
147  status = U_ZERO_ERROR;
148  continue;
149  }
150 
151  // Ignore those we don't recognize - spec allows that.
152  const char* bcp47_key = uloc_toUnicodeLocaleKey(keyword);
153  if (bcp47_key) {
154  const char* bcp47_value = uloc_toUnicodeLocaleType(bcp47_key, value);
155  if (bcp47_value) {
156  if (strcmp(bcp47_key, "kn") == 0) {
157  locale_holder->set_numeric(GetNumeric(bcp47_value));
158  } else if (strcmp(bcp47_key, "ca") == 0) {
159  Handle<String> bcp47_handle =
160  factory->NewStringFromAsciiChecked(bcp47_value);
161  locale_holder->set_calendar(*bcp47_handle);
162  } else if (strcmp(bcp47_key, "kf") == 0) {
163  locale_holder->set_case_first(GetCaseFirst(bcp47_value));
164  } else if (strcmp(bcp47_key, "co") == 0) {
165  Handle<String> bcp47_handle =
166  factory->NewStringFromAsciiChecked(bcp47_value);
167  locale_holder->set_collation(*bcp47_handle);
168  } else if (strcmp(bcp47_key, "hc") == 0) {
169  locale_holder->set_hour_cycle(GetHourCycle(bcp47_value));
170  } else if (strcmp(bcp47_key, "nu") == 0) {
171  Handle<String> bcp47_handle =
172  factory->NewStringFromAsciiChecked(bcp47_value);
173  locale_holder->set_numbering_system(*bcp47_handle);
174  }
175  }
176  }
177  }
178 
179  uenum_close(keywords);
180 
181  return true;
182 }
183 } // namespace
184 
185 MaybeHandle<JSLocale> JSLocale::Initialize(Isolate* isolate,
186  Handle<JSLocale> locale_holder,
187  Handle<String> locale,
188  Handle<JSReceiver> options) {
189  locale_holder->set_flags(0);
190  static const char* const kMethod = "Intl.Locale";
191  v8::Isolate* v8_isolate = reinterpret_cast<v8::Isolate*>(isolate);
192  UErrorCode status = U_ZERO_ERROR;
193 
194  // Get ICU locale format, and canonicalize it.
195  char icu_result[ULOC_FULLNAME_CAPACITY];
196 
197  if (locale->length() == 0) {
198  THROW_NEW_ERROR(isolate, NewRangeError(MessageTemplate::kLocaleNotEmpty),
199  JSLocale);
200  }
201 
202  v8::String::Utf8Value bcp47_locale(v8_isolate, v8::Utils::ToLocal(locale));
203  CHECK_LT(0, bcp47_locale.length());
204  CHECK_NOT_NULL(*bcp47_locale);
205 
206  int parsed_length = 0;
207  int icu_length =
208  uloc_forLanguageTag(*bcp47_locale, icu_result, ULOC_FULLNAME_CAPACITY,
209  &parsed_length, &status);
210 
211  if (U_FAILURE(status) ||
212  parsed_length < static_cast<int>(bcp47_locale.length()) ||
213  status == U_STRING_NOT_TERMINATED_WARNING || icu_length == 0) {
214  THROW_NEW_ERROR(
215  isolate,
216  NewRangeError(MessageTemplate::kLocaleBadParameters,
217  isolate->factory()->NewStringFromAsciiChecked(kMethod),
218  locale_holder),
219  JSLocale);
220  }
221 
222  Maybe<bool> error = InsertOptionsIntoLocale(isolate, options, icu_result);
223  MAYBE_RETURN(error, MaybeHandle<JSLocale>());
224  if (!error.FromJust()) {
225  THROW_NEW_ERROR(
226  isolate,
227  NewRangeError(MessageTemplate::kLocaleBadParameters,
228  isolate->factory()->NewStringFromAsciiChecked(kMethod),
229  locale_holder),
230  JSLocale);
231  }
232 
233  if (!PopulateLocaleWithUnicodeTags(isolate, icu_result, locale_holder)) {
234  THROW_NEW_ERROR(
235  isolate,
236  NewRangeError(MessageTemplate::kLocaleBadParameters,
237  isolate->factory()->NewStringFromAsciiChecked(kMethod),
238  locale_holder),
239  JSLocale);
240  }
241 
242  // Extract language, script and region parts.
243  char icu_language[ULOC_LANG_CAPACITY];
244  uloc_getLanguage(icu_result, icu_language, ULOC_LANG_CAPACITY, &status);
245 
246  char icu_script[ULOC_SCRIPT_CAPACITY];
247  uloc_getScript(icu_result, icu_script, ULOC_SCRIPT_CAPACITY, &status);
248 
249  char icu_region[ULOC_COUNTRY_CAPACITY];
250  uloc_getCountry(icu_result, icu_region, ULOC_COUNTRY_CAPACITY, &status);
251 
252  if (U_FAILURE(status) || status == U_STRING_NOT_TERMINATED_WARNING) {
253  THROW_NEW_ERROR(
254  isolate,
255  NewRangeError(MessageTemplate::kLocaleBadParameters,
256  isolate->factory()->NewStringFromAsciiChecked(kMethod),
257  locale_holder),
258  JSLocale);
259  }
260 
261  Factory* factory = isolate->factory();
262 
263  // NOTE: One shouldn't use temporary handles, because they can go out of
264  // scope and be garbage collected before properly assigned.
265  // DON'T DO THIS: locale_holder->set_language(*f->NewStringAscii...);
266  Handle<String> language = factory->NewStringFromAsciiChecked(icu_language);
267  locale_holder->set_language(*language);
268 
269  if (strlen(icu_script) != 0) {
270  Handle<String> script = factory->NewStringFromAsciiChecked(icu_script);
271  locale_holder->set_script(*script);
272  }
273 
274  if (strlen(icu_region) != 0) {
275  Handle<String> region = factory->NewStringFromAsciiChecked(icu_region);
276  locale_holder->set_region(*region);
277  }
278 
279  char icu_base_name[ULOC_FULLNAME_CAPACITY];
280  uloc_getBaseName(icu_result, icu_base_name, ULOC_FULLNAME_CAPACITY, &status);
281  // We need to convert it back to BCP47.
282  char bcp47_result[ULOC_FULLNAME_CAPACITY];
283  uloc_toLanguageTag(icu_base_name, bcp47_result, ULOC_FULLNAME_CAPACITY, true,
284  &status);
285  if (U_FAILURE(status) || status == U_STRING_NOT_TERMINATED_WARNING) {
286  THROW_NEW_ERROR(
287  isolate,
288  NewRangeError(MessageTemplate::kLocaleBadParameters,
289  isolate->factory()->NewStringFromAsciiChecked(kMethod),
290  locale_holder),
291  JSLocale);
292  }
293  Handle<String> base_name = factory->NewStringFromAsciiChecked(bcp47_result);
294  locale_holder->set_base_name(*base_name);
295 
296  // Produce final representation of the locale string, for toString().
297  uloc_toLanguageTag(icu_result, bcp47_result, ULOC_FULLNAME_CAPACITY, true,
298  &status);
299  if (U_FAILURE(status) || status == U_STRING_NOT_TERMINATED_WARNING) {
300  THROW_NEW_ERROR(
301  isolate,
302  NewRangeError(MessageTemplate::kLocaleBadParameters,
303  isolate->factory()->NewStringFromAsciiChecked(kMethod),
304  locale_holder),
305  JSLocale);
306  }
307  Handle<String> locale_handle =
308  factory->NewStringFromAsciiChecked(bcp47_result);
309  locale_holder->set_locale(*locale_handle);
310 
311  return locale_holder;
312 }
313 
314 namespace {
315 
316 Handle<String> MorphLocale(Isolate* isolate, String language_tag,
317  int32_t (*morph_func)(const char*, char*, int32_t,
318  UErrorCode*)) {
319  Factory* factory = isolate->factory();
320  char localeBuffer[ULOC_FULLNAME_CAPACITY];
321  char morphBuffer[ULOC_FULLNAME_CAPACITY];
322 
323  UErrorCode status = U_ZERO_ERROR;
324  // Convert from language id to locale.
325  int32_t parsed_length;
326  int32_t length =
327  uloc_forLanguageTag(language_tag->ToCString().get(), localeBuffer,
328  ULOC_FULLNAME_CAPACITY, &parsed_length, &status);
329  CHECK(parsed_length == language_tag->length());
330  DCHECK(U_SUCCESS(status));
331  DCHECK_GT(length, 0);
332  DCHECK_NOT_NULL(morph_func);
333  // Add the likely subtags or Minimize the subtags on the locale id
334  length =
335  (*morph_func)(localeBuffer, morphBuffer, ULOC_FULLNAME_CAPACITY, &status);
336  DCHECK(U_SUCCESS(status));
337  DCHECK_GT(length, 0);
338  // Returns a well-formed language tag
339  length = uloc_toLanguageTag(morphBuffer, localeBuffer, ULOC_FULLNAME_CAPACITY,
340  false, &status);
341  DCHECK(U_SUCCESS(status));
342  DCHECK_GT(length, 0);
343  std::string lang(localeBuffer, length);
344  std::replace(lang.begin(), lang.end(), '_', '-');
345 
346  return factory->NewStringFromAsciiChecked(lang.c_str());
347 }
348 
349 } // namespace
350 
351 Handle<String> JSLocale::Maximize(Isolate* isolate, String locale) {
352  return MorphLocale(isolate, locale, uloc_addLikelySubtags);
353 }
354 
355 Handle<String> JSLocale::Minimize(Isolate* isolate, String locale) {
356  return MorphLocale(isolate, locale, uloc_minimizeSubtags);
357 }
358 
359 Handle<String> JSLocale::CaseFirstAsString() const {
360  switch (case_first()) {
361  case CaseFirst::UPPER:
362  return GetReadOnlyRoots().upper_string_handle();
363  case CaseFirst::LOWER:
364  return GetReadOnlyRoots().lower_string_handle();
365  case CaseFirst::FALSE_VALUE:
366  return GetReadOnlyRoots().false_string_handle();
367  case CaseFirst::COUNT:
368  UNREACHABLE();
369  }
370 }
371 
372 Handle<String> JSLocale::HourCycleAsString() const {
373  switch (hour_cycle()) {
374  case HourCycle::H11:
375  return GetReadOnlyRoots().h11_string_handle();
376  case HourCycle::H12:
377  return GetReadOnlyRoots().h12_string_handle();
378  case HourCycle::H23:
379  return GetReadOnlyRoots().h23_string_handle();
380  case HourCycle::H24:
381  return GetReadOnlyRoots().h24_string_handle();
382  case HourCycle::COUNT:
383  UNREACHABLE();
384  }
385 }
386 
387 Handle<String> JSLocale::NumericAsString() const {
388  switch (numeric()) {
389  case Numeric::NOTSET:
390  return GetReadOnlyRoots().undefined_string_handle();
391  case Numeric::TRUE_VALUE:
392  return GetReadOnlyRoots().true_string_handle();
393  case Numeric::FALSE_VALUE:
394  return GetReadOnlyRoots().false_string_handle();
395  case Numeric::COUNT:
396  UNREACHABLE();
397  }
398 }
399 
400 } // namespace internal
401 } // namespace v8
Definition: libplatform.h:13