V8 API Reference, 7.2.502.16 (for Deno 0.2.4)
js-list-format.cc
1 // Copyright 2018 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #ifndef V8_INTL_SUPPORT
6 #error Internationalization is expected to be enabled.
7 #endif // V8_INTL_SUPPORT
8 
9 #include "src/objects/js-list-format.h"
10 
11 #include <memory>
12 #include <vector>
13 
14 #include "src/elements-inl.h"
15 #include "src/elements.h"
16 #include "src/heap/factory.h"
17 #include "src/isolate.h"
18 #include "src/objects-inl.h"
19 #include "src/objects/intl-objects.h"
20 #include "src/objects/js-array-inl.h"
21 #include "src/objects/js-list-format-inl.h"
22 #include "src/objects/managed.h"
23 #include "unicode/fieldpos.h"
24 #include "unicode/fpositer.h"
25 #include "unicode/listformatter.h"
26 #include "unicode/ulistformatter.h"
27 
28 namespace v8 {
29 namespace internal {
30 
31 namespace {
// ICU list-pattern style keywords. GetIcuStyleString() returns one of these
// and the result is handed to icu::ListFormatter::createInstance().
// Declared constexpr arrays (rather than mutable `const char*` pointers) so
// that neither the text nor the binding can be changed at runtime; each one
// still decays to `const char*` where a pointer is expected.
constexpr char kStandard[] = "standard";
constexpr char kOr[] = "or";
constexpr char kUnit[] = "unit";
constexpr char kStandardShort[] = "standard-short";
constexpr char kUnitShort[] = "unit-short";
constexpr char kUnitNarrow[] = "unit-narrow";
38 
39 const char* GetIcuStyleString(JSListFormat::Style style,
40  JSListFormat::Type type) {
41  switch (type) {
42  case JSListFormat::Type::CONJUNCTION:
43  switch (style) {
44  case JSListFormat::Style::LONG:
45  return kStandard;
46  case JSListFormat::Style::SHORT:
47  return kStandardShort;
48  // NARROW is now not allowed if type is not unit
49  // It is impossible to reach because we've already thrown a RangeError
50  // when style is "narrow" and type is not "unit".
51  case JSListFormat::Style::NARROW:
52  case JSListFormat::Style::COUNT:
53  UNREACHABLE();
54  }
55  case JSListFormat::Type::DISJUNCTION:
56  switch (style) {
57  // Currently, ListFormat::createInstance on "or-short"
58  // will fail so we use "or" here.
59  // See https://unicode.org/cldr/trac/ticket/11254
60  // TODO(ftang): change to return kOr or kOrShort depend on
61  // style after the above issue fixed in CLDR/ICU.
62  // CLDR bug: https://unicode.org/cldr/trac/ticket/11254
63  // ICU bug: https://unicode-org.atlassian.net/browse/ICU-20014
64  case JSListFormat::Style::LONG:
65  case JSListFormat::Style::SHORT:
66  return kOr;
67  // NARROW is now not allowed if type is not unit
68  // It is impossible to reach because we've already thrown a RangeError
69  // when style is "narrow" and type is not "unit".
70  case JSListFormat::Style::NARROW:
71  case JSListFormat::Style::COUNT:
72  UNREACHABLE();
73  }
74  case JSListFormat::Type::UNIT:
75  switch (style) {
76  case JSListFormat::Style::LONG:
77  return kUnit;
78  case JSListFormat::Style::SHORT:
79  return kUnitShort;
80  case JSListFormat::Style::NARROW:
81  return kUnitNarrow;
82  case JSListFormat::Style::COUNT:
83  UNREACHABLE();
84  }
85  case JSListFormat::Type::COUNT:
86  UNREACHABLE();
87  }
88 }
89 
90 } // namespace
91 
92 JSListFormat::Style get_style(const char* str) {
93  switch (str[0]) {
94  case 'n':
95  if (strcmp(&str[1], "arrow") == 0) return JSListFormat::Style::NARROW;
96  break;
97  case 'l':
98  if (strcmp(&str[1], "ong") == 0) return JSListFormat::Style::LONG;
99  break;
100  case 's':
101  if (strcmp(&str[1], "hort") == 0) return JSListFormat::Style::SHORT;
102  break;
103  }
104  UNREACHABLE();
105 }
106 
107 JSListFormat::Type get_type(const char* str) {
108  switch (str[0]) {
109  case 'c':
110  if (strcmp(&str[1], "onjunction") == 0)
111  return JSListFormat::Type::CONJUNCTION;
112  break;
113  case 'd':
114  if (strcmp(&str[1], "isjunction") == 0)
115  return JSListFormat::Type::DISJUNCTION;
116  break;
117  case 'u':
118  if (strcmp(&str[1], "nit") == 0) return JSListFormat::Type::UNIT;
119  break;
120  }
121  UNREACHABLE();
122 }
123 
124 MaybeHandle<JSListFormat> JSListFormat::Initialize(
125  Isolate* isolate, Handle<JSListFormat> list_format, Handle<Object> locales,
126  Handle<Object> input_options) {
127  list_format->set_flags(0);
128 
129  Handle<JSReceiver> options;
130  // 3. Let requestedLocales be ? CanonicalizeLocaleList(locales).
131  Maybe<std::vector<std::string>> maybe_requested_locales =
132  Intl::CanonicalizeLocaleList(isolate, locales);
133  MAYBE_RETURN(maybe_requested_locales, Handle<JSListFormat>());
134  std::vector<std::string> requested_locales =
135  maybe_requested_locales.FromJust();
136 
137  // 4. If options is undefined, then
138  if (input_options->IsUndefined(isolate)) {
139  // 4. a. Let options be ObjectCreate(null).
140  options = isolate->factory()->NewJSObjectWithNullProto();
141  // 5. Else
142  } else {
143  // 5. a. Let options be ? ToObject(options).
144  ASSIGN_RETURN_ON_EXCEPTION(isolate, options,
145  Object::ToObject(isolate, input_options),
146  JSListFormat);
147  }
148 
149  // Note: No need to create a record. It's not observable.
150  // 6. Let opt be a new Record.
151 
152  // 7. Let t be GetOption(options, "type", "string", «"conjunction",
153  // "disjunction", "unit"», "conjunction").
154  Maybe<Type> maybe_type = Intl::GetStringOption<Type>(
155  isolate, options, "type", "Intl.ListFormat",
156  {"conjunction", "disjunction", "unit"},
157  {Type::CONJUNCTION, Type::DISJUNCTION, Type::UNIT}, Type::CONJUNCTION);
158  MAYBE_RETURN(maybe_type, MaybeHandle<JSListFormat>());
159  Type type_enum = maybe_type.FromJust();
160 
161  // 8. Set listFormat.[[Type]] to t.
162  list_format->set_type(type_enum);
163 
164  // 9. Let s be ? GetOption(options, "style", "string",
165  // «"long", "short", "narrow"», "long").
166  Maybe<Style> maybe_style = Intl::GetStringOption<Style>(
167  isolate, options, "style", "Intl.ListFormat", {"long", "short", "narrow"},
168  {Style::LONG, Style::SHORT, Style::NARROW}, Style::LONG);
169  MAYBE_RETURN(maybe_style, MaybeHandle<JSListFormat>());
170  Style style_enum = maybe_style.FromJust();
171 
172  // 10. Set listFormat.[[Style]] to s.
173  list_format->set_style(style_enum);
174 
175  // 12. Let matcher be ? GetOption(options, "localeMatcher", "string", «
176  // "lookup", "best fit" », "best fit").
177  Maybe<Intl::MatcherOption> maybe_locale_matcher =
178  Intl::GetLocaleMatcher(isolate, options, "Intl.ListFormat");
179  MAYBE_RETURN(maybe_locale_matcher, MaybeHandle<JSListFormat>());
180  Intl::MatcherOption matcher = maybe_locale_matcher.FromJust();
181 
182  // 14. If style is "narrow" and type is not "unit", throw a RangeError
183  // exception.
184  if (style_enum == Style::NARROW && type_enum != Type::UNIT) {
185  THROW_NEW_ERROR(
186  isolate, NewRangeError(MessageTemplate::kIllegalTypeWhileStyleNarrow),
187  JSListFormat);
188  }
189 
190  // 15. Let r be ResolveLocale(%ListFormat%.[[AvailableLocales]],
191  // requestedLocales, opt, undefined, localeData).
192  Intl::ResolvedLocale r =
193  Intl::ResolveLocale(isolate, JSListFormat::GetAvailableLocales(),
194  requested_locales, matcher, {});
195 
196  // 24. Set listFormat.[[Locale]] to r.[[Locale]].
197  Handle<String> locale_str =
198  isolate->factory()->NewStringFromAsciiChecked(r.locale.c_str());
199  list_format->set_locale(*locale_str);
200 
201  icu::Locale icu_locale = r.icu_locale;
202  UErrorCode status = U_ZERO_ERROR;
203  icu::ListFormatter* formatter = icu::ListFormatter::createInstance(
204  icu_locale, GetIcuStyleString(style_enum, type_enum), status);
205  if (U_FAILURE(status)) {
206  delete formatter;
207  FATAL("Failed to create ICU list formatter, are ICU data files missing?");
208  }
209  CHECK_NOT_NULL(formatter);
210 
211  Handle<Managed<icu::ListFormatter>> managed_formatter =
212  Managed<icu::ListFormatter>::FromRawPtr(isolate, 0, formatter);
213 
214  list_format->set_icu_formatter(*managed_formatter);
215  return list_format;
216 }
217 
218 // ecma402 #sec-intl.pluralrules.prototype.resolvedoptions
219 Handle<JSObject> JSListFormat::ResolvedOptions(Isolate* isolate,
220  Handle<JSListFormat> format) {
221  Factory* factory = isolate->factory();
222  // 4. Let options be ! ObjectCreate(%ObjectPrototype%).
223  Handle<JSObject> result = factory->NewJSObject(isolate->object_function());
224 
225  // 5. For each row of Table 1, except the header row, do
226  // Table 1: Resolved Options of ListFormat Instances
227  // Internal Slot Property
228  // [[Locale]] "locale"
229  // [[Type]] "type"
230  // [[Style]] "style"
231  Handle<String> locale(format->locale(), isolate);
232  JSObject::AddProperty(isolate, result, factory->locale_string(), locale,
233  NONE);
234  JSObject::AddProperty(isolate, result, factory->type_string(),
235  format->TypeAsString(), NONE);
236  JSObject::AddProperty(isolate, result, factory->style_string(),
237  format->StyleAsString(), NONE);
238  // 6. Return options.
239  return result;
240 }
241 
242 Handle<String> JSListFormat::StyleAsString() const {
243  switch (style()) {
244  case Style::LONG:
245  return GetReadOnlyRoots().long_string_handle();
246  case Style::SHORT:
247  return GetReadOnlyRoots().short_string_handle();
248  case Style::NARROW:
249  return GetReadOnlyRoots().narrow_string_handle();
250  case Style::COUNT:
251  UNREACHABLE();
252  }
253 }
254 
255 Handle<String> JSListFormat::TypeAsString() const {
256  switch (type()) {
257  case Type::CONJUNCTION:
258  return GetReadOnlyRoots().conjunction_string_handle();
259  case Type::DISJUNCTION:
260  return GetReadOnlyRoots().disjunction_string_handle();
261  case Type::UNIT:
262  return GetReadOnlyRoots().unit_string_handle();
263  case Type::COUNT:
264  UNREACHABLE();
265  }
266 }
267 
268 namespace {
269 
270 MaybeHandle<JSArray> GenerateListFormatParts(
271  Isolate* isolate, const icu::UnicodeString& formatted,
272  const std::vector<icu::FieldPosition>& positions) {
273  Factory* factory = isolate->factory();
274  Handle<JSArray> array =
275  factory->NewJSArray(static_cast<int>(positions.size()));
276  int index = 0;
277  int prev_item_end_index = 0;
278  Handle<String> substring;
279  for (const icu::FieldPosition pos : positions) {
280  CHECK(pos.getBeginIndex() >= prev_item_end_index);
281  CHECK(pos.getField() == ULISTFMT_ELEMENT_FIELD);
282  if (pos.getBeginIndex() != prev_item_end_index) {
283  ASSIGN_RETURN_ON_EXCEPTION(
284  isolate, substring,
285  Intl::ToString(isolate, formatted, prev_item_end_index,
286  pos.getBeginIndex()),
287  JSArray);
288  Intl::AddElement(isolate, array, index++, factory->literal_string(),
289  substring);
290  }
291  ASSIGN_RETURN_ON_EXCEPTION(
292  isolate, substring,
293  Intl::ToString(isolate, formatted, pos.getBeginIndex(),
294  pos.getEndIndex()),
295  JSArray);
296  Intl::AddElement(isolate, array, index++, factory->element_string(),
297  substring);
298  prev_item_end_index = pos.getEndIndex();
299  }
300  if (prev_item_end_index != formatted.length()) {
301  ASSIGN_RETURN_ON_EXCEPTION(
302  isolate, substring,
303  Intl::ToString(isolate, formatted, prev_item_end_index,
304  formatted.length()),
305  JSArray);
306  Intl::AddElement(isolate, array, index++, factory->literal_string(),
307  substring);
308  }
309  return array;
310 }
311 
312 // Get all the FieldPosition into a vector from FieldPositionIterator and return
313 // them in output order.
314 std::vector<icu::FieldPosition> GenerateFieldPosition(
315  icu::FieldPositionIterator iter) {
316  std::vector<icu::FieldPosition> positions;
317  icu::FieldPosition pos;
318  while (iter.next(pos)) {
319  // Only take the information of the ULISTFMT_ELEMENT_FIELD field.
320  if (pos.getField() == ULISTFMT_ELEMENT_FIELD) {
321  positions.push_back(pos);
322  }
323  }
324  // Because the format may reoder the items, ICU FieldPositionIterator
325  // keep the order for FieldPosition based on the order of the input items.
326  // But the formatToParts API in ECMA402 expects in formatted output order.
327  // Therefore we have to sort based on beginIndex of the FieldPosition.
328  // Example of such is in the "ur" (Urdu) locale with type: "unit", where the
329  // main text flows from right to left, the formatted list of unit should flow
330  // from left to right and therefore in the memory the formatted result will
331  // put the first item on the last in the result string according the current
332  // CLDR patterns.
333  // See 'listPattern' pattern in
334  // third_party/icu/source/data/locales/ur_IN.txt
335  std::sort(positions.begin(), positions.end(),
336  [](icu::FieldPosition a, icu::FieldPosition b) {
337  return a.getBeginIndex() < b.getBeginIndex();
338  });
339  return positions;
340 }
341 
342 // Extract String from JSArray into array of UnicodeString
343 Maybe<std::vector<icu::UnicodeString>> ToUnicodeStringArray(
344  Isolate* isolate, Handle<JSArray> array) {
345  Factory* factory = isolate->factory();
346  // In general, ElementsAccessor::Get actually isn't guaranteed to give us the
347  // elements in order. But if it is a holey array, it will cause the exception
348  // with the IsString check.
349  auto* accessor = array->GetElementsAccessor();
350  uint32_t length = accessor->NumberOfElements(*array);
351 
352  // ecma402 #sec-createpartsfromlist
353  // 2. If list contains any element value such that Type(value) is not String,
354  // throw a TypeError exception.
355  //
356  // Per spec it looks like we're supposed to throw a TypeError exception if the
357  // item isn't already a string, rather than coercing to a string.
358  std::vector<icu::UnicodeString> result;
359  for (uint32_t i = 0; i < length; i++) {
360  DCHECK(accessor->HasElement(*array, i));
361  Handle<Object> item = accessor->Get(array, i);
362  DCHECK(!item.is_null());
363  if (!item->IsString()) {
364  THROW_NEW_ERROR_RETURN_VALUE(
365  isolate,
366  NewTypeError(MessageTemplate::kArrayItemNotType,
367  factory->list_string(),
368  // TODO(ftang): For dictionary-mode arrays, i isn't
369  // actually the index in the array but the index in the
370  // dictionary.
371  factory->NewNumber(i), factory->String_string()),
372  Nothing<std::vector<icu::UnicodeString>>());
373  }
374  result.push_back(
375  Intl::ToICUUnicodeString(isolate, Handle<String>::cast(item)));
376  }
377  DCHECK(!array->HasDictionaryElements());
378  return Just(result);
379 }
380 
381 } // namespace
382 
383 // ecma402 #sec-formatlist
384 MaybeHandle<String> JSListFormat::FormatList(Isolate* isolate,
385  Handle<JSListFormat> format,
386  Handle<JSArray> list) {
387  DCHECK(!list->IsUndefined());
388  // ecma402 #sec-createpartsfromlist
389  // 2. If list contains any element value such that Type(value) is not String,
390  // throw a TypeError exception.
391  Maybe<std::vector<icu::UnicodeString>> maybe_array =
392  ToUnicodeStringArray(isolate, list);
393  MAYBE_RETURN(maybe_array, Handle<String>());
394  std::vector<icu::UnicodeString> array = maybe_array.FromJust();
395 
396  icu::ListFormatter* formatter = format->icu_formatter()->raw();
397  CHECK_NOT_NULL(formatter);
398 
399  UErrorCode status = U_ZERO_ERROR;
400  icu::UnicodeString formatted;
401  formatter->format(array.data(), static_cast<int32_t>(array.size()), formatted,
402  status);
403  DCHECK(U_SUCCESS(status));
404 
405  return Intl::ToString(isolate, formatted);
406 }
407 
408 std::set<std::string> JSListFormat::GetAvailableLocales() {
409  int32_t num_locales = 0;
410  // TODO(ftang): for now just use
411  // icu::Locale::getAvailableLocales(count) until we migrate to
412  // Intl::GetAvailableLocales().
413  // ICU FR at https://unicode-org.atlassian.net/browse/ICU-20015
414  const icu::Locale* icu_available_locales =
415  icu::Locale::getAvailableLocales(num_locales);
416  return Intl::BuildLocaleSet(icu_available_locales, num_locales);
417 }
418 
419 // ecma42 #sec-formatlisttoparts
420 MaybeHandle<JSArray> JSListFormat::FormatListToParts(
421  Isolate* isolate, Handle<JSListFormat> format, Handle<JSArray> list) {
422  DCHECK(!list->IsUndefined());
423  // ecma402 #sec-createpartsfromlist
424  // 2. If list contains any element value such that Type(value) is not String,
425  // throw a TypeError exception.
426  Maybe<std::vector<icu::UnicodeString>> maybe_array =
427  ToUnicodeStringArray(isolate, list);
428  MAYBE_RETURN(maybe_array, Handle<JSArray>());
429  std::vector<icu::UnicodeString> array = maybe_array.FromJust();
430 
431  icu::ListFormatter* formatter = format->icu_formatter()->raw();
432  CHECK_NOT_NULL(formatter);
433 
434  UErrorCode status = U_ZERO_ERROR;
435  icu::UnicodeString formatted;
436  icu::FieldPositionIterator iter;
437  formatter->format(array.data(), static_cast<int32_t>(array.size()), formatted,
438  &iter, status);
439  DCHECK(U_SUCCESS(status));
440 
441  std::vector<icu::FieldPosition> field_positions = GenerateFieldPosition(iter);
442  return GenerateListFormatParts(isolate, formatted, field_positions);
443 }
444 } // namespace internal
445 } // namespace v8
Definition: libplatform.h:13