V8 API Reference, 7.2.502.16 (for Deno 0.2.4)
js-segmenter.cc
1 // Copyright 2018 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #ifndef V8_INTL_SUPPORT
6 #error Internationalization is expected to be enabled.
7 #endif // V8_INTL_SUPPORT
8 
9 #include "src/objects/js-segmenter.h"
10 
11 #include <map>
12 #include <memory>
13 #include <string>
14 
15 #include "src/heap/factory.h"
16 #include "src/isolate.h"
17 #include "src/objects-inl.h"
18 #include "src/objects/intl-objects.h"
19 #include "src/objects/js-segmenter-inl.h"
20 #include "src/objects/managed.h"
21 #include "unicode/brkiter.h"
22 
23 namespace v8 {
24 namespace internal {
25 
26 JSSegmenter::LineBreakStyle JSSegmenter::GetLineBreakStyle(const char* str) {
27  if (strcmp(str, "strict") == 0) return JSSegmenter::LineBreakStyle::STRICT;
28  if (strcmp(str, "normal") == 0) return JSSegmenter::LineBreakStyle::NORMAL;
29  if (strcmp(str, "loose") == 0) return JSSegmenter::LineBreakStyle::LOOSE;
30  UNREACHABLE();
31 }
32 
33 JSSegmenter::Granularity JSSegmenter::GetGranularity(const char* str) {
34  if (strcmp(str, "grapheme") == 0) return JSSegmenter::Granularity::GRAPHEME;
35  if (strcmp(str, "word") == 0) return JSSegmenter::Granularity::WORD;
36  if (strcmp(str, "sentence") == 0) return JSSegmenter::Granularity::SENTENCE;
37  if (strcmp(str, "line") == 0) return JSSegmenter::Granularity::LINE;
38  UNREACHABLE();
39 }
40 
41 MaybeHandle<JSSegmenter> JSSegmenter::Initialize(
42  Isolate* isolate, Handle<JSSegmenter> segmenter_holder,
43  Handle<Object> locales, Handle<Object> input_options) {
44  segmenter_holder->set_flags(0);
45 
46  // 3. Let requestedLocales be ? CanonicalizeLocaleList(locales).
47  Maybe<std::vector<std::string>> maybe_requested_locales =
48  Intl::CanonicalizeLocaleList(isolate, locales);
49  MAYBE_RETURN(maybe_requested_locales, Handle<JSSegmenter>());
50  std::vector<std::string> requested_locales =
51  maybe_requested_locales.FromJust();
52 
53  // 11. If options is undefined, then
54  Handle<JSReceiver> options;
55  if (input_options->IsUndefined(isolate)) {
56  // 11. a. Let options be ObjectCreate(null).
57  options = isolate->factory()->NewJSObjectWithNullProto();
58  // 12. Else
59  } else {
60  // 23. a. Let options be ? ToObject(options).
61  ASSIGN_RETURN_ON_EXCEPTION(isolate, options,
62  Object::ToObject(isolate, input_options),
63  JSSegmenter);
64  }
65 
66  // 4. Let opt be a new Record.
67  // 5. Let matcher be ? GetOption(options, "localeMatcher", "string",
68  // « "lookup", "best fit" », "best fit").
69  // 6. Set opt.[[localeMatcher]] to matcher.
70  Maybe<Intl::MatcherOption> maybe_locale_matcher =
71  Intl::GetLocaleMatcher(isolate, options, "Intl.Segmenter");
72  MAYBE_RETURN(maybe_locale_matcher, MaybeHandle<JSSegmenter>());
73  Intl::MatcherOption matcher = maybe_locale_matcher.FromJust();
74 
75  // 8. Set opt.[[lb]] to lineBreakStyle.
76 
77  // 9. Let r be ResolveLocale(%Segmenter%.[[AvailableLocales]],
78  // requestedLocales, opt, %Segmenter%.[[RelevantExtensionKeys]]).
79  Intl::ResolvedLocale r =
80  Intl::ResolveLocale(isolate, JSSegmenter::GetAvailableLocales(),
81  requested_locales, matcher, {});
82 
83  // 7. Let lineBreakStyle be ? GetOption(options, "lineBreakStyle", "string", «
84  // "strict", "normal", "loose" », "normal").
85  Maybe<LineBreakStyle> maybe_line_break_style =
86  Intl::GetStringOption<LineBreakStyle>(
87  isolate, options, "lineBreakStyle", "Intl.Segmenter",
88  {"strict", "normal", "loose"},
89  {LineBreakStyle::STRICT, LineBreakStyle::NORMAL,
90  LineBreakStyle::LOOSE},
91  LineBreakStyle::NORMAL);
92  MAYBE_RETURN(maybe_line_break_style, MaybeHandle<JSSegmenter>());
93  LineBreakStyle line_break_style_enum = maybe_line_break_style.FromJust();
94 
95  // 10. Set segmenter.[[Locale]] to the value of r.[[Locale]].
96  Handle<String> locale_str =
97  isolate->factory()->NewStringFromAsciiChecked(r.locale.c_str());
98  segmenter_holder->set_locale(*locale_str);
99 
100  // 13. Let granularity be ? GetOption(options, "granularity", "string", «
101  // "grapheme", "word", "sentence", "line" », "grapheme").
102  Maybe<Granularity> maybe_granularity = Intl::GetStringOption<Granularity>(
103  isolate, options, "granularity", "Intl.Segmenter",
104  {"grapheme", "word", "sentence", "line"},
105  {Granularity::GRAPHEME, Granularity::WORD, Granularity::SENTENCE,
106  Granularity::LINE},
107  Granularity::GRAPHEME);
108  MAYBE_RETURN(maybe_granularity, MaybeHandle<JSSegmenter>());
109  Granularity granularity_enum = maybe_granularity.FromJust();
110 
111  // 14. Set segmenter.[[SegmenterGranularity]] to granularity.
112  segmenter_holder->set_granularity(granularity_enum);
113 
114  // 15. If granularity is "line",
115  if (granularity_enum == Granularity::LINE) {
116  // a. Set segmenter.[[SegmenterLineBreakStyle]] to r.[[lb]].
117  segmenter_holder->set_line_break_style(line_break_style_enum);
118  } else {
119  segmenter_holder->set_line_break_style(LineBreakStyle::NOTSET);
120  }
121 
122  icu::Locale icu_locale = r.icu_locale;
123  DCHECK(!icu_locale.isBogus());
124 
125  UErrorCode status = U_ZERO_ERROR;
126  std::unique_ptr<icu::BreakIterator> icu_break_iterator;
127 
128  switch (granularity_enum) {
129  case Granularity::GRAPHEME:
130  icu_break_iterator.reset(
131  icu::BreakIterator::createCharacterInstance(icu_locale, status));
132  break;
133  case Granularity::WORD:
134  icu_break_iterator.reset(
135  icu::BreakIterator::createWordInstance(icu_locale, status));
136  break;
137  case Granularity::SENTENCE:
138  icu_break_iterator.reset(
139  icu::BreakIterator::createSentenceInstance(icu_locale, status));
140  break;
141  case Granularity::LINE: {
142  // 15. If granularity is "line",
143  // a. Set segmenter.[[SegmenterLineBreakStyle]] to r.[[lb]].
144  const char* key = uloc_toLegacyKey("lb");
145  CHECK_NOT_NULL(key);
146  const char* value =
147  uloc_toLegacyType(key, segmenter_holder->LineBreakStyleAsCString());
148  CHECK_NOT_NULL(value);
149  UErrorCode status = U_ZERO_ERROR;
150  icu_locale.setKeywordValue(key, value, status);
151  CHECK(U_SUCCESS(status));
152  icu_break_iterator.reset(
153  icu::BreakIterator::createLineInstance(icu_locale, status));
154  break;
155  }
156  case Granularity::COUNT:
157  UNREACHABLE();
158  }
159 
160  CHECK(U_SUCCESS(status));
161  CHECK_NOT_NULL(icu_break_iterator.get());
162 
163  Handle<Managed<icu::BreakIterator>> managed_break_iterator =
164  Managed<icu::BreakIterator>::FromUniquePtr(isolate, 0,
165  std::move(icu_break_iterator));
166 
167  segmenter_holder->set_icu_break_iterator(*managed_break_iterator);
168  return segmenter_holder;
169 }
170 
171 // ecma402 #sec-Intl.Segmenter.prototype.resolvedOptions
172 Handle<JSObject> JSSegmenter::ResolvedOptions(
173  Isolate* isolate, Handle<JSSegmenter> segmenter_holder) {
174  Factory* factory = isolate->factory();
175  // 3. Let options be ! ObjectCreate(%ObjectPrototype%).
176  Handle<JSObject> result = factory->NewJSObject(isolate->object_function());
177  // 4. For each row of Table 1, except the header row, do
178  // a. Let p be the Property value of the current row.
179  // b. Let v be the value of pr's internal slot whose name is the Internal Slot
180  // value of the current row.
181  //
182  // c. If v is not undefined, then
183  // i. Perform ! CreateDataPropertyOrThrow(options, p, v).
184  // Table 1: Resolved Options of Segmenter Instances
185  // Internal Slot Property
186  // [[Locale]] "locale"
187  // [[SegmenterGranularity]] "granularity"
188  // [[SegmenterLineBreakStyle]] "lineBreakStyle"
189 
190  Handle<String> locale(segmenter_holder->locale(), isolate);
191  JSObject::AddProperty(isolate, result, factory->locale_string(), locale,
192  NONE);
193  JSObject::AddProperty(isolate, result, factory->granularity_string(),
194  segmenter_holder->GranularityAsString(), NONE);
195  if (segmenter_holder->line_break_style() != LineBreakStyle::NOTSET) {
196  JSObject::AddProperty(isolate, result, factory->lineBreakStyle_string(),
197  segmenter_holder->LineBreakStyleAsString(), NONE);
198  }
199  // 5. Return options.
200  return result;
201 }
202 
203 const char* JSSegmenter::LineBreakStyleAsCString() const {
204  switch (line_break_style()) {
205  case LineBreakStyle::STRICT:
206  return "strict";
207  case LineBreakStyle::NORMAL:
208  return "normal";
209  case LineBreakStyle::LOOSE:
210  return "loose";
211  case LineBreakStyle::COUNT:
212  case LineBreakStyle::NOTSET:
213  UNREACHABLE();
214  }
215 }
216 
217 Handle<String> JSSegmenter::LineBreakStyleAsString() const {
218  switch (line_break_style()) {
219  case LineBreakStyle::STRICT:
220  return GetReadOnlyRoots().strict_string_handle();
221  case LineBreakStyle::NORMAL:
222  return GetReadOnlyRoots().normal_string_handle();
223  case LineBreakStyle::LOOSE:
224  return GetReadOnlyRoots().loose_string_handle();
225  case LineBreakStyle::COUNT:
226  case LineBreakStyle::NOTSET:
227  UNREACHABLE();
228  }
229 }
230 
231 Handle<String> JSSegmenter::GranularityAsString() const {
232  switch (granularity()) {
233  case Granularity::GRAPHEME:
234  return GetReadOnlyRoots().grapheme_string_handle();
235  case Granularity::WORD:
236  return GetReadOnlyRoots().word_string_handle();
237  case Granularity::SENTENCE:
238  return GetReadOnlyRoots().sentence_string_handle();
239  case Granularity::LINE:
240  return GetReadOnlyRoots().line_string_handle();
241  case Granularity::COUNT:
242  UNREACHABLE();
243  }
244 }
245 
246 std::set<std::string> JSSegmenter::GetAvailableLocales() {
247  int32_t num_locales = 0;
248  const icu::Locale* icu_available_locales =
249  icu::BreakIterator::getAvailableLocales(num_locales);
250  return Intl::BuildLocaleSet(icu_available_locales, num_locales);
251 }
252 
253 } // namespace internal
254 } // namespace v8
Definition: libplatform.h:13