V8 API Reference, 7.2.502.16 (for Deno 0.2.4)
js-segment-iterator.cc
1 // Copyright 2018 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #ifndef V8_INTL_SUPPORT
6 #error Internationalization is expected to be enabled.
7 #endif // V8_INTL_SUPPORT
8 
9 #include "src/objects/js-segment-iterator.h"
10 
11 #include <map>
12 #include <memory>
13 #include <string>
14 
15 #include "src/heap/factory.h"
16 #include "src/isolate.h"
17 #include "src/objects-inl.h"
18 #include "src/objects/intl-objects.h"
19 #include "src/objects/js-segment-iterator-inl.h"
20 #include "src/objects/managed.h"
21 #include "unicode/brkiter.h"
22 
23 namespace v8 {
24 namespace internal {
25 
26 MaybeHandle<String> JSSegmentIterator::GetSegment(Isolate* isolate,
27  int32_t start,
28  int32_t end) const {
29  return Intl::ToString(isolate, *(unicode_string()->raw()), start, end);
30 }
31 
32 Handle<String> JSSegmentIterator::GranularityAsString() const {
33  switch (granularity()) {
34  case JSSegmenter::Granularity::GRAPHEME:
35  return GetReadOnlyRoots().grapheme_string_handle();
36  case JSSegmenter::Granularity::WORD:
37  return GetReadOnlyRoots().word_string_handle();
38  case JSSegmenter::Granularity::SENTENCE:
39  return GetReadOnlyRoots().sentence_string_handle();
40  case JSSegmenter::Granularity::LINE:
41  return GetReadOnlyRoots().line_string_handle();
42  case JSSegmenter::Granularity::COUNT:
43  UNREACHABLE();
44  }
45 }
46 
47 MaybeHandle<JSSegmentIterator> JSSegmentIterator::Create(
48  Isolate* isolate, icu::BreakIterator* break_iterator,
49  JSSegmenter::Granularity granularity, Handle<String> text) {
50  CHECK_NOT_NULL(break_iterator);
51  // 1. Let iterator be ObjectCreate(%SegmentIteratorPrototype%).
52  Handle<Map> map = Handle<Map>(
53  isolate->native_context()->intl_segment_iterator_map(), isolate);
54  Handle<JSObject> result = isolate->factory()->NewJSObjectFromMap(map);
55 
56  Handle<JSSegmentIterator> segment_iterator =
57  Handle<JSSegmentIterator>::cast(result);
58 
59  segment_iterator->set_flags(0);
60  segment_iterator->set_granularity(granularity);
61  // 2. Let iterator.[[SegmentIteratorSegmenter]] be segmenter.
62  Handle<Managed<icu::BreakIterator>> managed_break_iterator =
63  Managed<icu::BreakIterator>::FromRawPtr(isolate, 0, break_iterator);
64  segment_iterator->set_icu_break_iterator(*managed_break_iterator);
65 
66  // 3. Let iterator.[[SegmentIteratorString]] be string.
67  Managed<icu::UnicodeString>* unicode_string =
68  Intl::SetTextToBreakIterator(isolate, text, break_iterator);
69  segment_iterator->set_unicode_string(unicode_string);
70 
71  // 4. Let iterator.[[SegmentIteratorPosition]] be 0.
72  // 5. Let iterator.[[SegmentIteratorBreakType]] be an implementation-dependent
73  // string representing a break at the edge of a string.
74  // step 4 and 5 are stored inside break_iterator.
75 
76  return segment_iterator;
77 }
78 
79 // ecma402 #sec-segment-iterator-prototype-breakType
80 Handle<Object> JSSegmentIterator::BreakType() const {
81  icu::BreakIterator* break_iterator = icu_break_iterator()->raw();
82  int32_t rule_status = break_iterator->getRuleStatus();
83  switch (granularity()) {
84  case JSSegmenter::Granularity::GRAPHEME:
85  return GetReadOnlyRoots().undefined_value_handle();
86  case JSSegmenter::Granularity::WORD:
87  if (rule_status >= UBRK_WORD_NONE && rule_status < UBRK_WORD_NONE_LIMIT) {
88  // "words" that do not fit into any of other categories. Includes spaces
89  // and most punctuation.
90  return GetReadOnlyRoots().none_string_handle();
91  }
92  if ((rule_status >= UBRK_WORD_NUMBER &&
93  rule_status < UBRK_WORD_NUMBER_LIMIT) ||
94  (rule_status >= UBRK_WORD_LETTER &&
95  rule_status < UBRK_WORD_LETTER_LIMIT) ||
96  (rule_status >= UBRK_WORD_KANA &&
97  rule_status < UBRK_WORD_KANA_LIMIT) ||
98  (rule_status >= UBRK_WORD_IDEO &&
99  rule_status < UBRK_WORD_IDEO_LIMIT)) {
100  // words that appear to be numbers, letters, kana characters,
101  // ideographic characters, etc
102  return GetReadOnlyRoots().word_string_handle();
103  }
104  return GetReadOnlyRoots().undefined_value_handle();
105  case JSSegmenter::Granularity::LINE:
106  if (rule_status >= UBRK_LINE_SOFT && rule_status < UBRK_LINE_SOFT_LIMIT) {
107  // soft line breaks, positions at which a line break is acceptable but
108  // not required
109  return GetReadOnlyRoots().soft_string_handle();
110  }
111  if ((rule_status >= UBRK_LINE_HARD &&
112  rule_status < UBRK_LINE_HARD_LIMIT)) {
113  // hard, or mandatory line breaks
114  return GetReadOnlyRoots().hard_string_handle();
115  }
116  return GetReadOnlyRoots().undefined_value_handle();
117  case JSSegmenter::Granularity::SENTENCE:
118  if (rule_status >= UBRK_SENTENCE_TERM &&
119  rule_status < UBRK_SENTENCE_TERM_LIMIT) {
120  // sentences ending with a sentence terminator ('.', '?', '!', etc.)
121  // character, possibly followed by a hard separator (CR, LF, PS, etc.)
122  return GetReadOnlyRoots().term_string_handle();
123  }
124  if ((rule_status >= UBRK_SENTENCE_SEP &&
125  rule_status < UBRK_SENTENCE_SEP_LIMIT)) {
126  // sentences that do not contain an ending sentence terminator ('.',
127  // '?', '!', etc.) character, but are ended only by a hard separator
128  // (CR, LF, PS, etc.) hard, or mandatory line breaks
129  return GetReadOnlyRoots().sep_string_handle();
130  }
131  return GetReadOnlyRoots().undefined_value_handle();
132  case JSSegmenter::Granularity::COUNT:
133  UNREACHABLE();
134  }
135 }
136 
137 // ecma402 #sec-segment-iterator-prototype-position
138 Handle<Object> JSSegmentIterator::Position(
139  Isolate* isolate, Handle<JSSegmentIterator> segment_iterator) {
140  icu::BreakIterator* icu_break_iterator =
141  segment_iterator->icu_break_iterator()->raw();
142  CHECK_NOT_NULL(icu_break_iterator);
143  return isolate->factory()->NewNumberFromInt(icu_break_iterator->current());
144 }
145 
146 // ecma402 #sec-segment-iterator-prototype-next
147 MaybeHandle<JSReceiver> JSSegmentIterator::Next(
148  Isolate* isolate, Handle<JSSegmentIterator> segment_iterator) {
149  Factory* factory = isolate->factory();
150  icu::BreakIterator* icu_break_iterator =
151  segment_iterator->icu_break_iterator()->raw();
152  // 3. Let _previousPosition be iterator.[[SegmentIteratorPosition]].
153  int32_t prev = icu_break_iterator->current();
154  // 4. Let done be AdvanceSegmentIterator(iterator, forwards).
155  int32_t position = icu_break_iterator->next();
156  if (position == icu::BreakIterator::DONE) {
157  // 5. If done is true, return CreateIterResultObject(undefined, true).
158  return factory->NewJSIteratorResult(isolate->factory()->undefined_value(),
159  true);
160  }
161  // 6. Let newPosition be iterator.[[SegmentIteratorPosition]].
162  Handle<Object> new_position = factory->NewNumberFromInt(position);
163 
164  // 8. Let segment be the substring of string from previousPosition to
165  // newPosition, inclusive of previousPosition and exclusive of newPosition.
166  Handle<String> segment;
167  ASSIGN_RETURN_ON_EXCEPTION(
168  isolate, segment, segment_iterator->GetSegment(isolate, prev, position),
169  JSReceiver);
170 
171  // 9. Let breakType be iterator.[[SegmentIteratorBreakType]].
172  Handle<Object> break_type = segment_iterator->BreakType();
173 
174  // 10. Let result be ! ObjectCreate(%ObjectPrototype%).
175  Handle<JSObject> result = factory->NewJSObject(isolate->object_function());
176 
177  // 11. Perform ! CreateDataProperty(result "segment", segment).
178  CHECK(JSReceiver::CreateDataProperty(
179  isolate, result, factory->segment_string(), segment, kDontThrow)
180  .FromJust());
181 
182  // 12. Perform ! CreateDataProperty(result, "breakType", breakType).
183  CHECK(JSReceiver::CreateDataProperty(isolate, result,
184  factory->breakType_string(), break_type,
185  kDontThrow)
186  .FromJust());
187 
188  // 13. Perform ! CreateDataProperty(result, "position", newPosition).
189  CHECK(JSReceiver::CreateDataProperty(isolate, result,
190  factory->position_string(), new_position,
191  kDontThrow)
192  .FromJust());
193 
194  // 14. Return CreateIterResultObject(result, false).
195  return factory->NewJSIteratorResult(result, false);
196 }
197 
198 // ecma402 #sec-segment-iterator-prototype-following
199 Maybe<bool> JSSegmentIterator::Following(
200  Isolate* isolate, Handle<JSSegmentIterator> segment_iterator,
201  Handle<Object> from_obj) {
202  Factory* factory = isolate->factory();
203  icu::BreakIterator* icu_break_iterator =
204  segment_iterator->icu_break_iterator()->raw();
205  // 3. If from is not undefined,
206  if (!from_obj->IsUndefined()) {
207  // a. Let from be ? ToIndex(from).
208  uint32_t from;
209  if (!from_obj->ToArrayIndex(&from)) {
210  THROW_NEW_ERROR_RETURN_VALUE(
211  isolate,
212  NewRangeError(MessageTemplate::kParameterOfFunctionOutOfRange,
213  factory->NewStringFromStaticChars("from"),
214  factory->NewStringFromStaticChars("following"),
215  from_obj),
216  Nothing<bool>());
217  }
218  // b. If from ≥ iterator.[[SegmentIteratorString]], throw a RangeError
219  // exception.
220  // c. Let iterator.[[SegmentIteratorPosition]] be from.
221  if (icu_break_iterator->following(from) == icu::BreakIterator::DONE) {
222  THROW_NEW_ERROR_RETURN_VALUE(
223  isolate,
224  NewRangeError(MessageTemplate::kParameterOfFunctionOutOfRange,
225  factory->NewStringFromStaticChars("from"),
226  factory->NewStringFromStaticChars("following"),
227  from_obj),
228  Nothing<bool>());
229  }
230  return Just(false);
231  }
232  // 4. return AdvanceSegmentIterator(iterator, forward).
233  // 4. .... or if direction is backwards and position is 0, return true.
234  // 4. If direction is forwards and position is the length of string ... return
235  // true.
236  return Just(icu_break_iterator->next() == icu::BreakIterator::DONE);
237 }
238 
239 // ecma402 #sec-segment-iterator-prototype-preceding
240 Maybe<bool> JSSegmentIterator::Preceding(
241  Isolate* isolate, Handle<JSSegmentIterator> segment_iterator,
242  Handle<Object> from_obj) {
243  Factory* factory = isolate->factory();
244  icu::BreakIterator* icu_break_iterator =
245  segment_iterator->icu_break_iterator()->raw();
246  // 3. If from is not undefined,
247  if (!from_obj->IsUndefined()) {
248  // a. Let from be ? ToIndex(from).
249  uint32_t from;
250  if (!from_obj->ToArrayIndex(&from)) {
251  THROW_NEW_ERROR_RETURN_VALUE(
252  isolate,
253  NewRangeError(MessageTemplate::kParameterOfFunctionOutOfRange,
254  factory->NewStringFromStaticChars("from"),
255  factory->NewStringFromStaticChars("following"),
256  from_obj),
257  Nothing<bool>());
258  }
259  // b. If from > iterator.[[SegmentIteratorString]] or from = 0, throw a
260  // RangeError exception.
261  // c. Let iterator.[[SegmentIteratorPosition]] be from.
262  uint32_t text_len =
263  static_cast<uint32_t>(icu_break_iterator->getText().getLength());
264  if (from > text_len ||
265  icu_break_iterator->preceding(from) == icu::BreakIterator::DONE) {
266  THROW_NEW_ERROR_RETURN_VALUE(
267  isolate,
268  NewRangeError(MessageTemplate::kParameterOfFunctionOutOfRange,
269  factory->NewStringFromStaticChars("from"),
270  factory->NewStringFromStaticChars("preceding"),
271  from_obj),
272  Nothing<bool>());
273  }
274  return Just(false);
275  }
276  // 4. return AdvanceSegmentIterator(iterator, backwards).
277  // 4. .... or if direction is backwards and position is 0, return true.
278  return Just(icu_break_iterator->previous() == icu::BreakIterator::DONE);
279 }
280 
281 } // namespace internal
282 } // namespace v8
Definition: libplatform.h:13