V8 API Reference, 7.2.502.16 (for Deno 0.2.4)
runtime-regexp.cc
1 // Copyright 2014 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include <functional>
6 
7 #include "src/arguments-inl.h"
8 #include "src/conversions-inl.h"
9 #include "src/counters.h"
10 #include "src/isolate-inl.h"
11 #include "src/message-template.h"
12 #include "src/objects/js-array-inl.h"
13 #include "src/regexp/jsregexp-inl.h"
14 #include "src/regexp/jsregexp.h"
15 #include "src/regexp/regexp-utils.h"
16 #include "src/runtime/runtime-utils.h"
17 #include "src/string-builder-inl.h"
18 #include "src/string-search.h"
19 #include "src/zone/zone-chunk-list.h"
20 
21 namespace v8 {
22 namespace internal {
23 
24 namespace {
25 
26 // Returns -1 for failure.
27 uint32_t GetArgcForReplaceCallable(uint32_t num_captures,
28  bool has_named_captures) {
29  const uint32_t kAdditionalArgsWithoutNamedCaptures = 2;
30  const uint32_t kAdditionalArgsWithNamedCaptures = 3;
31  if (num_captures > Code::kMaxArguments) return -1;
32  uint32_t argc = has_named_captures
33  ? num_captures + kAdditionalArgsWithNamedCaptures
34  : num_captures + kAdditionalArgsWithoutNamedCaptures;
35  STATIC_ASSERT(Code::kMaxArguments < std::numeric_limits<uint32_t>::max() -
36  kAdditionalArgsWithNamedCaptures);
37  return (argc > Code::kMaxArguments) ? -1 : argc;
38 }
39 
40 // Looks up the capture of the given name. Returns the (1-based) numbered
41 // capture index or -1 on failure.
42 int LookupNamedCapture(const std::function<bool(String)>& name_matches,
43  FixedArray capture_name_map) {
44  // TODO(jgruber): Sort capture_name_map and do binary search via
45  // internalized strings.
46 
47  int maybe_capture_index = -1;
48  const int named_capture_count = capture_name_map->length() >> 1;
49  for (int j = 0; j < named_capture_count; j++) {
50  // The format of {capture_name_map} is documented at
51  // JSRegExp::kIrregexpCaptureNameMapIndex.
52  const int name_ix = j * 2;
53  const int index_ix = j * 2 + 1;
54 
55  String capture_name = String::cast(capture_name_map->get(name_ix));
56  if (!name_matches(capture_name)) continue;
57 
58  maybe_capture_index = Smi::ToInt(capture_name_map->get(index_ix));
59  break;
60  }
61 
62  return maybe_capture_index;
63 }
64 
65 } // namespace
66 
68  public:
69  explicit CompiledReplacement(Zone* zone)
70  : parts_(zone), replacement_substrings_(zone) {}
71 
72  // Return whether the replacement is simple.
73  bool Compile(Isolate* isolate, Handle<JSRegExp> regexp,
74  Handle<String> replacement, int capture_count,
75  int subject_length);
76 
77  // Use Apply only if Compile returned false.
78  void Apply(ReplacementStringBuilder* builder, int match_from, int match_to,
79  int32_t* match);
80 
81  // Number of distinct parts of the replacement pattern.
82  int parts() { return static_cast<int>(parts_.size()); }
83 
84  private:
85  enum PartType {
86  SUBJECT_PREFIX = 1,
87  SUBJECT_SUFFIX,
88  SUBJECT_CAPTURE,
89  REPLACEMENT_SUBSTRING,
90  REPLACEMENT_STRING,
91  EMPTY_REPLACEMENT,
92  NUMBER_OF_PART_TYPES
93  };
94 
95  struct ReplacementPart {
96  static inline ReplacementPart SubjectMatch() {
97  return ReplacementPart(SUBJECT_CAPTURE, 0);
98  }
99  static inline ReplacementPart SubjectCapture(int capture_index) {
100  return ReplacementPart(SUBJECT_CAPTURE, capture_index);
101  }
102  static inline ReplacementPart SubjectPrefix() {
103  return ReplacementPart(SUBJECT_PREFIX, 0);
104  }
105  static inline ReplacementPart SubjectSuffix(int subject_length) {
106  return ReplacementPart(SUBJECT_SUFFIX, subject_length);
107  }
108  static inline ReplacementPart ReplacementString() {
109  return ReplacementPart(REPLACEMENT_STRING, 0);
110  }
111  static inline ReplacementPart EmptyReplacement() {
112  return ReplacementPart(EMPTY_REPLACEMENT, 0);
113  }
114  static inline ReplacementPart ReplacementSubString(int from, int to) {
115  DCHECK_LE(0, from);
116  DCHECK_GT(to, from);
117  return ReplacementPart(-from, to);
118  }
119 
120  // If tag <= 0 then it is the negation of a start index of a substring of
121  // the replacement pattern, otherwise it's a value from PartType.
122  ReplacementPart(int tag, int data) : tag(tag), data(data) {
123  // Must be non-positive or a PartType value.
124  DCHECK(tag < NUMBER_OF_PART_TYPES);
125  }
126  // Either a value of PartType or a non-positive number that is
127  // the negation of an index into the replacement string.
128  int tag;
129  // The data value's interpretation depends on the value of tag:
130  // tag == SUBJECT_PREFIX ||
131  // tag == SUBJECT_SUFFIX: data is unused.
132  // tag == SUBJECT_CAPTURE: data is the number of the capture.
133  // tag == REPLACEMENT_SUBSTRING ||
134  // tag == REPLACEMENT_STRING: data is index into array of substrings
135  // of the replacement string.
136  // tag == EMPTY_REPLACEMENT: data is unused.
137  // tag <= 0: Temporary representation of the substring of the replacement
138  // string ranging over -tag .. data.
139  // Is replaced by REPLACEMENT_{SUB,}STRING when we create the
140  // substring objects.
141  int data;
142  };
143 
144  template <typename Char>
145  bool ParseReplacementPattern(ZoneChunkList<ReplacementPart>* parts,
146  Vector<Char> characters,
147  FixedArray capture_name_map, int capture_count,
148  int subject_length) {
149  // Equivalent to String::GetSubstitution, except that this method converts
150  // the replacement string into an internal representation that avoids
151  // repeated parsing when used repeatedly.
152  int length = characters.length();
153  int last = 0;
154  for (int i = 0; i < length; i++) {
155  Char c = characters[i];
156  if (c == '$') {
157  int next_index = i + 1;
158  if (next_index == length) { // No next character!
159  break;
160  }
161  Char c2 = characters[next_index];
162  switch (c2) {
163  case '$':
164  if (i > last) {
165  // There is a substring before. Include the first "$".
166  parts->push_back(
167  ReplacementPart::ReplacementSubString(last, next_index));
168  last = next_index + 1; // Continue after the second "$".
169  } else {
170  // Let the next substring start with the second "$".
171  last = next_index;
172  }
173  i = next_index;
174  break;
175  case '`':
176  if (i > last) {
177  parts->push_back(ReplacementPart::ReplacementSubString(last, i));
178  }
179  parts->push_back(ReplacementPart::SubjectPrefix());
180  i = next_index;
181  last = i + 1;
182  break;
183  case '\'':
184  if (i > last) {
185  parts->push_back(ReplacementPart::ReplacementSubString(last, i));
186  }
187  parts->push_back(ReplacementPart::SubjectSuffix(subject_length));
188  i = next_index;
189  last = i + 1;
190  break;
191  case '&':
192  if (i > last) {
193  parts->push_back(ReplacementPart::ReplacementSubString(last, i));
194  }
195  parts->push_back(ReplacementPart::SubjectMatch());
196  i = next_index;
197  last = i + 1;
198  break;
199  case '0':
200  case '1':
201  case '2':
202  case '3':
203  case '4':
204  case '5':
205  case '6':
206  case '7':
207  case '8':
208  case '9': {
209  int capture_ref = c2 - '0';
210  if (capture_ref > capture_count) {
211  i = next_index;
212  continue;
213  }
214  int second_digit_index = next_index + 1;
215  if (second_digit_index < length) {
216  // Peek ahead to see if we have two digits.
217  Char c3 = characters[second_digit_index];
218  if ('0' <= c3 && c3 <= '9') { // Double digits.
219  int double_digit_ref = capture_ref * 10 + c3 - '0';
220  if (double_digit_ref <= capture_count) {
221  next_index = second_digit_index;
222  capture_ref = double_digit_ref;
223  }
224  }
225  }
226  if (capture_ref > 0) {
227  if (i > last) {
228  parts->push_back(
229  ReplacementPart::ReplacementSubString(last, i));
230  }
231  DCHECK(capture_ref <= capture_count);
232  parts->push_back(ReplacementPart::SubjectCapture(capture_ref));
233  last = next_index + 1;
234  }
235  i = next_index;
236  break;
237  }
238  case '<': {
239  if (capture_name_map.is_null()) {
240  i = next_index;
241  break;
242  }
243 
244  // Scan until the next '>', and let the enclosed substring be the
245  // groupName.
246 
247  const int name_start_index = next_index + 1;
248  int closing_bracket_index = -1;
249  for (int j = name_start_index; j < length; j++) {
250  if (characters[j] == '>') {
251  closing_bracket_index = j;
252  break;
253  }
254  }
255 
256  // If no closing bracket is found, '$<' is treated as a string
257  // literal.
258  if (closing_bracket_index == -1) {
259  i = next_index;
260  break;
261  }
262 
263  Vector<Char> requested_name =
264  characters.SubVector(name_start_index, closing_bracket_index);
265 
266  // Let capture be ? Get(namedCaptures, groupName).
267 
268  const int capture_index = LookupNamedCapture(
269  [=](String capture_name) {
270  return capture_name->IsEqualTo(requested_name);
271  },
272  capture_name_map);
273 
274  // If capture is undefined or does not exist, replace the text
275  // through the following '>' with the empty string.
276  // Otherwise, replace the text through the following '>' with
277  // ? ToString(capture).
278 
279  DCHECK(capture_index == -1 ||
280  (1 <= capture_index && capture_index <= capture_count));
281 
282  if (i > last) {
283  parts->push_back(ReplacementPart::ReplacementSubString(last, i));
284  }
285  parts->push_back(
286  (capture_index == -1)
287  ? ReplacementPart::EmptyReplacement()
288  : ReplacementPart::SubjectCapture(capture_index));
289  last = closing_bracket_index + 1;
290  i = closing_bracket_index;
291  break;
292  }
293  default:
294  i = next_index;
295  break;
296  }
297  }
298  }
299  if (length > last) {
300  if (last == 0) {
301  // Replacement is simple. Do not use Apply to do the replacement.
302  return true;
303  } else {
304  parts->push_back(ReplacementPart::ReplacementSubString(last, length));
305  }
306  }
307  return false;
308  }
309 
311  ZoneVector<Handle<String>> replacement_substrings_;
312 };
313 
314 bool CompiledReplacement::Compile(Isolate* isolate, Handle<JSRegExp> regexp,
315  Handle<String> replacement, int capture_count,
316  int subject_length) {
317  {
319  String::FlatContent content = replacement->GetFlatContent();
320  DCHECK(content.IsFlat());
321 
322  FixedArray capture_name_map;
323  if (capture_count > 0) {
324  DCHECK_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP);
325  Object* maybe_capture_name_map = regexp->CaptureNameMap();
326  if (maybe_capture_name_map->IsFixedArray()) {
327  capture_name_map = FixedArray::cast(maybe_capture_name_map);
328  }
329  }
330 
331  bool simple;
332  if (content.IsOneByte()) {
333  simple = ParseReplacementPattern(&parts_, content.ToOneByteVector(),
334  capture_name_map, capture_count,
335  subject_length);
336  } else {
337  DCHECK(content.IsTwoByte());
338  simple = ParseReplacementPattern(&parts_, content.ToUC16Vector(),
339  capture_name_map, capture_count,
340  subject_length);
341  }
342  if (simple) return true;
343  }
344 
345  // Find substrings of replacement string and create them as String objects.
346  int substring_index = 0;
347  for (ReplacementPart& part : parts_) {
348  int tag = part.tag;
349  if (tag <= 0) { // A replacement string slice.
350  int from = -tag;
351  int to = part.data;
352  replacement_substrings_.push_back(
353  isolate->factory()->NewSubString(replacement, from, to));
354  part.tag = REPLACEMENT_SUBSTRING;
355  part.data = substring_index;
356  substring_index++;
357  } else if (tag == REPLACEMENT_STRING) {
358  replacement_substrings_.push_back(replacement);
359  part.data = substring_index;
360  substring_index++;
361  }
362  }
363  return false;
364 }
365 
366 
367 void CompiledReplacement::Apply(ReplacementStringBuilder* builder,
368  int match_from, int match_to, int32_t* match) {
369  DCHECK_LT(0, parts_.size());
370  for (ReplacementPart& part : parts_) {
371  switch (part.tag) {
372  case SUBJECT_PREFIX:
373  if (match_from > 0) builder->AddSubjectSlice(0, match_from);
374  break;
375  case SUBJECT_SUFFIX: {
376  int subject_length = part.data;
377  if (match_to < subject_length) {
378  builder->AddSubjectSlice(match_to, subject_length);
379  }
380  break;
381  }
382  case SUBJECT_CAPTURE: {
383  int capture = part.data;
384  int from = match[capture * 2];
385  int to = match[capture * 2 + 1];
386  if (from >= 0 && to > from) {
387  builder->AddSubjectSlice(from, to);
388  }
389  break;
390  }
391  case REPLACEMENT_SUBSTRING:
392  case REPLACEMENT_STRING:
393  builder->AddString(replacement_substrings_[part.data]);
394  break;
395  case EMPTY_REPLACEMENT:
396  break;
397  default:
398  UNREACHABLE();
399  }
400  }
401 }
402 
403 void FindOneByteStringIndices(Vector<const uint8_t> subject, uint8_t pattern,
404  std::vector<int>* indices, unsigned int limit) {
405  DCHECK_LT(0, limit);
406  // Collect indices of pattern in subject using memchr.
407  // Stop after finding at most limit values.
408  const uint8_t* subject_start = subject.start();
409  const uint8_t* subject_end = subject_start + subject.length();
410  const uint8_t* pos = subject_start;
411  while (limit > 0) {
412  pos = reinterpret_cast<const uint8_t*>(
413  memchr(pos, pattern, subject_end - pos));
414  if (pos == nullptr) return;
415  indices->push_back(static_cast<int>(pos - subject_start));
416  pos++;
417  limit--;
418  }
419 }
420 
421 void FindTwoByteStringIndices(const Vector<const uc16> subject, uc16 pattern,
422  std::vector<int>* indices, unsigned int limit) {
423  DCHECK_LT(0, limit);
424  const uc16* subject_start = subject.start();
425  const uc16* subject_end = subject_start + subject.length();
426  for (const uc16* pos = subject_start; pos < subject_end && limit > 0; pos++) {
427  if (*pos == pattern) {
428  indices->push_back(static_cast<int>(pos - subject_start));
429  limit--;
430  }
431  }
432 }
433 
434 template <typename SubjectChar, typename PatternChar>
435 void FindStringIndices(Isolate* isolate, Vector<const SubjectChar> subject,
436  Vector<const PatternChar> pattern,
437  std::vector<int>* indices, unsigned int limit) {
438  DCHECK_LT(0, limit);
439  // Collect indices of pattern in subject.
440  // Stop after finding at most limit values.
441  int pattern_length = pattern.length();
442  int index = 0;
443  StringSearch<PatternChar, SubjectChar> search(isolate, pattern);
444  while (limit > 0) {
445  index = search.Search(subject, index);
446  if (index < 0) return;
447  indices->push_back(index);
448  index += pattern_length;
449  limit--;
450  }
451 }
452 
453 void FindStringIndicesDispatch(Isolate* isolate, String subject, String pattern,
454  std::vector<int>* indices, unsigned int limit) {
455  {
456  DisallowHeapAllocation no_gc;
457  String::FlatContent subject_content = subject->GetFlatContent();
458  String::FlatContent pattern_content = pattern->GetFlatContent();
459  DCHECK(subject_content.IsFlat());
460  DCHECK(pattern_content.IsFlat());
461  if (subject_content.IsOneByte()) {
462  Vector<const uint8_t> subject_vector = subject_content.ToOneByteVector();
463  if (pattern_content.IsOneByte()) {
464  Vector<const uint8_t> pattern_vector =
465  pattern_content.ToOneByteVector();
466  if (pattern_vector.length() == 1) {
467  FindOneByteStringIndices(subject_vector, pattern_vector[0], indices,
468  limit);
469  } else {
470  FindStringIndices(isolate, subject_vector, pattern_vector, indices,
471  limit);
472  }
473  } else {
474  FindStringIndices(isolate, subject_vector,
475  pattern_content.ToUC16Vector(), indices, limit);
476  }
477  } else {
478  Vector<const uc16> subject_vector = subject_content.ToUC16Vector();
479  if (pattern_content.IsOneByte()) {
480  Vector<const uint8_t> pattern_vector =
481  pattern_content.ToOneByteVector();
482  if (pattern_vector.length() == 1) {
483  FindTwoByteStringIndices(subject_vector, pattern_vector[0], indices,
484  limit);
485  } else {
486  FindStringIndices(isolate, subject_vector, pattern_vector, indices,
487  limit);
488  }
489  } else {
490  Vector<const uc16> pattern_vector = pattern_content.ToUC16Vector();
491  if (pattern_vector.length() == 1) {
492  FindTwoByteStringIndices(subject_vector, pattern_vector[0], indices,
493  limit);
494  } else {
495  FindStringIndices(isolate, subject_vector, pattern_vector, indices,
496  limit);
497  }
498  }
499  }
500  }
501 }
502 
503 namespace {
504 std::vector<int>* GetRewoundRegexpIndicesList(Isolate* isolate) {
505  std::vector<int>* list = isolate->regexp_indices();
506  list->clear();
507  return list;
508 }
509 
510 void TruncateRegexpIndicesList(Isolate* isolate) {
511  // Same size as smallest zone segment, preserving behavior from the
512  // runtime zone.
513  static const int kMaxRegexpIndicesListCapacity = 8 * KB;
514  std::vector<int>* indicies = isolate->regexp_indices();
515  if (indicies->capacity() > kMaxRegexpIndicesListCapacity) {
516  // Throw away backing storage.
517  indicies->clear();
518  indicies->shrink_to_fit();
519  }
520 }
521 } // namespace
522 
523 template <typename ResultSeqString>
524 V8_WARN_UNUSED_RESULT static Object* StringReplaceGlobalAtomRegExpWithString(
525  Isolate* isolate, Handle<String> subject, Handle<JSRegExp> pattern_regexp,
526  Handle<String> replacement, Handle<RegExpMatchInfo> last_match_info) {
527  DCHECK(subject->IsFlat());
528  DCHECK(replacement->IsFlat());
529 
530  std::vector<int>* indices = GetRewoundRegexpIndicesList(isolate);
531 
532  DCHECK_EQ(JSRegExp::ATOM, pattern_regexp->TypeTag());
533  String pattern =
534  String::cast(pattern_regexp->DataAt(JSRegExp::kAtomPatternIndex));
535  int subject_len = subject->length();
536  int pattern_len = pattern->length();
537  int replacement_len = replacement->length();
538 
539  FindStringIndicesDispatch(isolate, *subject, pattern, indices, 0xFFFFFFFF);
540 
541  if (indices->empty()) return *subject;
542 
543  // Detect integer overflow.
544  int64_t result_len_64 = (static_cast<int64_t>(replacement_len) -
545  static_cast<int64_t>(pattern_len)) *
546  static_cast<int64_t>(indices->size()) +
547  static_cast<int64_t>(subject_len);
548  int result_len;
549  if (result_len_64 > static_cast<int64_t>(String::kMaxLength)) {
550  STATIC_ASSERT(String::kMaxLength < kMaxInt);
551  result_len = kMaxInt; // Provoke exception.
552  } else {
553  result_len = static_cast<int>(result_len_64);
554  }
555  if (result_len == 0) {
556  return ReadOnlyRoots(isolate).empty_string();
557  }
558 
559  int subject_pos = 0;
560  int result_pos = 0;
561 
562  MaybeHandle<SeqString> maybe_res;
563  if (ResultSeqString::kHasOneByteEncoding) {
564  maybe_res = isolate->factory()->NewRawOneByteString(result_len);
565  } else {
566  maybe_res = isolate->factory()->NewRawTwoByteString(result_len);
567  }
568  Handle<SeqString> untyped_res;
569  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, untyped_res, maybe_res);
570  Handle<ResultSeqString> result = Handle<ResultSeqString>::cast(untyped_res);
571 
572  DisallowHeapAllocation no_gc;
573  for (int index : *indices) {
574  // Copy non-matched subject content.
575  if (subject_pos < index) {
576  String::WriteToFlat(*subject, result->GetChars() + result_pos,
577  subject_pos, index);
578  result_pos += index - subject_pos;
579  }
580 
581  // Replace match.
582  if (replacement_len > 0) {
583  String::WriteToFlat(*replacement, result->GetChars() + result_pos, 0,
584  replacement_len);
585  result_pos += replacement_len;
586  }
587 
588  subject_pos = index + pattern_len;
589  }
590  // Add remaining subject content at the end.
591  if (subject_pos < subject_len) {
592  String::WriteToFlat(*subject, result->GetChars() + result_pos, subject_pos,
593  subject_len);
594  }
595 
596  int32_t match_indices[] = {indices->back(), indices->back() + pattern_len};
597  RegExpImpl::SetLastMatchInfo(isolate, last_match_info, subject, 0,
598  match_indices);
599 
600  TruncateRegexpIndicesList(isolate);
601 
602  return *result;
603 }
604 
605 V8_WARN_UNUSED_RESULT static Object* StringReplaceGlobalRegExpWithString(
606  Isolate* isolate, Handle<String> subject, Handle<JSRegExp> regexp,
607  Handle<String> replacement, Handle<RegExpMatchInfo> last_match_info) {
608  DCHECK(subject->IsFlat());
609  DCHECK(replacement->IsFlat());
610 
611  int capture_count = regexp->CaptureCount();
612  int subject_length = subject->length();
613 
614  JSRegExp::Type typeTag = regexp->TypeTag();
615  if (typeTag == JSRegExp::IRREGEXP) {
616  // Ensure the RegExp is compiled so we can access the capture-name map.
617  if (RegExpImpl::IrregexpPrepare(isolate, regexp, subject) == -1) {
618  DCHECK(isolate->has_pending_exception());
619  return ReadOnlyRoots(isolate).exception();
620  }
621  }
622 
623  // CompiledReplacement uses zone allocation.
624  Zone zone(isolate->allocator(), ZONE_NAME);
625  CompiledReplacement compiled_replacement(&zone);
626  const bool simple_replace = compiled_replacement.Compile(
627  isolate, regexp, replacement, capture_count, subject_length);
628 
629  // Shortcut for simple non-regexp global replacements
630  if (typeTag == JSRegExp::ATOM && simple_replace) {
631  if (subject->HasOnlyOneByteChars() && replacement->HasOnlyOneByteChars()) {
632  return StringReplaceGlobalAtomRegExpWithString<SeqOneByteString>(
633  isolate, subject, regexp, replacement, last_match_info);
634  } else {
635  return StringReplaceGlobalAtomRegExpWithString<SeqTwoByteString>(
636  isolate, subject, regexp, replacement, last_match_info);
637  }
638  }
639 
640  RegExpImpl::GlobalCache global_cache(regexp, subject, isolate);
641  if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception();
642 
643  int32_t* current_match = global_cache.FetchNext();
644  if (current_match == nullptr) {
645  if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception();
646  return *subject;
647  }
648 
649  // Guessing the number of parts that the final result string is built
650  // from. Global regexps can match any number of times, so we guess
651  // conservatively.
652  int expected_parts = (compiled_replacement.parts() + 1) * 4 + 1;
653  ReplacementStringBuilder builder(isolate->heap(), subject, expected_parts);
654 
655  // Number of parts added by compiled replacement plus preceding
656  // string and possibly suffix after last match. It is possible for
657  // all components to use two elements when encoded as two smis.
658  const int parts_added_per_loop = 2 * (compiled_replacement.parts() + 2);
659 
660  int prev = 0;
661 
662  do {
663  builder.EnsureCapacity(parts_added_per_loop);
664 
665  int start = current_match[0];
666  int end = current_match[1];
667 
668  if (prev < start) {
669  builder.AddSubjectSlice(prev, start);
670  }
671 
672  if (simple_replace) {
673  builder.AddString(replacement);
674  } else {
675  compiled_replacement.Apply(&builder, start, end, current_match);
676  }
677  prev = end;
678 
679  current_match = global_cache.FetchNext();
680  } while (current_match != nullptr);
681 
682  if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception();
683 
684  if (prev < subject_length) {
685  builder.EnsureCapacity(2);
686  builder.AddSubjectSlice(prev, subject_length);
687  }
688 
689  RegExpImpl::SetLastMatchInfo(isolate, last_match_info, subject, capture_count,
690  global_cache.LastSuccessfulMatch());
691 
692  RETURN_RESULT_OR_FAILURE(isolate, builder.ToString());
693 }
694 
695 template <typename ResultSeqString>
696 V8_WARN_UNUSED_RESULT static Object* StringReplaceGlobalRegExpWithEmptyString(
697  Isolate* isolate, Handle<String> subject, Handle<JSRegExp> regexp,
698  Handle<RegExpMatchInfo> last_match_info) {
699  DCHECK(subject->IsFlat());
700 
701  // Shortcut for simple non-regexp global replacements
702  if (regexp->TypeTag() == JSRegExp::ATOM) {
703  Handle<String> empty_string = isolate->factory()->empty_string();
704  if (subject->IsOneByteRepresentation()) {
705  return StringReplaceGlobalAtomRegExpWithString<SeqOneByteString>(
706  isolate, subject, regexp, empty_string, last_match_info);
707  } else {
708  return StringReplaceGlobalAtomRegExpWithString<SeqTwoByteString>(
709  isolate, subject, regexp, empty_string, last_match_info);
710  }
711  }
712 
713  RegExpImpl::GlobalCache global_cache(regexp, subject, isolate);
714  if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception();
715 
716  int32_t* current_match = global_cache.FetchNext();
717  if (current_match == nullptr) {
718  if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception();
719  return *subject;
720  }
721 
722  int start = current_match[0];
723  int end = current_match[1];
724  int capture_count = regexp->CaptureCount();
725  int subject_length = subject->length();
726 
727  int new_length = subject_length - (end - start);
728  if (new_length == 0) return ReadOnlyRoots(isolate).empty_string();
729 
730  Handle<ResultSeqString> answer;
731  if (ResultSeqString::kHasOneByteEncoding) {
732  answer = Handle<ResultSeqString>::cast(
733  isolate->factory()->NewRawOneByteString(new_length).ToHandleChecked());
734  } else {
735  answer = Handle<ResultSeqString>::cast(
736  isolate->factory()->NewRawTwoByteString(new_length).ToHandleChecked());
737  }
738 
739  int prev = 0;
740  int position = 0;
741 
742  DisallowHeapAllocation no_gc;
743  do {
744  start = current_match[0];
745  end = current_match[1];
746  if (prev < start) {
747  // Add substring subject[prev;start] to answer string.
748  String::WriteToFlat(*subject, answer->GetChars() + position, prev, start);
749  position += start - prev;
750  }
751  prev = end;
752 
753  current_match = global_cache.FetchNext();
754  } while (current_match != nullptr);
755 
756  if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception();
757 
758  RegExpImpl::SetLastMatchInfo(isolate, last_match_info, subject, capture_count,
759  global_cache.LastSuccessfulMatch());
760 
761  if (prev < subject_length) {
762  // Add substring subject[prev;length] to answer string.
763  String::WriteToFlat(*subject, answer->GetChars() + position, prev,
764  subject_length);
765  position += subject_length - prev;
766  }
767 
768  if (position == 0) return ReadOnlyRoots(isolate).empty_string();
769 
770  // Shorten string and fill
771  int string_size = ResultSeqString::SizeFor(position);
772  int allocated_string_size = ResultSeqString::SizeFor(new_length);
773  int delta = allocated_string_size - string_size;
774 
775  answer->set_length(position);
776  if (delta == 0) return *answer;
777 
778  Address end_of_string = answer->address() + string_size;
779  Heap* heap = isolate->heap();
780 
781  // The trimming is performed on a newly allocated object, which is on a
782  // freshly allocated page or on an already swept page. Hence, the sweeper
783  // thread can not get confused with the filler creation. No synchronization
784  // needed.
785  // TODO(hpayer): We should shrink the large object page if the size
786  // of the object changed significantly.
787  if (!heap->IsLargeObject(*answer)) {
788  heap->CreateFillerObjectAt(end_of_string, delta, ClearRecordedSlots::kNo);
789  }
790  return *answer;
791 }
792 
793 namespace {
794 
795 Object* StringReplaceGlobalRegExpWithStringHelper(
796  Isolate* isolate, Handle<JSRegExp> regexp, Handle<String> subject,
797  Handle<String> replacement, Handle<RegExpMatchInfo> last_match_info) {
798  CHECK(regexp->GetFlags() & JSRegExp::kGlobal);
799 
800  subject = String::Flatten(isolate, subject);
801 
802  if (replacement->length() == 0) {
803  if (subject->HasOnlyOneByteChars()) {
804  return StringReplaceGlobalRegExpWithEmptyString<SeqOneByteString>(
805  isolate, subject, regexp, last_match_info);
806  } else {
807  return StringReplaceGlobalRegExpWithEmptyString<SeqTwoByteString>(
808  isolate, subject, regexp, last_match_info);
809  }
810  }
811 
812  replacement = String::Flatten(isolate, replacement);
813 
814  return StringReplaceGlobalRegExpWithString(isolate, subject, regexp,
815  replacement, last_match_info);
816 }
817 
818 } // namespace
819 
820 RUNTIME_FUNCTION(Runtime_StringSplit) {
821  HandleScope handle_scope(isolate);
822  DCHECK_EQ(3, args.length());
823  CONVERT_ARG_HANDLE_CHECKED(String, subject, 0);
824  CONVERT_ARG_HANDLE_CHECKED(String, pattern, 1);
825  CONVERT_NUMBER_CHECKED(uint32_t, limit, Uint32, args[2]);
826  CHECK_LT(0, limit);
827 
828  int subject_length = subject->length();
829  int pattern_length = pattern->length();
830  CHECK_LT(0, pattern_length);
831 
832  if (limit == 0xFFFFFFFFu) {
833  FixedArray last_match_cache_unused;
834  Handle<Object> cached_answer(
835  RegExpResultsCache::Lookup(isolate->heap(), *subject, *pattern,
836  &last_match_cache_unused,
837  RegExpResultsCache::STRING_SPLIT_SUBSTRINGS),
838  isolate);
839  if (*cached_answer != Smi::kZero) {
840  // The cache FixedArray is a COW-array and can therefore be reused.
841  Handle<JSArray> result = isolate->factory()->NewJSArrayWithElements(
842  Handle<FixedArray>::cast(cached_answer));
843  return *result;
844  }
845  }
846 
847  // The limit can be very large (0xFFFFFFFFu), but since the pattern
848  // isn't empty, we can never create more parts than ~half the length
849  // of the subject.
850 
851  subject = String::Flatten(isolate, subject);
852  pattern = String::Flatten(isolate, pattern);
853 
854  std::vector<int>* indices = GetRewoundRegexpIndicesList(isolate);
855 
856  FindStringIndicesDispatch(isolate, *subject, *pattern, indices, limit);
857 
858  if (static_cast<uint32_t>(indices->size()) < limit) {
859  indices->push_back(subject_length);
860  }
861 
862  // The list indices now contains the end of each part to create.
863 
864  // Create JSArray of substrings separated by separator.
865  int part_count = static_cast<int>(indices->size());
866 
867  Handle<JSArray> result =
868  isolate->factory()->NewJSArray(PACKED_ELEMENTS, part_count, part_count,
869  INITIALIZE_ARRAY_ELEMENTS_WITH_HOLE);
870 
871  DCHECK(result->HasObjectElements());
872 
873  Handle<FixedArray> elements(FixedArray::cast(result->elements()), isolate);
874 
875  if (part_count == 1 && indices->at(0) == subject_length) {
876  elements->set(0, *subject);
877  } else {
878  int part_start = 0;
879  FOR_WITH_HANDLE_SCOPE(isolate, int, i = 0, i, i < part_count, i++, {
880  int part_end = indices->at(i);
881  Handle<String> substring =
882  isolate->factory()->NewProperSubString(subject, part_start, part_end);
883  elements->set(i, *substring);
884  part_start = part_end + pattern_length;
885  });
886  }
887 
888  if (limit == 0xFFFFFFFFu) {
889  if (result->HasObjectElements()) {
890  RegExpResultsCache::Enter(isolate, subject, pattern, elements,
891  isolate->factory()->empty_fixed_array(),
892  RegExpResultsCache::STRING_SPLIT_SUBSTRINGS);
893  }
894  }
895 
896  TruncateRegexpIndicesList(isolate);
897 
898  return *result;
899 }
900 
901 RUNTIME_FUNCTION(Runtime_RegExpExec) {
902  HandleScope scope(isolate);
903  DCHECK_EQ(4, args.length());
904  CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0);
905  CONVERT_ARG_HANDLE_CHECKED(String, subject, 1);
906  CONVERT_INT32_ARG_CHECKED(index, 2);
907  CONVERT_ARG_HANDLE_CHECKED(RegExpMatchInfo, last_match_info, 3);
908  // Due to the way the JS calls are constructed this must be less than the
909  // length of a string, i.e. it is always a Smi. We check anyway for security.
910  CHECK_LE(0, index);
911  CHECK_GE(subject->length(), index);
912  isolate->counters()->regexp_entry_runtime()->Increment();
913  RETURN_RESULT_OR_FAILURE(isolate, RegExpImpl::Exec(isolate, regexp, subject,
914  index, last_match_info));
915 }
916 
917 RUNTIME_FUNCTION(Runtime_RegExpInternalReplace) {
918  HandleScope scope(isolate);
919  DCHECK_EQ(3, args.length());
920  CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0);
921  CONVERT_ARG_HANDLE_CHECKED(String, subject, 1);
922  CONVERT_ARG_HANDLE_CHECKED(String, replacement, 2);
923 
924  Handle<RegExpMatchInfo> internal_match_info =
925  isolate->regexp_internal_match_info();
926 
927  return StringReplaceGlobalRegExpWithStringHelper(
928  isolate, regexp, subject, replacement, internal_match_info);
929 }
930 
931 namespace {
932 
933 class MatchInfoBackedMatch : public String::Match {
934  public:
935  MatchInfoBackedMatch(Isolate* isolate, Handle<JSRegExp> regexp,
936  Handle<String> subject,
937  Handle<RegExpMatchInfo> match_info)
938  : isolate_(isolate), match_info_(match_info) {
939  subject_ = String::Flatten(isolate, subject);
940 
941  if (regexp->TypeTag() == JSRegExp::IRREGEXP) {
942  Object* o = regexp->CaptureNameMap();
943  has_named_captures_ = o->IsFixedArray();
944  if (has_named_captures_) {
945  capture_name_map_ = handle(FixedArray::cast(o), isolate);
946  }
947  } else {
948  has_named_captures_ = false;
949  }
950  }
951 
952  Handle<String> GetMatch() override {
953  return RegExpUtils::GenericCaptureGetter(isolate_, match_info_, 0, nullptr);
954  }
955 
956  Handle<String> GetPrefix() override {
957  const int match_start = match_info_->Capture(0);
958  return isolate_->factory()->NewSubString(subject_, 0, match_start);
959  }
960 
961  Handle<String> GetSuffix() override {
962  const int match_end = match_info_->Capture(1);
963  return isolate_->factory()->NewSubString(subject_, match_end,
964  subject_->length());
965  }
966 
967  bool HasNamedCaptures() override { return has_named_captures_; }
968 
969  int CaptureCount() override {
970  return match_info_->NumberOfCaptureRegisters() / 2;
971  }
972 
973  MaybeHandle<String> GetCapture(int i, bool* capture_exists) override {
974  Handle<Object> capture_obj = RegExpUtils::GenericCaptureGetter(
975  isolate_, match_info_, i, capture_exists);
976  return (*capture_exists) ? Object::ToString(isolate_, capture_obj)
977  : isolate_->factory()->empty_string();
978  }
979 
980  MaybeHandle<String> GetNamedCapture(Handle<String> name,
981  CaptureState* state) override {
982  DCHECK(has_named_captures_);
983  const int capture_index = LookupNamedCapture(
984  [=](String capture_name) { return capture_name->Equals(*name); },
985  *capture_name_map_);
986 
987  if (capture_index == -1) {
988  *state = INVALID;
989  return name; // Arbitrary string handle.
990  }
991 
992  DCHECK(1 <= capture_index && capture_index <= CaptureCount());
993 
994  bool capture_exists;
995  Handle<String> capture_value;
996  ASSIGN_RETURN_ON_EXCEPTION(isolate_, capture_value,
997  GetCapture(capture_index, &capture_exists),
998  String);
999 
1000  if (!capture_exists) {
1001  *state = UNMATCHED;
1002  return isolate_->factory()->empty_string();
1003  } else {
1004  *state = MATCHED;
1005  return capture_value;
1006  }
1007  }
1008 
1009  private:
1010  Isolate* isolate_;
1011  Handle<String> subject_;
1012  Handle<RegExpMatchInfo> match_info_;
1013 
1014  bool has_named_captures_;
1015  Handle<FixedArray> capture_name_map_;
1016 };
1017 
1018 class VectorBackedMatch : public String::Match {
1019  public:
1020  VectorBackedMatch(Isolate* isolate, Handle<String> subject,
1021  Handle<String> match, int match_position,
1022  ZoneVector<Handle<Object>>* captures,
1023  Handle<Object> groups_obj)
1024  : isolate_(isolate),
1025  match_(match),
1026  match_position_(match_position),
1027  captures_(captures) {
1028  subject_ = String::Flatten(isolate, subject);
1029 
1030  DCHECK(groups_obj->IsUndefined(isolate) || groups_obj->IsJSReceiver());
1031  has_named_captures_ = !groups_obj->IsUndefined(isolate);
1032  if (has_named_captures_) groups_obj_ = Handle<JSReceiver>::cast(groups_obj);
1033  }
1034 
1035  Handle<String> GetMatch() override { return match_; }
1036 
1037  Handle<String> GetPrefix() override {
1038  return isolate_->factory()->NewSubString(subject_, 0, match_position_);
1039  }
1040 
1041  Handle<String> GetSuffix() override {
1042  const int match_end_position = match_position_ + match_->length();
1043  return isolate_->factory()->NewSubString(subject_, match_end_position,
1044  subject_->length());
1045  }
1046 
1047  bool HasNamedCaptures() override { return has_named_captures_; }
1048 
1049  int CaptureCount() override { return static_cast<int>(captures_->size()); }
1050 
1051  MaybeHandle<String> GetCapture(int i, bool* capture_exists) override {
1052  Handle<Object> capture_obj = captures_->at(i);
1053  if (capture_obj->IsUndefined(isolate_)) {
1054  *capture_exists = false;
1055  return isolate_->factory()->empty_string();
1056  }
1057  *capture_exists = true;
1058  return Object::ToString(isolate_, capture_obj);
1059  }
1060 
1061  MaybeHandle<String> GetNamedCapture(Handle<String> name,
1062  CaptureState* state) override {
1063  DCHECK(has_named_captures_);
1064 
1065  Maybe<bool> maybe_capture_exists =
1066  JSReceiver::HasProperty(groups_obj_, name);
1067  if (maybe_capture_exists.IsNothing()) return MaybeHandle<String>();
1068 
1069  if (!maybe_capture_exists.FromJust()) {
1070  *state = INVALID;
1071  return name; // Arbitrary string handle.
1072  }
1073 
1074  Handle<Object> capture_obj;
1075  ASSIGN_RETURN_ON_EXCEPTION(isolate_, capture_obj,
1076  Object::GetProperty(isolate_, groups_obj_, name),
1077  String);
1078  if (capture_obj->IsUndefined(isolate_)) {
1079  *state = UNMATCHED;
1080  return isolate_->factory()->empty_string();
1081  } else {
1082  *state = MATCHED;
1083  return Object::ToString(isolate_, capture_obj);
1084  }
1085  }
1086 
1087  private:
1088  Isolate* isolate_;
1089  Handle<String> subject_;
1090  Handle<String> match_;
1091  const int match_position_;
1092  ZoneVector<Handle<Object>>* captures_;
1093 
1094  bool has_named_captures_;
1095  Handle<JSReceiver> groups_obj_;
1096 };
1097 
1098 // Create the groups object (see also the RegExp result creation in
1099 // RegExpBuiltinsAssembler::ConstructNewResultFromMatchInfo).
1100 Handle<JSObject> ConstructNamedCaptureGroupsObject(
1101  Isolate* isolate, Handle<FixedArray> capture_map,
1102  const std::function<Object*(int)>& f_get_capture) {
1103  Handle<JSObject> groups = isolate->factory()->NewJSObjectWithNullProto();
1104 
1105  const int capture_count = capture_map->length() >> 1;
1106  for (int i = 0; i < capture_count; i++) {
1107  const int name_ix = i * 2;
1108  const int index_ix = i * 2 + 1;
1109 
1110  Handle<String> capture_name(String::cast(capture_map->get(name_ix)),
1111  isolate);
1112  const int capture_ix = Smi::ToInt(capture_map->get(index_ix));
1113  DCHECK(1 <= capture_ix && capture_ix <= capture_count);
1114 
1115  Handle<Object> capture_value(f_get_capture(capture_ix), isolate);
1116  DCHECK(capture_value->IsUndefined(isolate) || capture_value->IsString());
1117 
1118  JSObject::AddProperty(isolate, groups, capture_name, capture_value, NONE);
1119  }
1120 
1121  return groups;
1122 }
1123 
1124 // Only called from Runtime_RegExpExecMultiple so it doesn't need to maintain
1125 // separate last match info. See comment on that function.
//
// Runs {regexp} repeatedly over the flat {subject} and records every match in
// a builder that starts out on {result_array}'s elements. Subject slices
// between matches are recorded as well. {has_capture} must be true exactly
// when the regexp has capture groups (see the DCHECK_NE below): without
// captures each match is added as a string, with captures each match is added
// as a JSArray of (match, captures..., match start, subject[, groups]).
// {last_match_array} is updated with the last successful match.
// Returns the result array if there was at least one match, null if there was
// none, and the exception sentinel if matching failed.
1126 template <bool has_capture>
1127 static Object* SearchRegExpMultiple(Isolate* isolate, Handle<String> subject,
1128  Handle<JSRegExp> regexp,
1129  Handle<RegExpMatchInfo> last_match_array,
1130  Handle<JSArray> result_array) {
1131  DCHECK(RegExpUtils::IsUnmodifiedRegExp(isolate, regexp));
1132  DCHECK_NE(has_capture, regexp->CaptureCount() == 0);
1133  DCHECK(subject->IsFlat());
1134 
1135  int capture_count = regexp->CaptureCount();
1136  int subject_length = subject->length();
1137 
// Results are only looked up in / entered into the results cache for
// subjects longer than this many characters.
1138  static const int kMinLengthToCache = 0x1000;
1139 
1140  if (subject_length > kMinLengthToCache) {
1141  FixedArray last_match_cache;
1142  Object* cached_answer = RegExpResultsCache::Lookup(
1143  isolate->heap(), *subject, regexp->data(), &last_match_cache,
1144  RegExpResultsCache::REGEXP_MULTIPLE_INDICES);
1145  if (cached_answer->IsFixedArray()) {
// Cache hit: restore the last match registers that were stored alongside
// the cached result (two registers per capture, plus two for the match).
1146  int capture_registers = (capture_count + 1) * 2;
1147  int32_t* last_match = NewArray<int32_t>(capture_registers);
1148  for (int i = 0; i < capture_registers; i++) {
1149  last_match[i] = Smi::ToInt(last_match_cache->get(i));
1150  }
1151  Handle<FixedArray> cached_fixed_array =
1152  Handle<FixedArray>(FixedArray::cast(cached_answer), isolate);
1153  // The cache FixedArray is a COW-array and we need to return a copy.
1154  Handle<FixedArray> copied_fixed_array =
1155  isolate->factory()->CopyFixedArrayWithMap(
1156  cached_fixed_array, isolate->factory()->fixed_array_map());
1157  JSArray::SetContent(result_array, copied_fixed_array);
1158  RegExpImpl::SetLastMatchInfo(isolate, last_match_array, subject,
1159  capture_count, last_match);
// The temporary register buffer is no longer needed once the last match
// info has been set.
1160  DeleteArray(last_match);
1161  return *result_array;
1162  }
1163  }
1164 
1165  RegExpImpl::GlobalCache global_cache(regexp, subject, isolate);
1166  if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception();
1167 
1168  // Ensured in Runtime_RegExpExecMultiple.
1169  DCHECK(result_array->HasObjectElements());
1170  Handle<FixedArray> result_elements(FixedArray::cast(result_array->elements()),
1171  isolate);
// Start with a backing store of at least 16 entries.
1172  if (result_elements->length() < 16) {
1173  result_elements = isolate->factory()->NewFixedArrayWithHoles(16);
1174  }
1175 
1176  FixedArrayBuilder builder(result_elements);
1177 
1178  // Position to search from.
1179  int match_start = -1;
1180  int match_end = 0;
1181  bool first = true;
1182 
1183  // Two smis before and after the match, for very long strings.
1184  static const int kMaxBuilderEntriesPerRegExpMatch = 5;
1185 
1186  while (true) {
// FetchNext() returns the capture registers of the next match, or nullptr
// when there are no more matches (or an exception occurred, which is
// checked after the loop).
1187  int32_t* current_match = global_cache.FetchNext();
1188  if (current_match == nullptr) break;
1189  match_start = current_match[0];
1190  builder.EnsureCapacity(isolate, kMaxBuilderEntriesPerRegExpMatch);
// Record the unmatched part of the subject between the previous match and
// this one.
1191  if (match_end < match_start) {
1192  ReplacementStringBuilder::AddSubjectSlice(&builder, match_end,
1193  match_start);
1194  }
1195  match_end = current_match[1];
1196  {
1197  // Avoid accumulating new handles inside loop.
1198  HandleScope temp_scope(isolate);
1199  Handle<String> match;
// Only the first match is created with NewSubString; all later matches
// use NewProperSubString.
1200  if (!first) {
1201  match = isolate->factory()->NewProperSubString(subject, match_start,
1202  match_end);
1203  } else {
1204  match =
1205  isolate->factory()->NewSubString(subject, match_start, match_end);
1206  first = false;
1207  }
1208 
1209  if (has_capture) {
1210  // Arguments array to replace function is match, captures, index and
1211  // subject, i.e., 3 + capture count in total. If the RegExp contains
1212  // named captures, they are also passed as the last argument.
1213 
1214  Handle<Object> maybe_capture_map(regexp->CaptureNameMap(), isolate);
1215  const bool has_named_captures = maybe_capture_map->IsFixedArray();
1216 
1217  const int argc =
1218  has_named_captures ? 4 + capture_count : 3 + capture_count;
1219 
1220  Handle<FixedArray> elements = isolate->factory()->NewFixedArray(argc);
1221  int cursor = 0;
1222 
1223  elements->set(cursor++, *match);
// Captures start at register pair 1; a negative start register marks a
// capture that did not participate and is stored as undefined.
1224  for (int i = 1; i <= capture_count; i++) {
1225  int start = current_match[i * 2];
1226  if (start >= 0) {
1227  int end = current_match[i * 2 + 1];
1228  DCHECK(start <= end);
1229  Handle<String> substring =
1230  isolate->factory()->NewSubString(subject, start, end);
1231  elements->set(cursor++, *substring);
1232  } else {
1233  DCHECK_GT(0, current_match[i * 2 + 1]);
1234  elements->set(cursor++, ReadOnlyRoots(isolate).undefined_value());
1235  }
1236  }
1237 
1238  elements->set(cursor++, Smi::FromInt(match_start));
1239  elements->set(cursor++, *subject);
1240 
1241  if (has_named_captures) {
// The groups object reads capture values back out of {elements}, where
// capture i was stored at index i above.
1242  Handle<FixedArray> capture_map =
1243  Handle<FixedArray>::cast(maybe_capture_map);
1244  Handle<JSObject> groups = ConstructNamedCaptureGroupsObject(
1245  isolate, capture_map, [=](int ix) { return elements->get(ix); });
1246  elements->set(cursor++, *groups);
1247  }
1248 
1249  DCHECK_EQ(cursor, argc);
1250  builder.Add(*isolate->factory()->NewJSArrayWithElements(elements));
1251  } else {
1252  builder.Add(*match);
1253  }
1254  }
1255  }
1256 
1257  if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception();
1258 
// match_start is still -1 if the loop never found a match.
1259  if (match_start >= 0) {
1260  // Finished matching, with at least one match.
1261  if (match_end < subject_length) {
1262  ReplacementStringBuilder::AddSubjectSlice(&builder, match_end,
1263  subject_length);
1264  }
1265 
1266  RegExpImpl::SetLastMatchInfo(isolate, last_match_array, subject,
1267  capture_count,
1268  global_cache.LastSuccessfulMatch());
1269 
1270  if (subject_length > kMinLengthToCache) {
1271  // Store the last successful match into the array for caching.
1272  // TODO(yangguo): do not expose last match to JS and simplify caching.
1273  int capture_registers = (capture_count + 1) * 2;
1274  Handle<FixedArray> last_match_cache =
1275  isolate->factory()->NewFixedArray(capture_registers);
1276  int32_t* last_match = global_cache.LastSuccessfulMatch();
1277  for (int i = 0; i < capture_registers; i++) {
1278  last_match_cache->set(i, Smi::FromInt(last_match[i]));
1279  }
1280  Handle<FixedArray> result_fixed_array =
1281  FixedArray::ShrinkOrEmpty(isolate, builder.array(), builder.length());
1282  // Cache the result and copy the FixedArray into a COW array.
1283  Handle<FixedArray> copied_fixed_array =
1284  isolate->factory()->CopyFixedArrayWithMap(
1285  result_fixed_array, isolate->factory()->fixed_array_map());
1286  RegExpResultsCache::Enter(
1287  isolate, subject, handle(regexp->data(), isolate), copied_fixed_array,
1288  last_match_cache, RegExpResultsCache::REGEXP_MULTIPLE_INDICES);
1289  }
1290  return *builder.ToJSArray(result_array);
1291  } else {
1292  return ReadOnlyRoots(isolate).null_value(); // No matches at all.
1293  }
1294 }
1295 
1296 // Legacy implementation of RegExp.prototype[Symbol.replace] which
1297 // doesn't properly call the underlying exec method.
//
// Replaces matches of the unmodified {regexp} in {string} with the string
// conversion of {replace_obj}, which must not be callable. Non-global regexps
// replace at most one match (honoring and updating lastIndex when sticky);
// global regexps reset lastIndex to 0 and delegate to the
// StringReplaceGlobalRegExp* helpers. Returns an empty MaybeHandle on
// exception.
1298 V8_WARN_UNUSED_RESULT MaybeHandle<String> RegExpReplace(
1299  Isolate* isolate, Handle<JSRegExp> regexp, Handle<String> string,
1300  Handle<Object> replace_obj) {
1301  // Functional fast-paths are dispatched directly by replace builtin.
1302  DCHECK(RegExpUtils::IsUnmodifiedRegExp(isolate, regexp));
1303  DCHECK(!replace_obj->IsCallable());
1304 
1305  Factory* factory = isolate->factory();
1306 
1307  const int flags = regexp->GetFlags();
1308  const bool global = (flags & JSRegExp::kGlobal) != 0;
1309  const bool sticky = (flags & JSRegExp::kSticky) != 0;
1310 
1311  Handle<String> replace;
1312  ASSIGN_RETURN_ON_EXCEPTION(isolate, replace,
1313  Object::ToString(isolate, replace_obj), String);
1314  replace = String::Flatten(isolate, replace);
1315 
1316  Handle<RegExpMatchInfo> last_match_info = isolate->regexp_last_match_info();
1317 
1318  if (!global) {
1319  // Non-global regexp search, string replace.
1320 
// lastIndex is only consulted for sticky regexps; otherwise the search
// starts at 0.
1321  uint32_t last_index = 0;
1322  if (sticky) {
1323  Handle<Object> last_index_obj(regexp->last_index(), isolate);
1324  ASSIGN_RETURN_ON_EXCEPTION(isolate, last_index_obj,
1325  Object::ToLength(isolate, last_index_obj),
1326  String);
1327  last_index = PositiveNumberToUint32(*last_index_obj);
1328  }
1329 
// Start out with null (no match); only overwritten if Exec is called.
1330  Handle<Object> match_indices_obj(ReadOnlyRoots(isolate).null_value(),
1331  isolate);
1332 
1333  // A lastIndex exceeding the string length always returns null
1334  // (signalling failure) in RegExpBuiltinExec, thus we can skip the call.
1335  if (last_index <= static_cast<uint32_t>(string->length())) {
1336  ASSIGN_RETURN_ON_EXCEPTION(isolate, match_indices_obj,
1337  RegExpImpl::Exec(isolate, regexp, string,
1338  last_index, last_match_info),
1339  String);
1340  }
1341 
// No match: a sticky regexp gets its lastIndex reset to 0 and the input
// string is returned unchanged.
1342  if (match_indices_obj->IsNull(isolate)) {
1343  if (sticky) regexp->set_last_index(Smi::kZero, SKIP_WRITE_BARRIER);
1344  return string;
1345  }
1346 
1347  auto match_indices = Handle<RegExpMatchInfo>::cast(match_indices_obj);
1348 
1349  const int start_index = match_indices->Capture(0);
1350  const int end_index = match_indices->Capture(1);
1351 
// On a successful sticky match, lastIndex moves to the end of the match.
1352  if (sticky)
1353  regexp->set_last_index(Smi::FromInt(end_index), SKIP_WRITE_BARRIER);
1354 
// Result = prefix + expanded replacement + suffix.
1355  IncrementalStringBuilder builder(isolate);
1356  builder.AppendString(factory->NewSubString(string, 0, start_index));
1357 
// An empty replacement string needs no substitution pass.
1358  if (replace->length() > 0) {
1359  MatchInfoBackedMatch m(isolate, regexp, string, match_indices);
1360  Handle<String> replacement;
1361  ASSIGN_RETURN_ON_EXCEPTION(isolate, replacement,
1362  String::GetSubstitution(isolate, &m, replace),
1363  String);
1364  builder.AppendString(replacement);
1365  }
1366 
1367  builder.AppendString(
1368  factory->NewSubString(string, end_index, string->length()));
1369  return builder.Finish();
1370  } else {
1371  // Global regexp search, string replace.
1372  DCHECK(global);
1373  RETURN_ON_EXCEPTION(isolate, RegExpUtils::SetLastIndex(isolate, regexp, 0),
1374  String);
1375 
// Replacing with the empty string has dedicated helpers, selected by
// whether the subject contains only one-byte characters.
1376  if (replace->length() == 0) {
1377  if (string->HasOnlyOneByteChars()) {
1378  Object* result =
1379  StringReplaceGlobalRegExpWithEmptyString<SeqOneByteString>(
1380  isolate, string, regexp, last_match_info);
1381  return handle(String::cast(result), isolate);
1382  } else {
1383  Object* result =
1384  StringReplaceGlobalRegExpWithEmptyString<SeqTwoByteString>(
1385  isolate, string, regexp, last_match_info);
1386  return handle(String::cast(result), isolate);
1387  }
1388  }
1389 
1390  Object* result = StringReplaceGlobalRegExpWithString(
1391  isolate, string, regexp, replace, last_match_info);
// A non-string result is reported to the caller as an empty MaybeHandle.
1392  if (result->IsString()) {
1393  return handle(String::cast(result), isolate);
1394  } else {
1395  return MaybeHandle<String>();
1396  }
1397  }
1398 
1399  UNREACHABLE();
1400 }
1401 
1402 } // namespace
1403 
1404 // This is only called for StringReplaceGlobalRegExpWithFunction.
1405 RUNTIME_FUNCTION(Runtime_RegExpExecMultiple) {
1406  HandleScope handles(isolate);
1407  DCHECK_EQ(4, args.length());
1408 
1409  CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0);
1410  CONVERT_ARG_HANDLE_CHECKED(String, subject, 1);
1411  CONVERT_ARG_HANDLE_CHECKED(RegExpMatchInfo, last_match_info, 2);
1412  CONVERT_ARG_HANDLE_CHECKED(JSArray, result_array, 3);
1413  CHECK(result_array->HasObjectElements());
1414 
1415  subject = String::Flatten(isolate, subject);
1416  CHECK(regexp->GetFlags() & JSRegExp::kGlobal);
1417 
1418  if (regexp->CaptureCount() == 0) {
1419  return SearchRegExpMultiple<false>(isolate, subject, regexp,
1420  last_match_info, result_array);
1421  } else {
1422  return SearchRegExpMultiple<true>(isolate, subject, regexp, last_match_info,
1423  result_array);
1424  }
1425 }
1426 
1427 RUNTIME_FUNCTION(Runtime_StringReplaceNonGlobalRegExpWithFunction) {
1428  HandleScope scope(isolate);
1429  DCHECK_EQ(3, args.length());
1430  CONVERT_ARG_HANDLE_CHECKED(String, subject, 0);
1431  CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 1);
1432  CONVERT_ARG_HANDLE_CHECKED(JSReceiver, replace_obj, 2);
1433 
1434  DCHECK(RegExpUtils::IsUnmodifiedRegExp(isolate, regexp));
1435  DCHECK(replace_obj->map()->is_callable());
1436 
1437  Factory* factory = isolate->factory();
1438  Handle<RegExpMatchInfo> last_match_info = isolate->regexp_last_match_info();
1439 
1440  const int flags = regexp->GetFlags();
1441  DCHECK_EQ(flags & JSRegExp::kGlobal, 0);
1442 
1443  // TODO(jgruber): This should be an easy port to CSA with massive payback.
1444 
1445  const bool sticky = (flags & JSRegExp::kSticky) != 0;
1446  uint32_t last_index = 0;
1447  if (sticky) {
1448  Handle<Object> last_index_obj(regexp->last_index(), isolate);
1449  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1450  isolate, last_index_obj, Object::ToLength(isolate, last_index_obj));
1451  last_index = PositiveNumberToUint32(*last_index_obj);
1452 
1453  if (last_index > static_cast<uint32_t>(subject->length())) last_index = 0;
1454  }
1455 
1456  Handle<Object> match_indices_obj;
1457  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1458  isolate, match_indices_obj,
1459  RegExpImpl::Exec(isolate, regexp, subject, last_index, last_match_info));
1460 
1461  if (match_indices_obj->IsNull(isolate)) {
1462  if (sticky) regexp->set_last_index(Smi::kZero, SKIP_WRITE_BARRIER);
1463  return *subject;
1464  }
1465 
1466  Handle<RegExpMatchInfo> match_indices =
1467  Handle<RegExpMatchInfo>::cast(match_indices_obj);
1468 
1469  const int index = match_indices->Capture(0);
1470  const int end_of_match = match_indices->Capture(1);
1471 
1472  if (sticky)
1473  regexp->set_last_index(Smi::FromInt(end_of_match), SKIP_WRITE_BARRIER);
1474 
1475  IncrementalStringBuilder builder(isolate);
1476  builder.AppendString(factory->NewSubString(subject, 0, index));
1477 
1478  // Compute the parameter list consisting of the match, captures, index,
1479  // and subject for the replace function invocation. If the RegExp contains
1480  // named captures, they are also passed as the last argument.
1481 
1482  // The number of captures plus one for the match.
1483  const int m = match_indices->NumberOfCaptureRegisters() / 2;
1484 
1485  bool has_named_captures = false;
1486  Handle<FixedArray> capture_map;
1487  if (m > 1) {
1488  // The existence of capture groups implies IRREGEXP kind.
1489  DCHECK_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP);
1490 
1491  Object* maybe_capture_map = regexp->CaptureNameMap();
1492  if (maybe_capture_map->IsFixedArray()) {
1493  has_named_captures = true;
1494  capture_map = handle(FixedArray::cast(maybe_capture_map), isolate);
1495  }
1496  }
1497 
1498  const uint32_t argc = GetArgcForReplaceCallable(m, has_named_captures);
1499  if (argc == static_cast<uint32_t>(-1)) {
1500  THROW_NEW_ERROR_RETURN_FAILURE(
1501  isolate, NewRangeError(MessageTemplate::kTooManyArguments));
1502  }
1503  ScopedVector<Handle<Object>> argv(argc);
1504 
1505  int cursor = 0;
1506  for (int j = 0; j < m; j++) {
1507  bool ok;
1508  Handle<String> capture =
1509  RegExpUtils::GenericCaptureGetter(isolate, match_indices, j, &ok);
1510  if (ok) {
1511  argv[cursor++] = capture;
1512  } else {
1513  argv[cursor++] = factory->undefined_value();
1514  }
1515  }
1516 
1517  argv[cursor++] = handle(Smi::FromInt(index), isolate);
1518  argv[cursor++] = subject;
1519 
1520  if (has_named_captures) {
1521  argv[cursor++] = ConstructNamedCaptureGroupsObject(
1522  isolate, capture_map, [&argv](int ix) { return *argv[ix]; });
1523  }
1524 
1525  DCHECK_EQ(cursor, argc);
1526 
1527  Handle<Object> replacement_obj;
1528  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1529  isolate, replacement_obj,
1530  Execution::Call(isolate, replace_obj, factory->undefined_value(), argc,
1531  argv.start()));
1532 
1533  Handle<String> replacement;
1534  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1535  isolate, replacement, Object::ToString(isolate, replacement_obj));
1536 
1537  builder.AppendString(replacement);
1538  builder.AppendString(
1539  factory->NewSubString(subject, end_of_match, subject->length()));
1540 
1541  RETURN_RESULT_OR_FAILURE(isolate, builder.Finish());
1542 }
1543 
1544 namespace {
1545 
1546 V8_WARN_UNUSED_RESULT MaybeHandle<Object> ToUint32(Isolate* isolate,
1547  Handle<Object> object,
1548  uint32_t* out) {
1549  if (object->IsUndefined(isolate)) {
1550  *out = kMaxUInt32;
1551  return object;
1552  }
1553 
1554  Handle<Object> number;
1555  ASSIGN_RETURN_ON_EXCEPTION(isolate, number, Object::ToNumber(isolate, object),
1556  Object);
1557  *out = NumberToUint32(*number);
1558  return object;
1559 }
1560 
1561 Handle<JSArray> NewJSArrayWithElements(Isolate* isolate,
1562  Handle<FixedArray> elems,
1563  int num_elems) {
1564  return isolate->factory()->NewJSArrayWithElements(
1565  FixedArray::ShrinkOrEmpty(isolate, elems, num_elems));
1566 }
1567 
1568 } // namespace
1569 
1570 // Slow path for:
1571 // ES#sec-regexp.prototype-@@split
1572 // RegExp.prototype [ @@split ] ( string, limit )
//
// Arguments: (JSReceiver recv, String string, Object limit_obj).
// Constructs a sticky "splitter" regexp from {recv} through its species
// constructor, then walks {string}, collecting the substrings between matches
// and the captures of each match. Returns a JSArray with at most {limit}
// elements, or the failure sentinel if any step throws.
1573 RUNTIME_FUNCTION(Runtime_RegExpSplit) {
1574  HandleScope scope(isolate);
1575  DCHECK_EQ(3, args.length());
1576 
1577  CONVERT_ARG_HANDLE_CHECKED(JSReceiver, recv, 0);
1578  CONVERT_ARG_HANDLE_CHECKED(String, string, 1);
1579  CONVERT_ARG_HANDLE_CHECKED(Object, limit_obj, 2);
1580 
1581  Factory* factory = isolate->factory();
1582 
// The splitter is constructed with recv's species constructor, defaulting
// to the isolate's RegExp function.
1583  Handle<JSFunction> regexp_fun = isolate->regexp_function();
1584  Handle<Object> ctor;
1585  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1586  isolate, ctor, Object::SpeciesConstructor(isolate, recv, regexp_fun));
1587 
1588  Handle<Object> flags_obj;
1589  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1590  isolate, flags_obj,
1591  JSObject::GetProperty(isolate, recv, factory->flags_string()));
1592 
1593  Handle<String> flags;
1594  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, flags,
1595  Object::ToString(isolate, flags_obj));
1596 
// The unicode and sticky settings are derived from the presence of 'u' and
// 'y' in the flags string.
1597  Handle<String> u_str = factory->LookupSingleCharacterStringFromCode('u');
1598  const bool unicode = (String::IndexOf(isolate, flags, u_str, 0) >= 0);
1599 
1600  Handle<String> y_str = factory->LookupSingleCharacterStringFromCode('y');
1601  const bool sticky = (String::IndexOf(isolate, flags, y_str, 0) >= 0);
1602 
// The splitter must be sticky: append 'y' unless the flags already have it.
1603  Handle<String> new_flags = flags;
1604  if (!sticky) {
1605  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, new_flags,
1606  factory->NewConsString(flags, y_str));
1607  }
1608 
1609  Handle<JSReceiver> splitter;
1610  {
// splitter = new ctor(recv, new_flags)
1611  const int argc = 2;
1612 
1613  ScopedVector<Handle<Object>> argv(argc);
1614  argv[0] = recv;
1615  argv[1] = new_flags;
1616 
1617  Handle<Object> splitter_obj;
1618  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1619  isolate, splitter_obj,
1620  Execution::New(isolate, ctor, argc, argv.start()));
1621 
1622  splitter = Handle<JSReceiver>::cast(splitter_obj);
1623  }
1624 
// The local ToUint32 helper maps an undefined limit to kMaxUInt32.
1625  uint32_t limit;
1626  RETURN_FAILURE_ON_EXCEPTION(isolate, ToUint32(isolate, limit_obj, &limit));
1627 
1628  const uint32_t length = string->length();
1629 
1630  if (limit == 0) return *factory->NewJSArray(0);
1631 
// Empty input: the result is [] if the splitter matches the empty string,
// and [string] otherwise.
1632  if (length == 0) {
1633  Handle<Object> result;
1634  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1635  isolate, result, RegExpUtils::RegExpExec(isolate, splitter, string,
1636  factory->undefined_value()));
1637 
1638  if (!result->IsNull(isolate)) return *factory->NewJSArray(0);
1639 
1640  Handle<FixedArray> elems = factory->NewUninitializedFixedArray(1);
1641  elems->set(0, *string);
1642  return *factory->NewJSArrayWithElements(elems);
1643  }
1644 
1645  static const int kInitialArraySize = 8;
1646  Handle<FixedArray> elems = factory->NewFixedArrayWithHoles(kInitialArraySize);
1647  uint32_t num_elems = 0;
1648 
// string_index is the position at which the next match is attempted;
// prev_string_index is the start of the substring not yet emitted.
1649  uint32_t string_index = 0;
1650  uint32_t prev_string_index = 0;
1651  while (string_index < length) {
1652  RETURN_FAILURE_ON_EXCEPTION(
1653  isolate, RegExpUtils::SetLastIndex(isolate, splitter, string_index));
1654 
1655  Handle<Object> result;
1656  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1657  isolate, result, RegExpUtils::RegExpExec(isolate, splitter, string,
1658  factory->undefined_value()));
1659 
// No match at this position: advance and retry.
1660  if (result->IsNull(isolate)) {
1661  string_index = static_cast<uint32_t>(
1662  RegExpUtils::AdvanceStringIndex(string, string_index, unicode));
1663  continue;
1664  }
1665 
1666  Handle<Object> last_index_obj;
1667  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1668  isolate, last_index_obj, RegExpUtils::GetLastIndex(isolate, splitter));
1669 
1670  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1671  isolate, last_index_obj, Object::ToLength(isolate, last_index_obj));
1672 
// The end of the match is the splitter's lastIndex, clamped to the string
// length.
1673  const uint32_t end =
1674  std::min(PositiveNumberToUint32(*last_index_obj), length);
// A match ending where the pending substring starts is skipped: advance and
// retry.
1675  if (end == prev_string_index) {
1676  string_index = static_cast<uint32_t>(
1677  RegExpUtils::AdvanceStringIndex(string, string_index, unicode));
1678  continue;
1679  }
1680 
1681  {
// Emit the substring between the previous match and this one.
1682  Handle<String> substr =
1683  factory->NewSubString(string, prev_string_index, string_index);
1684  elems = FixedArray::SetAndGrow(isolate, elems, num_elems++, substr);
1685  if (num_elems == limit) {
1686  return *NewJSArrayWithElements(isolate, elems, num_elems);
1687  }
1688  }
1689 
1690  prev_string_index = end;
1691 
1692  Handle<Object> num_captures_obj;
1693  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1694  isolate, num_captures_obj,
1695  Object::GetProperty(isolate, result,
1696  isolate->factory()->length_string()));
1697 
1698  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1699  isolate, num_captures_obj, Object::ToLength(isolate, num_captures_obj));
1700  const uint32_t num_captures = PositiveNumberToUint32(*num_captures_obj);
1701 
// Emit the captures (elements 1 and up of the exec result; element 0 is
// not added).
1702  for (uint32_t i = 1; i < num_captures; i++) {
1703  Handle<Object> capture;
1704  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1705  isolate, capture, Object::GetElement(isolate, result, i));
1706  elems = FixedArray::SetAndGrow(isolate, elems, num_elems++, capture);
1707  if (num_elems == limit) {
1708  return *NewJSArrayWithElements(isolate, elems, num_elems);
1709  }
1710  }
1711 
// Continue searching right after the match.
1712  string_index = prev_string_index;
1713  }
1714 
1715  {
// Emit the remainder of the string after the last match.
1716  Handle<String> substr =
1717  factory->NewSubString(string, prev_string_index, length);
1718  elems = FixedArray::SetAndGrow(isolate, elems, num_elems++, substr);
1719  }
1720 
1721  return *NewJSArrayWithElements(isolate, elems, num_elems);
1722 }
1723 
1724 // Slow path for:
1725 // ES#sec-regexp.prototype-@@replace
1726 // RegExp.prototype [ @@replace ] ( string, replaceValue )
1727 RUNTIME_FUNCTION(Runtime_RegExpReplace) {
1728  HandleScope scope(isolate);
1729  DCHECK_EQ(3, args.length());
1730 
1731  CONVERT_ARG_HANDLE_CHECKED(JSReceiver, recv, 0);
1732  CONVERT_ARG_HANDLE_CHECKED(String, string, 1);
1733  Handle<Object> replace_obj = args.at(2);
1734 
1735  Factory* factory = isolate->factory();
1736 
1737  string = String::Flatten(isolate, string);
1738 
1739  const bool functional_replace = replace_obj->IsCallable();
1740 
1741  // Fast-path for unmodified JSRegExps (and non-functional replace).
1742  if (RegExpUtils::IsUnmodifiedRegExp(isolate, recv)) {
1743  // We should never get here with functional replace because unmodified
1744  // regexp and functional replace should be fully handled in CSA code.
1745  CHECK(!functional_replace);
1746  RETURN_RESULT_OR_FAILURE(
1747  isolate, RegExpReplace(isolate, Handle<JSRegExp>::cast(recv), string,
1748  replace_obj));
1749  }
1750 
1751  const uint32_t length = string->length();
1752 
1753  Handle<String> replace;
1754  if (!functional_replace) {
1755  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, replace,
1756  Object::ToString(isolate, replace_obj));
1757  }
1758 
1759  Handle<Object> global_obj;
1760  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1761  isolate, global_obj,
1762  JSReceiver::GetProperty(isolate, recv, factory->global_string()));
1763  const bool global = global_obj->BooleanValue(isolate);
1764 
1765  bool unicode = false;
1766  if (global) {
1767  Handle<Object> unicode_obj;
1768  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1769  isolate, unicode_obj,
1770  JSReceiver::GetProperty(isolate, recv, factory->unicode_string()));
1771  unicode = unicode_obj->BooleanValue(isolate);
1772 
1773  RETURN_FAILURE_ON_EXCEPTION(isolate,
1774  RegExpUtils::SetLastIndex(isolate, recv, 0));
1775  }
1776 
1777  Zone zone(isolate->allocator(), ZONE_NAME);
1778  ZoneVector<Handle<Object>> results(&zone);
1779 
1780  while (true) {
1781  Handle<Object> result;
1782  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1783  isolate, result, RegExpUtils::RegExpExec(isolate, recv, string,
1784  factory->undefined_value()));
1785 
1786  if (result->IsNull(isolate)) break;
1787 
1788  results.push_back(result);
1789  if (!global) break;
1790 
1791  Handle<Object> match_obj;
1792  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, match_obj,
1793  Object::GetElement(isolate, result, 0));
1794 
1795  Handle<String> match;
1796  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, match,
1797  Object::ToString(isolate, match_obj));
1798 
1799  if (match->length() == 0) {
1800  RETURN_FAILURE_ON_EXCEPTION(isolate, RegExpUtils::SetAdvancedStringIndex(
1801  isolate, recv, string, unicode));
1802  }
1803  }
1804 
1805  // TODO(jgruber): Look into ReplacementStringBuilder instead.
1806  IncrementalStringBuilder builder(isolate);
1807  uint32_t next_source_position = 0;
1808 
1809  for (const auto& result : results) {
1810  HandleScope handle_scope(isolate);
1811  Handle<Object> captures_length_obj;
1812  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1813  isolate, captures_length_obj,
1814  Object::GetProperty(isolate, result, factory->length_string()));
1815 
1816  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1817  isolate, captures_length_obj,
1818  Object::ToLength(isolate, captures_length_obj));
1819  const uint32_t captures_length =
1820  PositiveNumberToUint32(*captures_length_obj);
1821 
1822  Handle<Object> match_obj;
1823  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, match_obj,
1824  Object::GetElement(isolate, result, 0));
1825 
1826  Handle<String> match;
1827  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, match,
1828  Object::ToString(isolate, match_obj));
1829 
1830  const int match_length = match->length();
1831 
1832  Handle<Object> position_obj;
1833  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1834  isolate, position_obj,
1835  Object::GetProperty(isolate, result, factory->index_string()));
1836 
1837  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1838  isolate, position_obj, Object::ToInteger(isolate, position_obj));
1839  const uint32_t position =
1840  std::min(PositiveNumberToUint32(*position_obj), length);
1841 
1842  // Do not reserve capacity since captures_length is user-controlled.
1843  ZoneVector<Handle<Object>> captures(&zone);
1844 
1845  for (uint32_t n = 0; n < captures_length; n++) {
1846  Handle<Object> capture;
1847  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1848  isolate, capture, Object::GetElement(isolate, result, n));
1849 
1850  if (!capture->IsUndefined(isolate)) {
1851  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, capture,
1852  Object::ToString(isolate, capture));
1853  }
1854  captures.push_back(capture);
1855  }
1856 
1857  Handle<Object> groups_obj = isolate->factory()->undefined_value();
1858  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1859  isolate, groups_obj,
1860  Object::GetProperty(isolate, result, factory->groups_string()));
1861 
1862  const bool has_named_captures = !groups_obj->IsUndefined(isolate);
1863 
1864  Handle<String> replacement;
1865  if (functional_replace) {
1866  const uint32_t argc =
1867  GetArgcForReplaceCallable(captures_length, has_named_captures);
1868  if (argc == static_cast<uint32_t>(-1)) {
1869  THROW_NEW_ERROR_RETURN_FAILURE(
1870  isolate, NewRangeError(MessageTemplate::kTooManyArguments));
1871  }
1872 
1873  ScopedVector<Handle<Object>> argv(argc);
1874 
1875  int cursor = 0;
1876  for (uint32_t j = 0; j < captures_length; j++) {
1877  argv[cursor++] = captures[j];
1878  }
1879 
1880  argv[cursor++] = handle(Smi::FromInt(position), isolate);
1881  argv[cursor++] = string;
1882  if (has_named_captures) argv[cursor++] = groups_obj;
1883 
1884  DCHECK_EQ(cursor, argc);
1885 
1886  Handle<Object> replacement_obj;
1887  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1888  isolate, replacement_obj,
1889  Execution::Call(isolate, replace_obj, factory->undefined_value(),
1890  argc, argv.start()));
1891 
1892  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1893  isolate, replacement, Object::ToString(isolate, replacement_obj));
1894  } else {
1895  DCHECK(!functional_replace);
1896  if (!groups_obj->IsUndefined(isolate)) {
1897  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1898  isolate, groups_obj, JSReceiver::ToObject(isolate, groups_obj));
1899  }
1900  VectorBackedMatch m(isolate, string, match, position, &captures,
1901  groups_obj);
1902  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
1903  isolate, replacement, String::GetSubstitution(isolate, &m, replace));
1904  }
1905 
1906  if (position >= next_source_position) {
1907  builder.AppendString(
1908  factory->NewSubString(string, next_source_position, position));
1909  builder.AppendString(replacement);
1910 
1911  next_source_position = position + match_length;
1912  }
1913  }
1914 
1915  if (next_source_position < length) {
1916  builder.AppendString(
1917  factory->NewSubString(string, next_source_position, length));
1918  }
1919 
1920  RETURN_RESULT_OR_FAILURE(isolate, builder.Finish());
1921 }
1922 
1923 RUNTIME_FUNCTION(Runtime_RegExpInitializeAndCompile) {
1924  HandleScope scope(isolate);
1925  DCHECK_EQ(3, args.length());
1926  // TODO(pwong): To follow the spec more closely and simplify calling code,
1927  // this could handle the canonicalization of pattern and flags. See
1928  // https://tc39.github.io/ecma262/#sec-regexpinitialize
1929  CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0);
1930  CONVERT_ARG_HANDLE_CHECKED(String, source, 1);
1931  CONVERT_ARG_HANDLE_CHECKED(String, flags, 2);
1932 
1933  RETURN_FAILURE_ON_EXCEPTION(isolate,
1934  JSRegExp::Initialize(regexp, source, flags));
1935 
1936  return *regexp;
1937 }
1938 
1939 RUNTIME_FUNCTION(Runtime_IsRegExp) {
1940  SealHandleScope shs(isolate);
1941  DCHECK_EQ(1, args.length());
1942  CONVERT_ARG_CHECKED(Object, obj, 0);
1943  return isolate->heap()->ToBoolean(obj->IsJSRegExp());
1944 }
1945 
1946 } // namespace internal
1947 } // namespace v8
Definition: libplatform.h:13