5 #include "src/regexp/jsregexp.h" 10 #include "src/base/platform/platform.h" 11 #include "src/code-tracer.h" 12 #include "src/compilation-cache.h" 13 #include "src/elements.h" 14 #include "src/execution.h" 15 #include "src/heap/factory.h" 16 #include "src/isolate-inl.h" 17 #include "src/message-template.h" 18 #include "src/ostreams.h" 19 #include "src/regexp/interpreter-irregexp.h" 20 #include "src/regexp/jsregexp-inl.h" 21 #include "src/regexp/regexp-macro-assembler-irregexp.h" 22 #include "src/regexp/regexp-macro-assembler-tracer.h" 23 #include "src/regexp/regexp-macro-assembler.h" 24 #include "src/regexp/regexp-parser.h" 25 #include "src/regexp/regexp-stack.h" 26 #include "src/runtime/runtime.h" 27 #include "src/splay-tree-inl.h" 28 #include "src/string-search.h" 29 #include "src/unicode-decoder.h" 30 #include "src/unicode-inl.h" 32 #ifdef V8_INTL_SUPPORT 33 #include "unicode/uniset.h" 34 #include "unicode/utypes.h" 35 #endif // V8_INTL_SUPPORT 37 #ifndef V8_INTERPRETED_REGEXP 38 #if V8_TARGET_ARCH_IA32 39 #include "src/regexp/ia32/regexp-macro-assembler-ia32.h" 40 #elif V8_TARGET_ARCH_X64 41 #include "src/regexp/x64/regexp-macro-assembler-x64.h" 42 #elif V8_TARGET_ARCH_ARM64 43 #include "src/regexp/arm64/regexp-macro-assembler-arm64.h" 44 #elif V8_TARGET_ARCH_ARM 45 #include "src/regexp/arm/regexp-macro-assembler-arm.h" 46 #elif V8_TARGET_ARCH_PPC 47 #include "src/regexp/ppc/regexp-macro-assembler-ppc.h" 48 #elif V8_TARGET_ARCH_S390 49 #include "src/regexp/s390/regexp-macro-assembler-s390.h" 50 #elif V8_TARGET_ARCH_MIPS 51 #include "src/regexp/mips/regexp-macro-assembler-mips.h" 52 #elif V8_TARGET_ARCH_MIPS64 53 #include "src/regexp/mips64/regexp-macro-assembler-mips64.h" 55 #error Unsupported target architecture. 64 static inline MaybeHandle<Object> ThrowRegExpException(
65 Isolate* isolate, Handle<JSRegExp> re, Handle<String> pattern,
66 Handle<String> error_text) {
67 THROW_NEW_ERROR(isolate, NewSyntaxError(MessageTemplate::kMalformedRegExp,
72 inline void ThrowRegExpException(Isolate* isolate, Handle<JSRegExp> re,
73 Handle<String> error_text) {
74 USE(ThrowRegExpException(isolate, re, Handle<String>(re->Pattern(), isolate),
79 ContainedInLattice AddRange(ContainedInLattice containment,
83 DCHECK_EQ(1, ranges_length & 1);
84 DCHECK_EQ(String::kMaxCodePoint + 1, ranges[ranges_length - 1]);
85 if (containment == kLatticeUnknown)
return containment;
88 for (
int i = 0;
i < ranges_length; inside = !inside, last = ranges[
i],
i++) {
91 if (ranges[
i] <= new_range.from())
continue;
94 if (last <= new_range.from() && new_range.to() < ranges[
i]) {
95 return Combine(containment, inside ? kLatticeIn : kLatticeOut);
97 return kLatticeUnknown;
103 const int kMaxLookaheadForBoyerMoore = 8;
106 const int kPatternTooShortForBoyerMoore = 2;
110 static bool HasFewDifferentCharacters(Handle<String> pattern) {
111 int length = Min(kMaxLookaheadForBoyerMoore, pattern->length());
112 if (length <= kPatternTooShortForBoyerMoore)
return false;
113 const int kMod = 128;
114 bool character_found[kMod];
116 memset(&character_found[0], 0,
sizeof(character_found));
117 for (
int i = 0;
i < length;
i++) {
118 int ch = (pattern->Get(
i) & (kMod - 1));
119 if (!character_found[ch]) {
120 character_found[ch] =
true;
124 if (different * 3 > length)
return false;
132 MaybeHandle<Object> RegExpImpl::Compile(Isolate* isolate, Handle<JSRegExp> re,
133 Handle<String> pattern,
134 JSRegExp::Flags flags) {
135 DCHECK(pattern->IsFlat());
137 Zone zone(isolate->allocator(), ZONE_NAME);
138 CompilationCache* compilation_cache = isolate->compilation_cache();
139 MaybeHandle<FixedArray> maybe_cached =
140 compilation_cache->LookupRegExp(pattern, flags);
141 Handle<FixedArray> cached;
142 if (maybe_cached.ToHandle(&cached)) {
143 re->set_data(*cached);
147 PostponeInterruptsScope postpone(isolate);
148 RegExpCompileData parse_result;
149 FlatStringReader reader(isolate, pattern);
150 DCHECK(!isolate->has_pending_exception());
151 if (!RegExpParser::ParseRegExp(isolate, &zone, &reader, flags,
154 return ThrowRegExpException(isolate, re, pattern, parse_result.error);
157 bool has_been_compiled =
false;
159 if (parse_result.simple && !IgnoreCase(flags) && !IsSticky(flags) &&
160 !HasFewDifferentCharacters(pattern)) {
162 AtomCompile(isolate, re, pattern, flags, pattern);
163 has_been_compiled =
true;
164 }
else if (parse_result.tree->IsAtom() && !IsSticky(flags) &&
165 parse_result.capture_count == 0) {
166 RegExpAtom* atom = parse_result.tree->AsAtom();
167 Vector<const uc16> atom_pattern = atom->data();
168 Handle<String> atom_string;
169 ASSIGN_RETURN_ON_EXCEPTION(
170 isolate, atom_string,
171 isolate->factory()->NewStringFromTwoByte(atom_pattern), Object);
172 if (!IgnoreCase(atom->flags()) && !HasFewDifferentCharacters(atom_string)) {
173 AtomCompile(isolate, re, pattern, flags, atom_string);
174 has_been_compiled =
true;
177 if (!has_been_compiled) {
178 IrregexpInitialize(isolate, re, pattern, flags, parse_result.capture_count);
180 DCHECK(re->data()->IsFixedArray());
183 Handle<FixedArray> data(FixedArray::cast(re->data()), isolate);
184 compilation_cache->PutRegExp(pattern, flags, data);
189 MaybeHandle<Object> RegExpImpl::Exec(Isolate* isolate, Handle<JSRegExp> regexp,
190 Handle<String> subject,
int index,
191 Handle<RegExpMatchInfo> last_match_info) {
192 switch (regexp->TypeTag()) {
194 return AtomExec(isolate, regexp, subject, index, last_match_info);
195 case JSRegExp::IRREGEXP: {
196 return IrregexpExec(isolate, regexp, subject, index, last_match_info);
206 void RegExpImpl::AtomCompile(Isolate* isolate, Handle<JSRegExp> re,
207 Handle<String> pattern, JSRegExp::Flags flags,
208 Handle<String> match_pattern) {
209 isolate->factory()->SetRegExpAtomData(re, JSRegExp::ATOM, pattern, flags,
213 static void SetAtomLastCapture(Isolate* isolate,
214 Handle<RegExpMatchInfo> last_match_info,
215 String subject,
int from,
int to) {
216 SealHandleScope shs(isolate);
217 last_match_info->SetNumberOfCaptureRegisters(2);
218 last_match_info->SetLastSubject(subject);
219 last_match_info->SetLastInput(subject);
220 last_match_info->SetCapture(0, from);
221 last_match_info->SetCapture(1, to);
224 int RegExpImpl::AtomExecRaw(Isolate* isolate, Handle<JSRegExp> regexp,
225 Handle<String> subject,
int index, int32_t* output,
228 DCHECK_LE(index, subject->length());
230 subject = String::Flatten(isolate, subject);
231 DisallowHeapAllocation no_gc;
233 String needle = String::cast(regexp->DataAt(JSRegExp::kAtomPatternIndex));
234 int needle_len = needle->length();
235 DCHECK(needle->IsFlat());
236 DCHECK_LT(0, needle_len);
238 if (index + needle_len > subject->length()) {
239 return RegExpImpl::RE_FAILURE;
242 for (
int i = 0;
i < output_size;
i += 2) {
243 String::FlatContent needle_content = needle->GetFlatContent();
244 String::FlatContent subject_content = subject->GetFlatContent();
245 DCHECK(needle_content.IsFlat());
246 DCHECK(subject_content.IsFlat());
249 (needle_content.IsOneByte()
250 ? (subject_content.IsOneByte()
251 ? SearchString(isolate, subject_content.ToOneByteVector(),
252 needle_content.ToOneByteVector(), index)
253 : SearchString(isolate, subject_content.ToUC16Vector(),
254 needle_content.ToOneByteVector(), index))
255 : (subject_content.IsOneByte()
256 ? SearchString(isolate, subject_content.ToOneByteVector(),
257 needle_content.ToUC16Vector(), index)
258 : SearchString(isolate, subject_content.ToUC16Vector(),
259 needle_content.ToUC16Vector(), index)));
264 output[
i+1] = index + needle_len;
268 return output_size / 2;
271 Handle<Object> RegExpImpl::AtomExec(Isolate* isolate, Handle<JSRegExp> re,
272 Handle<String> subject,
int index,
273 Handle<RegExpMatchInfo> last_match_info) {
274 static const int kNumRegisters = 2;
275 STATIC_ASSERT(kNumRegisters <= Isolate::kJSRegexpStaticOffsetsVectorSize);
276 int32_t* output_registers = isolate->jsregexp_static_offsets_vector();
279 AtomExecRaw(isolate, re, subject, index, output_registers, kNumRegisters);
281 if (res == RegExpImpl::RE_FAILURE)
return isolate->factory()->null_value();
283 DCHECK_EQ(res, RegExpImpl::RE_SUCCESS);
284 SealHandleScope shs(isolate);
285 SetAtomLastCapture(isolate, last_match_info, *subject, output_registers[0],
286 output_registers[1]);
287 return last_match_info;
299 bool RegExpImpl::EnsureCompiledIrregexp(Isolate* isolate, Handle<JSRegExp> re,
300 Handle<String> sample_subject,
302 Object* compiled_code = re->DataAt(JSRegExp::code_index(is_one_byte));
303 #ifdef V8_INTERPRETED_REGEXP 304 if (compiled_code->IsByteArray())
return true;
305 #else // V8_INTERPRETED_REGEXP (RegExp native code) 306 if (compiled_code->IsCode())
return true;
308 return CompileIrregexp(isolate, re, sample_subject, is_one_byte);
311 bool RegExpImpl::CompileIrregexp(Isolate* isolate, Handle<JSRegExp> re,
312 Handle<String> sample_subject,
315 Zone zone(isolate->allocator(), ZONE_NAME);
316 PostponeInterruptsScope postpone(isolate);
318 Object* entry = re->DataAt(JSRegExp::code_index(is_one_byte));
321 DCHECK(entry->IsSmi());
322 int entry_value = Smi::ToInt(entry);
323 DCHECK_EQ(JSRegExp::kUninitializedValue, entry_value);
326 JSRegExp::Flags flags = re->GetFlags();
328 Handle<String> pattern(re->Pattern(), isolate);
329 pattern = String::Flatten(isolate, pattern);
330 RegExpCompileData compile_data;
331 FlatStringReader reader(isolate, pattern);
332 if (!RegExpParser::ParseRegExp(isolate, &zone, &reader, flags,
336 USE(ThrowRegExpException(isolate, re, pattern, compile_data.error));
339 RegExpEngine::CompilationResult result =
340 RegExpEngine::Compile(isolate, &zone, &compile_data, flags, pattern,
341 sample_subject, is_one_byte);
342 if (result.error_message !=
nullptr) {
344 if (FLAG_abort_on_stack_or_string_length_overflow &&
345 strncmp(result.error_message,
"Stack overflow", 15) == 0) {
346 FATAL(
"Aborting on stack overflow");
348 Handle<String> error_message = isolate->factory()->NewStringFromUtf8(
349 CStrVector(result.error_message)).ToHandleChecked();
350 ThrowRegExpException(isolate, re, error_message);
354 Handle<FixedArray> data =
355 Handle<FixedArray>(FixedArray::cast(re->data()), isolate);
356 data->set(JSRegExp::code_index(is_one_byte), result.code);
357 SetIrregexpCaptureNameMap(*data, compile_data.capture_name_map);
358 int register_max = IrregexpMaxRegisterCount(*data);
359 if (result.num_registers > register_max) {
360 SetIrregexpMaxRegisterCount(*data, result.num_registers);
366 int RegExpImpl::IrregexpMaxRegisterCount(FixedArray re) {
368 re->get(JSRegExp::kIrregexpMaxRegisterCountIndex))->value();
371 void RegExpImpl::SetIrregexpMaxRegisterCount(FixedArray re,
int value) {
372 re->set(JSRegExp::kIrregexpMaxRegisterCountIndex, Smi::FromInt(value));
375 void RegExpImpl::SetIrregexpCaptureNameMap(FixedArray re,
376 Handle<FixedArray> value) {
377 if (value.is_null()) {
378 re->set(JSRegExp::kIrregexpCaptureNameMapIndex, Smi::kZero);
380 re->set(JSRegExp::kIrregexpCaptureNameMapIndex, *value);
384 int RegExpImpl::IrregexpNumberOfCaptures(FixedArray re) {
385 return Smi::ToInt(re->get(JSRegExp::kIrregexpCaptureCountIndex));
388 int RegExpImpl::IrregexpNumberOfRegisters(FixedArray re) {
389 return Smi::ToInt(re->get(JSRegExp::kIrregexpMaxRegisterCountIndex));
392 ByteArray RegExpImpl::IrregexpByteCode(FixedArray re,
bool is_one_byte) {
393 return ByteArray::cast(re->get(JSRegExp::code_index(is_one_byte)));
396 Code RegExpImpl::IrregexpNativeCode(FixedArray re,
bool is_one_byte) {
397 return Code::cast(re->get(JSRegExp::code_index(is_one_byte)));
400 void RegExpImpl::IrregexpInitialize(Isolate* isolate, Handle<JSRegExp> re,
401 Handle<String> pattern,
402 JSRegExp::Flags flags,
int capture_count) {
404 isolate->factory()->SetRegExpIrregexpData(re, JSRegExp::IRREGEXP, pattern,
405 flags, capture_count);
408 int RegExpImpl::IrregexpPrepare(Isolate* isolate, Handle<JSRegExp> regexp,
409 Handle<String> subject) {
410 DCHECK(subject->IsFlat());
413 bool is_one_byte = subject->IsOneByteRepresentationUnderneath();
414 if (!EnsureCompiledIrregexp(isolate, regexp, subject, is_one_byte))
return -1;
416 #ifdef V8_INTERPRETED_REGEXP 421 return IrregexpNumberOfRegisters(FixedArray::cast(regexp->data())) +
422 (IrregexpNumberOfCaptures(FixedArray::cast(regexp->data())) + 1) * 2;
423 #else // V8_INTERPRETED_REGEXP 426 return (IrregexpNumberOfCaptures(FixedArray::cast(regexp->data())) + 1) * 2;
427 #endif // V8_INTERPRETED_REGEXP 430 int RegExpImpl::IrregexpExecRaw(Isolate* isolate, Handle<JSRegExp> regexp,
431 Handle<String> subject,
int index,
432 int32_t* output,
int output_size) {
433 Handle<FixedArray> irregexp(FixedArray::cast(regexp->data()), isolate);
436 DCHECK_LE(index, subject->length());
437 DCHECK(subject->IsFlat());
439 bool is_one_byte = subject->IsOneByteRepresentationUnderneath();
441 #ifndef V8_INTERPRETED_REGEXP 442 DCHECK(output_size >= (IrregexpNumberOfCaptures(*irregexp) + 1) * 2);
444 EnsureCompiledIrregexp(isolate, regexp, subject, is_one_byte);
445 Handle<Code> code(IrregexpNativeCode(*irregexp, is_one_byte), isolate);
450 NativeRegExpMacroAssembler::Result res =
451 NativeRegExpMacroAssembler::Match(code,
457 if (res != NativeRegExpMacroAssembler::RETRY) {
458 DCHECK(res != NativeRegExpMacroAssembler::EXCEPTION ||
459 isolate->has_pending_exception());
461 static_cast<int>(NativeRegExpMacroAssembler::SUCCESS) == RE_SUCCESS);
463 static_cast<int>(NativeRegExpMacroAssembler::FAILURE) == RE_FAILURE);
464 STATIC_ASSERT(static_cast<int>(NativeRegExpMacroAssembler::EXCEPTION)
466 return static_cast<IrregexpResult
>(res);
474 IrregexpPrepare(isolate, regexp, subject);
475 is_one_byte = subject->IsOneByteRepresentationUnderneath();
478 #else // V8_INTERPRETED_REGEXP 480 DCHECK(output_size >= IrregexpNumberOfRegisters(*irregexp));
483 int number_of_capture_registers =
484 (IrregexpNumberOfCaptures(*irregexp) + 1) * 2;
485 int32_t* raw_output = &output[number_of_capture_registers];
489 for (
int i = number_of_capture_registers - 1;
i >= 0;
i--) {
492 Handle<ByteArray> byte_codes(IrregexpByteCode(*irregexp, is_one_byte),
495 IrregexpResult result = IrregexpInterpreter::Match(isolate,
500 if (result == RE_SUCCESS) {
502 MemCopy(output, raw_output, number_of_capture_registers *
sizeof(int32_t));
504 if (result == RE_EXCEPTION) {
505 DCHECK(!isolate->has_pending_exception());
506 isolate->StackOverflow();
509 #endif // V8_INTERPRETED_REGEXP 512 MaybeHandle<Object> RegExpImpl::IrregexpExec(
513 Isolate* isolate, Handle<JSRegExp> regexp, Handle<String> subject,
514 int previous_index, Handle<RegExpMatchInfo> last_match_info) {
515 DCHECK_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP);
517 subject = String::Flatten(isolate, subject);
520 #if defined(V8_INTERPRETED_REGEXP) && defined(DEBUG) 521 if (FLAG_trace_regexp_bytecodes) {
522 String pattern = regexp->Pattern();
523 PrintF(
"\n\nRegexp match: /%s/\n\n", pattern->ToCString().get());
524 PrintF(
"\n\nSubject string: '%s'\n\n", subject->ToCString().get());
527 int required_registers =
528 RegExpImpl::IrregexpPrepare(isolate, regexp, subject);
529 if (required_registers < 0) {
531 DCHECK(isolate->has_pending_exception());
532 return MaybeHandle<Object>();
535 int32_t* output_registers =
nullptr;
536 if (required_registers > Isolate::kJSRegexpStaticOffsetsVectorSize) {
537 output_registers = NewArray<int32_t>(required_registers);
539 std::unique_ptr<int32_t[]> auto_release(output_registers);
540 if (output_registers ==
nullptr) {
541 output_registers = isolate->jsregexp_static_offsets_vector();
545 RegExpImpl::IrregexpExecRaw(isolate, regexp, subject, previous_index,
546 output_registers, required_registers);
547 if (res == RE_SUCCESS) {
549 IrregexpNumberOfCaptures(FixedArray::cast(regexp->data()));
550 return SetLastMatchInfo(isolate, last_match_info, subject, capture_count,
553 if (res == RE_EXCEPTION) {
554 DCHECK(isolate->has_pending_exception());
555 return MaybeHandle<Object>();
557 DCHECK(res == RE_FAILURE);
558 return isolate->factory()->null_value();
561 Handle<RegExpMatchInfo> RegExpImpl::SetLastMatchInfo(
562 Isolate* isolate, Handle<RegExpMatchInfo> last_match_info,
563 Handle<String> subject,
int capture_count, int32_t* match) {
569 int capture_register_count = (capture_count + 1) * 2;
570 Handle<RegExpMatchInfo> result = RegExpMatchInfo::ReserveCaptures(
571 isolate, last_match_info, capture_register_count);
572 result->SetNumberOfCaptureRegisters(capture_register_count);
574 if (*result != *last_match_info) {
577 if (*last_match_info == *isolate->regexp_last_match_info()) {
578 isolate->native_context()->set_regexp_last_match_info(*result);
579 }
else if (*last_match_info == *isolate->regexp_internal_match_info()) {
580 isolate->native_context()->set_regexp_internal_match_info(*result);
584 DisallowHeapAllocation no_allocation;
585 if (match !=
nullptr) {
586 for (
int i = 0;
i < capture_register_count;
i += 2) {
587 result->SetCapture(
i, match[
i]);
588 result->SetCapture(
i + 1, match[
i + 1]);
591 result->SetLastSubject(*subject);
592 result->SetLastInput(*subject);
596 RegExpImpl::GlobalCache::GlobalCache(Handle<JSRegExp> regexp,
597 Handle<String> subject, Isolate* isolate)
598 : register_array_(nullptr),
599 register_array_size_(0),
603 #ifdef V8_INTERPRETED_REGEXP 604 bool interpreted =
true;
606 bool interpreted =
false;
607 #endif // V8_INTERPRETED_REGEXP 609 if (regexp_->TypeTag() == JSRegExp::ATOM) {
610 static const int kAtomRegistersPerMatch = 2;
611 registers_per_match_ = kAtomRegistersPerMatch;
615 registers_per_match_ =
616 RegExpImpl::IrregexpPrepare(isolate_, regexp_, subject_);
617 if (registers_per_match_ < 0) {
623 DCHECK(IsGlobal(regexp->GetFlags()));
625 register_array_size_ =
626 Max(registers_per_match_, Isolate::kJSRegexpStaticOffsetsVectorSize);
627 max_matches_ = register_array_size_ / registers_per_match_;
631 register_array_size_ = registers_per_match_;
635 if (register_array_size_ > Isolate::kJSRegexpStaticOffsetsVectorSize) {
636 register_array_ = NewArray<int32_t>(register_array_size_);
638 register_array_ = isolate->jsregexp_static_offsets_vector();
643 current_match_index_ = max_matches_ - 1;
644 num_matches_ = max_matches_;
645 DCHECK_LE(2, registers_per_match_);
646 DCHECK_GE(register_array_size_, registers_per_match_);
647 int32_t* last_match =
648 ®ister_array_[current_match_index_ * registers_per_match_];
653 int RegExpImpl::GlobalCache::AdvanceZeroLength(
int last_index) {
654 if (IsUnicode(regexp_->GetFlags()) && last_index + 1 < subject_->length() &&
655 unibrow::Utf16::IsLeadSurrogate(subject_->Get(last_index)) &&
656 unibrow::Utf16::IsTrailSurrogate(subject_->Get(last_index + 1))) {
658 return last_index + 2;
660 return last_index + 1;
813 void RegExpTree::AppendToText(RegExpText* text, Zone* zone) {
818 void RegExpAtom::AppendToText(RegExpText* text, Zone* zone) {
819 text->AddElement(TextElement::Atom(
this), zone);
823 void RegExpCharacterClass::AppendToText(RegExpText* text, Zone* zone) {
824 text->AddElement(TextElement::CharClass(
this), zone);
828 void RegExpText::AppendToText(RegExpText* text, Zone* zone) {
829 for (
int i = 0;
i < elements()->length();
i++)
830 text->AddElement(elements()->at(
i), zone);
834 TextElement TextElement::Atom(RegExpAtom* atom) {
835 return TextElement(ATOM, atom);
839 TextElement TextElement::CharClass(RegExpCharacterClass* char_class) {
840 return TextElement(CHAR_CLASS, char_class);
844 int TextElement::length()
const {
845 switch (text_type()) {
847 return atom()->length();
856 DispatchTable* ChoiceNode::GetTable(
bool ignore_case) {
857 if (table_ ==
nullptr) {
858 table_ =
new(zone()) DispatchTable(zone());
859 DispatchTableConstructor cons(table_, ignore_case, zone());
860 cons.BuildTable(
this);
869 for (
int i = 0;
i < RegExpMacroAssembler::kTableSize;
i++) {
870 frequencies_[
i] = CharacterFrequency(
i);
874 void CountCharacter(
int character) {
875 int index = (character & RegExpMacroAssembler::kTableMask);
876 frequencies_[index].Increment();
882 int Frequency(
int in_character) {
883 DCHECK((in_character & RegExpMacroAssembler::kTableMask) == in_character);
884 if (total_samples_ < 1)
return 1;
886 (frequencies_[in_character].counter() * 128) / total_samples_;
887 return freq_in_per128;
891 class CharacterFrequency {
893 CharacterFrequency() : counter_(0), character_(-1) { }
894 explicit CharacterFrequency(
int character)
895 : counter_(0), character_(character) { }
897 void Increment() { counter_++; }
898 int counter() {
return counter_; }
899 int character() {
return character_; }
908 CharacterFrequency frequencies_[RegExpMacroAssembler::kTableSize];
918 int AllocateRegister() {
919 if (next_register_ >= RegExpMacroAssembler::kMaxRegister) {
920 reg_exp_too_big_ =
true;
921 return next_register_;
923 return next_register_++;
928 int UnicodeLookaroundStackRegister() {
929 if (unicode_lookaround_stack_register_ == kNoRegister) {
930 unicode_lookaround_stack_register_ = AllocateRegister();
932 return unicode_lookaround_stack_register_;
935 int UnicodeLookaroundPositionRegister() {
936 if (unicode_lookaround_position_register_ == kNoRegister) {
937 unicode_lookaround_position_register_ = AllocateRegister();
939 return unicode_lookaround_position_register_;
948 if (!node->on_work_list() && !node->label()->is_bound()) {
949 node->set_on_work_list(
true);
950 work_list_->push_back(node);
954 static const int kImplementationOffset = 0;
955 static const int kNumberOfRegistersOffset = 0;
956 static const int kCodeOffset = 1;
959 EndNode* accept() {
return accept_; }
961 static const int kMaxRecursion = 100;
962 inline int recursion_depth() {
return recursion_depth_; }
963 inline void IncrementRecursionDepth() { recursion_depth_++; }
964 inline void DecrementRecursionDepth() { recursion_depth_--; }
966 void SetRegExpTooBig() { reg_exp_too_big_ =
true; }
968 inline bool one_byte() {
return one_byte_; }
969 inline bool optimize() {
return optimize_; }
970 inline void set_optimize(
bool value) { optimize_ = value; }
971 inline bool limiting_recursion() {
return limiting_recursion_; }
972 inline void set_limiting_recursion(
bool value) {
973 limiting_recursion_ = value;
975 bool read_backward() {
return read_backward_; }
976 void set_read_backward(
bool value) { read_backward_ = value; }
979 int current_expansion_factor() {
return current_expansion_factor_; }
980 void set_current_expansion_factor(
int value) {
981 current_expansion_factor_ = value;
984 Isolate* isolate()
const {
return isolate_; }
985 Zone* zone()
const {
return zone_; }
987 static const int kNoRegister = -1;
992 int unicode_lookaround_stack_register_;
993 int unicode_lookaround_position_register_;
994 std::vector<RegExpNode*>* work_list_;
995 int recursion_depth_;
998 bool reg_exp_too_big_;
999 bool limiting_recursion_;
1001 bool read_backward_;
1002 int current_expansion_factor_;
1012 compiler->IncrementRecursionDepth();
1027 RegExpCompiler::RegExpCompiler(Isolate* isolate, Zone* zone,
int capture_count,
1029 : next_register_(2 * (capture_count + 1)),
1030 unicode_lookaround_stack_register_(kNoRegister),
1031 unicode_lookaround_position_register_(kNoRegister),
1032 work_list_(nullptr),
1033 recursion_depth_(0),
1034 one_byte_(one_byte),
1035 reg_exp_too_big_(false),
1036 limiting_recursion_(false),
1037 optimize_(FLAG_regexp_optimization),
1038 read_backward_(false),
1039 current_expansion_factor_(1),
1040 frequency_collator_(),
1043 accept_ =
new(zone) EndNode(EndNode::ACCEPT, zone);
1044 DCHECK_GE(RegExpMacroAssembler::kMaxRegister, next_register_ - 1);
1047 RegExpEngine::CompilationResult RegExpCompiler::Assemble(
1048 Isolate* isolate, RegExpMacroAssembler* macro_assembler, RegExpNode* start,
1049 int capture_count, Handle<String> pattern) {
1051 if (FLAG_trace_regexp_assembler)
1052 macro_assembler_ =
new RegExpMacroAssemblerTracer(isolate, macro_assembler);
1055 macro_assembler_ = macro_assembler;
1057 std::vector<RegExpNode*> work_list;
1058 work_list_ = &work_list;
1060 macro_assembler_->PushBacktrack(&fail);
1062 start->Emit(
this, &new_trace);
1063 macro_assembler_->Bind(&fail);
1064 macro_assembler_->Fail();
1065 while (!work_list.empty()) {
1066 RegExpNode* node = work_list.back();
1067 work_list.pop_back();
1068 node->set_on_work_list(
false);
1069 if (!node->label()->is_bound()) node->Emit(
this, &new_trace);
1071 if (reg_exp_too_big_) {
1072 macro_assembler_->AbortedCodeGeneration();
1073 return IrregexpRegExpTooBig(isolate_);
1076 Handle<HeapObject> code = macro_assembler_->GetCode(pattern);
1077 isolate->IncreaseTotalRegexpCodeGenerated(code->Size());
1078 work_list_ =
nullptr;
1079 #if defined(ENABLE_DISASSEMBLER) && !defined(V8_INTERPRETED_REGEXP) 1080 if (FLAG_print_code) {
1081 CodeTracer::Scope trace_scope(isolate->GetCodeTracer());
1082 OFStream os(trace_scope.file());
1083 Handle<Code>::cast(code)->Disassemble(pattern->ToCString().get(), os);
1087 if (FLAG_trace_regexp_assembler) {
1088 delete macro_assembler_;
1091 return RegExpEngine::CompilationResult(*code, next_register_);
1095 bool Trace::DeferredAction::Mentions(
int that) {
1096 if (action_type() == ActionNode::CLEAR_CAPTURES) {
1097 Interval range =
static_cast<DeferredClearCaptures*
>(
this)->range();
1098 return range.Contains(that);
1100 return reg() == that;
1105 bool Trace::mentions_reg(
int reg) {
1106 for (DeferredAction* action = actions_; action !=
nullptr;
1107 action = action->next()) {
1108 if (action->Mentions(reg))
1115 bool Trace::GetStoredPosition(
int reg,
int* cp_offset) {
1116 DCHECK_EQ(0, *cp_offset);
1117 for (DeferredAction* action = actions_; action !=
nullptr;
1118 action = action->next()) {
1119 if (action->Mentions(reg)) {
1120 if (action->action_type() == ActionNode::STORE_POSITION) {
1121 *cp_offset =
static_cast<DeferredCapture*
>(action)->cp_offset();
1132 int Trace::FindAffectedRegisters(OutSet* affected_registers,
1134 int max_register = RegExpCompiler::kNoRegister;
1135 for (DeferredAction* action = actions_; action !=
nullptr;
1136 action = action->next()) {
1137 if (action->action_type() == ActionNode::CLEAR_CAPTURES) {
1138 Interval range =
static_cast<DeferredClearCaptures*
>(action)->range();
1139 for (
int i = range.from();
i <= range.to();
i++)
1140 affected_registers->Set(
i, zone);
1141 if (range.to() > max_register) max_register = range.to();
1143 affected_registers->Set(action->reg(), zone);
1144 if (action->reg() > max_register) max_register = action->reg();
1147 return max_register;
1151 void Trace::RestoreAffectedRegisters(RegExpMacroAssembler* assembler,
1153 const OutSet& registers_to_pop,
1154 const OutSet& registers_to_clear) {
1155 for (
int reg = max_register; reg >= 0; reg--) {
1156 if (registers_to_pop.Get(reg)) {
1157 assembler->PopRegister(reg);
1158 }
else if (registers_to_clear.Get(reg)) {
1160 while (reg > 0 && registers_to_clear.Get(reg - 1)) {
1163 assembler->ClearRegisters(reg, clear_to);
1169 void Trace::PerformDeferredActions(RegExpMacroAssembler* assembler,
1171 const OutSet& affected_registers,
1172 OutSet* registers_to_pop,
1173 OutSet* registers_to_clear,
1176 const int push_limit = (assembler->stack_limit_slack() + 1) / 2;
1181 for (
int reg = 0; reg <= max_register; reg++) {
1182 if (!affected_registers.Get(reg)) {
1189 enum DeferredActionUndoType { IGNORE, RESTORE, CLEAR };
1190 DeferredActionUndoType undo_action = IGNORE;
1193 bool absolute =
false;
1195 static const int kNoStore = kMinInt;
1196 int store_position = kNoStore;
1199 for (DeferredAction* action = actions_; action !=
nullptr;
1200 action = action->next()) {
1201 if (action->Mentions(reg)) {
1202 switch (action->action_type()) {
1203 case ActionNode::SET_REGISTER: {
1204 Trace::DeferredSetRegister* psr =
1205 static_cast<Trace::DeferredSetRegister*
>(action);
1207 value += psr->value();
1215 undo_action = RESTORE;
1216 DCHECK_EQ(store_position, kNoStore);
1220 case ActionNode::INCREMENT_REGISTER:
1224 DCHECK_EQ(store_position, kNoStore);
1226 undo_action = RESTORE;
1228 case ActionNode::STORE_POSITION: {
1229 Trace::DeferredCapture* pc =
1230 static_cast<Trace::DeferredCapture*
>(action);
1231 if (!clear && store_position == kNoStore) {
1232 store_position = pc->cp_offset();
1243 undo_action = IGNORE;
1245 undo_action = pc->is_capture() ? CLEAR : RESTORE;
1248 DCHECK_EQ(value, 0);
1251 case ActionNode::CLEAR_CAPTURES: {
1255 if (store_position == kNoStore) {
1258 undo_action = RESTORE;
1260 DCHECK_EQ(value, 0);
1270 if (undo_action == RESTORE) {
1272 RegExpMacroAssembler::StackCheckFlag stack_check =
1273 RegExpMacroAssembler::kNoStackLimitCheck;
1274 if (pushes == push_limit) {
1275 stack_check = RegExpMacroAssembler::kCheckStackLimit;
1279 assembler->PushRegister(reg, stack_check);
1280 registers_to_pop->Set(reg, zone);
1281 }
else if (undo_action == CLEAR) {
1282 registers_to_clear->Set(reg, zone);
1286 if (store_position != kNoStore) {
1287 assembler->WriteCurrentPositionToRegister(reg, store_position);
1289 assembler->ClearRegisters(reg, reg);
1290 }
else if (absolute) {
1291 assembler->SetRegister(reg, value);
1292 }
else if (value != 0) {
1293 assembler->AdvanceRegister(reg, value);
1302 void Trace::Flush(RegExpCompiler* compiler, RegExpNode* successor) {
1303 RegExpMacroAssembler* assembler = compiler->macro_assembler();
1305 DCHECK(!is_trivial());
1307 if (actions_ ==
nullptr && backtrack() ==
nullptr) {
1311 if (cp_offset_ != 0) assembler->AdvanceCurrentPosition(cp_offset_);
1314 successor->Emit(compiler, &new_state);
1319 OutSet affected_registers;
1321 if (backtrack() !=
nullptr) {
1325 assembler->PushCurrentPosition();
1328 int max_register = FindAffectedRegisters(&affected_registers,
1330 OutSet registers_to_pop;
1331 OutSet registers_to_clear;
1332 PerformDeferredActions(assembler,
1336 ®isters_to_clear,
1338 if (cp_offset_ != 0) {
1339 assembler->AdvanceCurrentPosition(cp_offset_);
1344 assembler->PushBacktrack(&undo);
1345 if (successor->KeepRecursing(compiler)) {
1347 successor->Emit(compiler, &new_state);
1349 compiler->AddWork(successor);
1350 assembler->GoTo(successor->label());
1354 assembler->Bind(&undo);
1355 RestoreAffectedRegisters(assembler,
1358 registers_to_clear);
1359 if (backtrack() ==
nullptr) {
1360 assembler->Backtrack();
1362 assembler->PopCurrentPosition();
1363 assembler->GoTo(backtrack());
1368 void NegativeSubmatchSuccess::Emit(RegExpCompiler* compiler, Trace* trace) {
1369 RegExpMacroAssembler* assembler = compiler->macro_assembler();
1373 if (!label()->is_bound()) {
1376 assembler->Bind(label());
1381 assembler->ReadCurrentPositionFromRegister(current_position_register_);
1382 assembler->ReadStackPointerFromRegister(stack_pointer_register_);
1383 if (clear_capture_count_ > 0) {
1386 int clear_capture_end = clear_capture_start_ + clear_capture_count_ - 1;
1387 assembler->ClearRegisters(clear_capture_start_, clear_capture_end);
1391 assembler->Backtrack();
1395 void EndNode::Emit(RegExpCompiler* compiler, Trace* trace) {
1396 if (!trace->is_trivial()) {
1397 trace->Flush(compiler,
this);
1400 RegExpMacroAssembler* assembler = compiler->macro_assembler();
1401 if (!label()->is_bound()) {
1402 assembler->Bind(label());
1406 assembler->Succeed();
1409 assembler->GoTo(trace->backtrack());
1411 case NEGATIVE_SUBMATCH_SUCCESS:
1419 void GuardedAlternative::AddGuard(Guard* guard, Zone* zone) {
1420 if (guards_ ==
nullptr) guards_ =
new (zone) ZoneList<Guard*>(1, zone);
1421 guards_->Add(guard, zone);
1425 ActionNode* ActionNode::SetRegister(
int reg,
1427 RegExpNode* on_success) {
1428 ActionNode* result =
1429 new(on_success->zone()) ActionNode(SET_REGISTER, on_success);
1430 result->data_.u_store_register.reg = reg;
1431 result->data_.u_store_register.value = val;
1436 ActionNode* ActionNode::IncrementRegister(
int reg, RegExpNode* on_success) {
1437 ActionNode* result =
1438 new(on_success->zone()) ActionNode(INCREMENT_REGISTER, on_success);
1439 result->data_.u_increment_register.reg = reg;
1444 ActionNode* ActionNode::StorePosition(
int reg,
1446 RegExpNode* on_success) {
1447 ActionNode* result =
1448 new(on_success->zone()) ActionNode(STORE_POSITION, on_success);
1449 result->data_.u_position_register.reg = reg;
1450 result->data_.u_position_register.is_capture = is_capture;
1455 ActionNode* ActionNode::ClearCaptures(Interval range,
1456 RegExpNode* on_success) {
1457 ActionNode* result =
1458 new(on_success->zone()) ActionNode(CLEAR_CAPTURES, on_success);
1459 result->data_.u_clear_captures.range_from = range.from();
1460 result->data_.u_clear_captures.range_to = range.to();
1465 ActionNode* ActionNode::BeginSubmatch(
int stack_reg,
1467 RegExpNode* on_success) {
1468 ActionNode* result =
1469 new(on_success->zone()) ActionNode(BEGIN_SUBMATCH, on_success);
1470 result->data_.u_submatch.stack_pointer_register = stack_reg;
1471 result->data_.u_submatch.current_position_register = position_reg;
1476 ActionNode* ActionNode::PositiveSubmatchSuccess(
int stack_reg,
1478 int clear_register_count,
1479 int clear_register_from,
1480 RegExpNode* on_success) {
1481 ActionNode* result =
1482 new(on_success->zone()) ActionNode(POSITIVE_SUBMATCH_SUCCESS, on_success);
1483 result->data_.u_submatch.stack_pointer_register = stack_reg;
1484 result->data_.u_submatch.current_position_register = position_reg;
1485 result->data_.u_submatch.clear_register_count = clear_register_count;
1486 result->data_.u_submatch.clear_register_from = clear_register_from;
1491 ActionNode* ActionNode::EmptyMatchCheck(
int start_register,
1492 int repetition_register,
1493 int repetition_limit,
1494 RegExpNode* on_success) {
1495 ActionNode* result =
1496 new(on_success->zone()) ActionNode(EMPTY_MATCH_CHECK, on_success);
1497 result->data_.u_empty_match_check.start_register = start_register;
1498 result->data_.u_empty_match_check.repetition_register = repetition_register;
1499 result->data_.u_empty_match_check.repetition_limit = repetition_limit;
1504 #define DEFINE_ACCEPT(Type) \ 1505 void Type##Node::Accept(NodeVisitor* visitor) { \ 1506 visitor->Visit##Type(this); \ 1508 FOR_EACH_NODE_TYPE(DEFINE_ACCEPT)
1509 #undef DEFINE_ACCEPT 1512 void LoopChoiceNode::Accept(NodeVisitor* visitor) {
1513 visitor->VisitLoopChoice(
this);
1521 void ChoiceNode::GenerateGuard(RegExpMacroAssembler* macro_assembler,
1524 switch (guard->op()) {
1526 DCHECK(!trace->mentions_reg(guard->reg()));
1527 macro_assembler->IfRegisterGE(guard->reg(),
1529 trace->backtrack());
1532 DCHECK(!trace->mentions_reg(guard->reg()));
1533 macro_assembler->IfRegisterLT(guard->reg(),
1535 trace->backtrack());
1543 static int GetCaseIndependentLetters(Isolate* isolate, uc16 character,
1544 bool one_byte_subject,
1545 unibrow::uchar* letters) {
1547 isolate->jsregexp_uncanonicalize()->get(character,
'\0', letters);
1551 letters[0] = character;
1555 if (one_byte_subject) {
1557 for (
int i = 0;
i < length;
i++) {
1558 if (letters[
i] <= String::kMaxOneByteCharCode) {
1559 letters[new_length++] = letters[
i];
1562 length = new_length;
1569 static inline bool EmitSimpleCharacter(Isolate* isolate,
1570 RegExpCompiler* compiler,
1576 RegExpMacroAssembler* assembler = compiler->macro_assembler();
1577 bool bound_checked =
false;
1579 assembler->LoadCurrentCharacter(
1583 bound_checked =
true;
1585 assembler->CheckNotCharacter(c, on_failure);
1586 return bound_checked;
1592 static inline bool EmitAtomNonLetter(Isolate* isolate,
1593 RegExpCompiler* compiler,
1599 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
1600 bool one_byte = compiler->one_byte();
1601 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
1602 int length = GetCaseIndependentLetters(isolate, c, one_byte, chars);
1609 bool checked =
false;
1612 if (one_byte && c > String::kMaxOneByteCharCodeU) {
1617 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check);
1620 macro_assembler->CheckNotCharacter(c, on_failure);
1626 static bool ShortCutEmitCharacterPair(RegExpMacroAssembler* macro_assembler,
1627 bool one_byte, uc16 c1, uc16 c2,
1628 Label* on_failure) {
1631 char_mask = String::kMaxOneByteCharCode;
1633 char_mask = String::kMaxUtf16CodeUnit;
1635 uc16 exor = c1 ^ c2;
1637 if (((exor - 1) & exor) == 0) {
1641 uc16 mask = char_mask ^ exor;
1642 macro_assembler->CheckNotCharacterAfterAnd(c1, mask, on_failure);
1646 uc16 diff = c2 - c1;
1647 if (((diff - 1) & diff) == 0 && c1 >= diff) {
1652 uc16 mask = char_mask ^ diff;
1653 macro_assembler->CheckNotCharacterAfterMinusAnd(c1 - diff,
1663 typedef bool EmitCharacterFunction(Isolate* isolate,
1664 RegExpCompiler* compiler,
1673 static inline bool EmitAtomLetter(Isolate* isolate,
1674 RegExpCompiler* compiler,
1680 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
1681 bool one_byte = compiler->one_byte();
1682 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
1683 int length = GetCaseIndependentLetters(isolate, c, one_byte, chars);
1684 if (length <= 1)
return false;
1688 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check);
1691 DCHECK_EQ(4, unibrow::Ecma262UnCanonicalize::kMaxWidth);
1694 if (ShortCutEmitCharacterPair(macro_assembler, one_byte, chars[0],
1695 chars[1], on_failure)) {
1697 macro_assembler->CheckCharacter(chars[0], &ok);
1698 macro_assembler->CheckNotCharacter(chars[1], on_failure);
1699 macro_assembler->Bind(&ok);
1704 macro_assembler->CheckCharacter(chars[3], &ok);
1707 macro_assembler->CheckCharacter(chars[0], &ok);
1708 macro_assembler->CheckCharacter(chars[1], &ok);
1709 macro_assembler->CheckNotCharacter(chars[2], on_failure);
1710 macro_assembler->Bind(&ok);
1720 static void EmitBoundaryTest(RegExpMacroAssembler* masm,
1722 Label* fall_through,
1723 Label* above_or_equal,
1725 if (below != fall_through) {
1726 masm->CheckCharacterLT(border, below);
1727 if (above_or_equal != fall_through) masm->GoTo(above_or_equal);
1729 masm->CheckCharacterGT(border - 1, above_or_equal);
1734 static void EmitDoubleBoundaryTest(RegExpMacroAssembler* masm,
1737 Label* fall_through,
1739 Label* out_of_range) {
1740 if (in_range == fall_through) {
1741 if (first == last) {
1742 masm->CheckNotCharacter(first, out_of_range);
1744 masm->CheckCharacterNotInRange(first, last, out_of_range);
1747 if (first == last) {
1748 masm->CheckCharacter(first, in_range);
1750 masm->CheckCharacterInRange(first, last, in_range);
1752 if (out_of_range != fall_through) masm->GoTo(out_of_range);
1759 static void EmitUseLookupTable(
1760 RegExpMacroAssembler* masm,
1761 ZoneList<int>* ranges,
1765 Label* fall_through,
1768 static const int kSize = RegExpMacroAssembler::kTableSize;
1769 static const int kMask = RegExpMacroAssembler::kTableMask;
1771 int base = (min_char & ~kMask);
1775 for (
int i = start_index;
i <= end_index;
i++) {
1776 DCHECK_EQ(ranges->at(
i) & ~kMask, base);
1778 DCHECK(start_index == 0 || (ranges->at(start_index - 1) & ~kMask) <= base);
1782 Label* on_bit_clear;
1784 if (even_label == fall_through) {
1785 on_bit_set = odd_label;
1786 on_bit_clear = even_label;
1789 on_bit_set = even_label;
1790 on_bit_clear = odd_label;
1793 for (
int i = 0;
i < (ranges->at(start_index) & kMask) &&
i < kSize;
i++) {
1798 for (
int i = start_index;
i < end_index;
i++) {
1799 for (j = (ranges->at(
i) & kMask); j < (ranges->at(
i + 1) & kMask); j++) {
1804 for (
int i = j;
i < kSize;
i++) {
1807 Factory* factory = masm->isolate()->factory();
1809 Handle<ByteArray> ba = factory->NewByteArray(kSize, TENURED);
1810 for (
int i = 0;
i < kSize;
i++) {
1811 ba->set(
i, templ[
i]);
1813 masm->CheckBitInTable(ba, on_bit_set);
1814 if (on_bit_clear != fall_through) masm->GoTo(on_bit_clear);
1818 static void CutOutRange(RegExpMacroAssembler* masm,
1819 ZoneList<int>* ranges,
1825 bool odd = (((cut_index - start_index) & 1) == 1);
1826 Label* in_range_label = odd ? odd_label : even_label;
1828 EmitDoubleBoundaryTest(masm,
1829 ranges->at(cut_index),
1830 ranges->at(cut_index + 1) - 1,
1834 DCHECK(!dummy.is_linked());
1838 for (
int j = cut_index; j > start_index; j--) {
1839 ranges->at(j) = ranges->at(j - 1);
1841 for (
int j = cut_index + 1; j < end_index; j++) {
1842 ranges->at(j) = ranges->at(j + 1);
1849 static void SplitSearchSpace(ZoneList<int>* ranges,
1852 int* new_start_index,
1855 static const int kSize = RegExpMacroAssembler::kTableSize;
1856 static const int kMask = RegExpMacroAssembler::kTableMask;
1858 int first = ranges->at(start_index);
1859 int last = ranges->at(end_index) - 1;
1861 *new_start_index = start_index;
1862 *border = (ranges->at(start_index) & ~kMask) + kSize;
1863 while (*new_start_index < end_index) {
1864 if (ranges->at(*new_start_index) > *border)
break;
1865 (*new_start_index)++;
1878 int binary_chop_index = (end_index + start_index) / 2;
1883 if (*border - 1 > String::kMaxOneByteCharCode &&
1884 end_index - start_index > (*new_start_index - start_index) * 2 &&
1885 last - first > kSize * 2 && binary_chop_index > *new_start_index &&
1886 ranges->at(binary_chop_index) >= first + 2 * kSize) {
1887 int scan_forward_for_section_border = binary_chop_index;;
1888 int new_border = (ranges->at(binary_chop_index) | kMask) + 1;
1890 while (scan_forward_for_section_border < end_index) {
1891 if (ranges->at(scan_forward_for_section_border) > new_border) {
1892 *new_start_index = scan_forward_for_section_border;
1893 *border = new_border;
1896 scan_forward_for_section_border++;
1900 DCHECK(*new_start_index > start_index);
1901 *new_end_index = *new_start_index - 1;
1902 if (ranges->at(*new_end_index) == *border) {
1905 if (*border >= ranges->at(end_index)) {
1906 *border = ranges->at(end_index);
1907 *new_start_index = end_index;
1908 *new_end_index = end_index - 1;
1918 static void GenerateBranches(RegExpMacroAssembler* masm, ZoneList<int>* ranges,
1919 int start_index,
int end_index, uc32 min_char,
1920 uc32 max_char, Label* fall_through,
1921 Label* even_label, Label* odd_label) {
1922 DCHECK_LE(min_char, String::kMaxUtf16CodeUnit);
1923 DCHECK_LE(max_char, String::kMaxUtf16CodeUnit);
1925 int first = ranges->at(start_index);
1926 int last = ranges->at(end_index) - 1;
1928 DCHECK_LT(min_char, first);
1932 if (start_index == end_index) {
1933 EmitBoundaryTest(masm, first, fall_through, even_label, odd_label);
1939 if (start_index + 1 == end_index) {
1940 EmitDoubleBoundaryTest(
1941 masm, first, last, fall_through, even_label, odd_label);
1947 if (end_index - start_index <= 6) {
1950 static int kNoCutIndex = -1;
1951 int cut = kNoCutIndex;
1952 for (
int i = start_index;
i < end_index;
i++) {
1953 if (ranges->at(
i) == ranges->at(
i + 1) - 1) {
1958 if (cut == kNoCutIndex) cut = start_index;
1960 masm, ranges, start_index, end_index, cut, even_label, odd_label);
1961 DCHECK_GE(end_index - start_index, 2);
1962 GenerateBranches(masm,
1976 static const int kBits = RegExpMacroAssembler::kTableSizeBits;
1978 if ((max_char >> kBits) == (min_char >> kBits)) {
1979 EmitUseLookupTable(masm,
1990 if ((min_char >> kBits) != (first >> kBits)) {
1991 masm->CheckCharacterLT(first, odd_label);
1992 GenerateBranches(masm,
2004 int new_start_index = 0;
2005 int new_end_index = 0;
2008 SplitSearchSpace(ranges,
2016 Label* above = &handle_rest;
2017 if (border == last + 1) {
2020 above = (end_index & 1) != (start_index & 1) ? odd_label : even_label;
2021 DCHECK(new_end_index == end_index - 1);
2024 DCHECK_LE(start_index, new_end_index);
2025 DCHECK_LE(new_start_index, end_index);
2026 DCHECK_LT(start_index, new_start_index);
2027 DCHECK_LT(new_end_index, end_index);
2028 DCHECK(new_end_index + 1 == new_start_index ||
2029 (new_end_index + 2 == new_start_index &&
2030 border == ranges->at(new_end_index + 1)));
2031 DCHECK_LT(min_char, border - 1);
2032 DCHECK_LT(border, max_char);
2033 DCHECK_LT(ranges->at(new_end_index), border);
2034 DCHECK(border < ranges->at(new_start_index) ||
2035 (border == ranges->at(new_start_index) &&
2036 new_start_index == end_index &&
2037 new_end_index == end_index - 1 &&
2038 border == last + 1));
2039 DCHECK(new_start_index == 0 || border >= ranges->at(new_start_index - 1));
2041 masm->CheckCharacterGT(border - 1, above);
2043 GenerateBranches(masm,
2052 if (handle_rest.is_linked()) {
2053 masm->Bind(&handle_rest);
2054 bool flip = (new_start_index & 1) != (start_index & 1);
2055 GenerateBranches(masm,
2062 flip ? odd_label : even_label,
2063 flip ? even_label : odd_label);
2068 static void EmitCharClass(RegExpMacroAssembler* macro_assembler,
2069 RegExpCharacterClass* cc,
bool one_byte,
2070 Label* on_failure,
int cp_offset,
bool check_offset,
2071 bool preloaded, Zone* zone) {
2072 ZoneList<CharacterRange>* ranges = cc->ranges(zone);
2073 CharacterRange::Canonicalize(ranges);
2077 max_char = String::kMaxOneByteCharCode;
2079 max_char = String::kMaxUtf16CodeUnit;
2082 int range_count = ranges->length();
2084 int last_valid_range = range_count - 1;
2085 while (last_valid_range >= 0) {
2086 CharacterRange& range = ranges->at(last_valid_range);
2087 if (range.from() <= max_char) {
2093 if (last_valid_range < 0) {
2094 if (!cc->is_negated()) {
2095 macro_assembler->GoTo(on_failure);
2098 macro_assembler->CheckPosition(cp_offset, on_failure);
2103 if (last_valid_range == 0 &&
2104 ranges->at(0).IsEverything(max_char)) {
2105 if (cc->is_negated()) {
2106 macro_assembler->GoTo(on_failure);
2110 macro_assembler->CheckPosition(cp_offset, on_failure);
2117 macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check_offset);
2120 if (cc->is_standard(zone) &&
2121 macro_assembler->CheckSpecialCharacterClass(cc->standard_type(),
2133 ZoneList<int>* range_boundaries =
2134 new(zone) ZoneList<int>(last_valid_range, zone);
2136 bool zeroth_entry_is_failure = !cc->is_negated();
2138 for (
int i = 0;
i <= last_valid_range;
i++) {
2139 CharacterRange& range = ranges->at(
i);
2140 if (range.from() == 0) {
2142 zeroth_entry_is_failure = !zeroth_entry_is_failure;
2144 range_boundaries->Add(range.from(), zone);
2146 range_boundaries->Add(range.to() + 1, zone);
2148 int end_index = range_boundaries->length() - 1;
2149 if (range_boundaries->at(end_index) > max_char) {
2154 GenerateBranches(macro_assembler,
2161 zeroth_entry_is_failure ? &fall_through : on_failure,
2162 zeroth_entry_is_failure ? on_failure : &fall_through);
2163 macro_assembler->Bind(&fall_through);
2166 RegExpNode::~RegExpNode() =
default;
2168 RegExpNode::LimitResult RegExpNode::LimitVersions(RegExpCompiler* compiler,
2171 if (trace->stop_node() !=
nullptr) {
2175 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
2176 if (trace->is_trivial()) {
2177 if (label_.is_bound() || on_work_list() || !KeepRecursing(compiler)) {
2180 macro_assembler->GoTo(&label_);
2183 compiler->AddWork(
this);
2187 macro_assembler->Bind(&label_);
2194 if (KeepRecursing(compiler) && compiler->optimize() &&
2195 trace_count_ < kMaxCopiesCodeGenerated) {
2202 bool was_limiting = compiler->limiting_recursion();
2203 compiler->set_limiting_recursion(
true);
2204 trace->Flush(compiler,
this);
2205 compiler->set_limiting_recursion(was_limiting);
2210 bool RegExpNode::KeepRecursing(RegExpCompiler* compiler) {
2211 return !compiler->limiting_recursion() &&
2212 compiler->recursion_depth() <= RegExpCompiler::kMaxRecursion;
2216 int ActionNode::EatsAtLeast(
int still_to_find,
2218 bool not_at_start) {
2219 if (budget <= 0)
return 0;
2220 if (action_type_ == POSITIVE_SUBMATCH_SUCCESS)
return 0;
2221 return on_success()->EatsAtLeast(still_to_find,
2227 void ActionNode::FillInBMInfo(Isolate* isolate,
int offset,
int budget,
2228 BoyerMooreLookahead* bm,
bool not_at_start) {
2229 if (action_type_ != POSITIVE_SUBMATCH_SUCCESS) {
2230 on_success()->FillInBMInfo(isolate, offset, budget - 1, bm, not_at_start);
2232 SaveBMInfo(bm, not_at_start, offset);
2236 int AssertionNode::EatsAtLeast(
int still_to_find,
2238 bool not_at_start) {
2239 if (budget <= 0)
return 0;
2245 if (assertion_type() == AT_START && not_at_start)
return still_to_find;
2246 return on_success()->EatsAtLeast(still_to_find,
2252 void AssertionNode::FillInBMInfo(Isolate* isolate,
int offset,
int budget,
2253 BoyerMooreLookahead* bm,
bool not_at_start) {
2255 if (assertion_type() == AT_START && not_at_start)
return;
2256 on_success()->FillInBMInfo(isolate, offset, budget - 1, bm, not_at_start);
2257 SaveBMInfo(bm, not_at_start, offset);
2261 int BackReferenceNode::EatsAtLeast(
int still_to_find,
2263 bool not_at_start) {
2264 if (read_backward())
return 0;
2265 if (budget <= 0)
return 0;
2266 return on_success()->EatsAtLeast(still_to_find,
2272 int TextNode::EatsAtLeast(
int still_to_find,
2274 bool not_at_start) {
2275 if (read_backward())
return 0;
2276 int answer = Length();
2277 if (answer >= still_to_find)
return answer;
2278 if (budget <= 0)
return answer;
2280 return answer + on_success()->EatsAtLeast(still_to_find - answer,
2286 int NegativeLookaroundChoiceNode::EatsAtLeast(
int still_to_find,
int budget,
2287 bool not_at_start) {
2288 if (budget <= 0)
return 0;
2291 RegExpNode* node = alternatives_->at(1).node();
2292 return node->EatsAtLeast(still_to_find, budget - 1, not_at_start);
2296 void NegativeLookaroundChoiceNode::GetQuickCheckDetails(
2297 QuickCheckDetails* details, RegExpCompiler* compiler,
int filled_in,
2298 bool not_at_start) {
2301 RegExpNode* node = alternatives_->at(1).node();
2302 return node->GetQuickCheckDetails(details, compiler, filled_in, not_at_start);
2306 int ChoiceNode::EatsAtLeastHelper(
int still_to_find,
2308 RegExpNode* ignore_this_node,
2309 bool not_at_start) {
2310 if (budget <= 0)
return 0;
2312 int choice_count = alternatives_->length();
2313 budget = (budget - 1) / choice_count;
2314 for (
int i = 0;
i < choice_count;
i++) {
2315 RegExpNode* node = alternatives_->at(
i).node();
2316 if (node == ignore_this_node)
continue;
2317 int node_eats_at_least =
2318 node->EatsAtLeast(still_to_find, budget, not_at_start);
2319 if (node_eats_at_least < min) min = node_eats_at_least;
2320 if (min == 0)
return 0;
2326 int LoopChoiceNode::EatsAtLeast(
int still_to_find,
2328 bool not_at_start) {
2329 return EatsAtLeastHelper(still_to_find,
2336 int ChoiceNode::EatsAtLeast(
int still_to_find,
2338 bool not_at_start) {
2339 return EatsAtLeastHelper(still_to_find, budget,
nullptr, not_at_start);
2354 bool QuickCheckDetails::Rationalize(
bool asc) {
2355 bool found_useful_op =
false;
2358 char_mask = String::kMaxOneByteCharCode;
2360 char_mask = String::kMaxUtf16CodeUnit;
2365 for (
int i = 0;
i < characters_;
i++) {
2366 Position* pos = &positions_[
i];
2367 if ((pos->mask & String::kMaxOneByteCharCode) != 0) {
2368 found_useful_op =
true;
2370 mask_ |= (pos->mask & char_mask) << char_shift;
2371 value_ |= (pos->value & char_mask) << char_shift;
2372 char_shift += asc ? 8 : 16;
2374 return found_useful_op;
2378 bool RegExpNode::EmitQuickCheck(RegExpCompiler* compiler,
2379 Trace* bounds_check_trace,
2381 bool preload_has_checked_bounds,
2382 Label* on_possible_success,
2383 QuickCheckDetails* details,
2384 bool fall_through_on_failure) {
2385 if (details->characters() == 0)
return false;
2386 GetQuickCheckDetails(
2387 details, compiler, 0, trace->at_start() == Trace::FALSE_VALUE);
2388 if (details->cannot_match())
return false;
2389 if (!details->Rationalize(compiler->one_byte()))
return false;
2390 DCHECK(details->characters() == 1 ||
2391 compiler->macro_assembler()->CanReadUnaligned());
2395 RegExpMacroAssembler* assembler = compiler->macro_assembler();
2397 if (trace->characters_preloaded() != details->characters()) {
2398 DCHECK(trace->cp_offset() == bounds_check_trace->cp_offset());
2403 assembler->LoadCurrentCharacter(trace->cp_offset(),
2404 bounds_check_trace->backtrack(),
2405 !preload_has_checked_bounds,
2406 details->characters());
2410 bool need_mask =
true;
2412 if (details->characters() == 1) {
2416 if (compiler->one_byte()) {
2417 char_mask = String::kMaxOneByteCharCode;
2419 char_mask = String::kMaxUtf16CodeUnit;
2421 if ((mask & char_mask) == char_mask) need_mask =
false;
2426 static const uint32_t kTwoByteMask = 0xFFFF;
2427 static const uint32_t kFourByteMask = 0xFFFFFFFF;
2428 if (details->characters() == 2 && compiler->one_byte()) {
2429 if ((mask & kTwoByteMask) == kTwoByteMask) need_mask =
false;
2430 }
else if (details->characters() == 1 && !compiler->one_byte()) {
2431 if ((mask & kTwoByteMask) == kTwoByteMask) need_mask =
false;
2433 if (mask == kFourByteMask) need_mask =
false;
2437 if (fall_through_on_failure) {
2439 assembler->CheckCharacterAfterAnd(value, mask, on_possible_success);
2441 assembler->CheckCharacter(value, on_possible_success);
2445 assembler->CheckNotCharacterAfterAnd(value, mask, trace->backtrack());
2447 assembler->CheckNotCharacter(value, trace->backtrack());
2462 void TextNode::GetQuickCheckDetails(QuickCheckDetails* details,
2463 RegExpCompiler* compiler,
2464 int characters_filled_in,
2465 bool not_at_start) {
2468 if (read_backward())
return;
2469 Isolate* isolate = compiler->macro_assembler()->isolate();
2470 DCHECK(characters_filled_in < details->characters());
2471 int characters = details->characters();
2473 if (compiler->one_byte()) {
2474 char_mask = String::kMaxOneByteCharCode;
2476 char_mask = String::kMaxUtf16CodeUnit;
2478 for (
int k = 0; k < elements()->length(); k++) {
2479 TextElement elm = elements()->at(k);
2480 if (elm.text_type() == TextElement::ATOM) {
2481 Vector<const uc16> quarks = elm.atom()->data();
2482 for (
int i = 0;
i < characters &&
i < quarks.length();
i++) {
2483 QuickCheckDetails::Position* pos =
2484 details->positions(characters_filled_in);
2486 if (elm.atom()->ignore_case()) {
2487 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
2488 int length = GetCaseIndependentLetters(isolate, c,
2489 compiler->one_byte(), chars);
2493 details->set_cannot_match();
2494 pos->determines_perfectly =
false;
2501 pos->mask = char_mask;
2503 pos->determines_perfectly =
true;
2507 for (
int j = 1; j < length; j++) {
2508 uint32_t differing_bits = ((chars[j] & common_bits) ^ bits);
2509 common_bits ^= differing_bits;
2510 bits &= common_bits;
2516 uint32_t one_zero = (common_bits | ~char_mask);
2517 if (length == 2 && ((~one_zero) & ((~one_zero) - 1)) == 0) {
2518 pos->determines_perfectly =
true;
2520 pos->mask = common_bits;
2527 if (c > char_mask) {
2528 details->set_cannot_match();
2529 pos->determines_perfectly =
false;
2532 pos->mask = char_mask;
2534 pos->determines_perfectly =
true;
2536 characters_filled_in++;
2537 DCHECK(characters_filled_in <= details->characters());
2538 if (characters_filled_in == details->characters()) {
2543 QuickCheckDetails::Position* pos =
2544 details->positions(characters_filled_in);
2545 RegExpCharacterClass* tree = elm.char_class();
2546 ZoneList<CharacterRange>* ranges = tree->ranges(zone());
2547 DCHECK(!ranges->is_empty());
2548 if (tree->is_negated()) {
2556 int first_range = 0;
2557 while (ranges->at(first_range).from() > char_mask) {
2559 if (first_range == ranges->length()) {
2560 details->set_cannot_match();
2561 pos->determines_perfectly =
false;
2565 CharacterRange range = ranges->at(first_range);
2566 uc16 from = range.from();
2567 uc16 to = range.to();
2568 if (to > char_mask) {
2571 uint32_t differing_bits = (from ^ to);
2574 if ((differing_bits & (differing_bits + 1)) == 0 &&
2575 from + differing_bits == to) {
2576 pos->determines_perfectly =
true;
2578 uint32_t common_bits = ~SmearBitsRight(differing_bits);
2579 uint32_t bits = (from & common_bits);
2580 for (
int i = first_range + 1;
i < ranges->length();
i++) {
2581 CharacterRange range = ranges->at(
i);
2582 uc16 from = range.from();
2583 uc16 to = range.to();
2584 if (from > char_mask)
continue;
2585 if (to > char_mask) to = char_mask;
2591 pos->determines_perfectly =
false;
2592 uint32_t new_common_bits = (from ^ to);
2593 new_common_bits = ~SmearBitsRight(new_common_bits);
2594 common_bits &= new_common_bits;
2595 bits &= new_common_bits;
2596 uint32_t differing_bits = (from & common_bits) ^ bits;
2597 common_bits ^= differing_bits;
2598 bits &= common_bits;
2600 pos->mask = common_bits;
2603 characters_filled_in++;
2604 DCHECK(characters_filled_in <= details->characters());
2605 if (characters_filled_in == details->characters()) {
2610 DCHECK(characters_filled_in != details->characters());
2611 if (!details->cannot_match()) {
2612 on_success()-> GetQuickCheckDetails(details,
2614 characters_filled_in,
2620 void QuickCheckDetails::Clear() {
2621 for (
int i = 0;
i < characters_;
i++) {
2622 positions_[
i].mask = 0;
2623 positions_[
i].value = 0;
2624 positions_[
i].determines_perfectly =
false;
2630 void QuickCheckDetails::Advance(
int by,
bool one_byte) {
2631 if (by >= characters_ || by < 0) {
2632 DCHECK_IMPLIES(by < 0, characters_ == 0);
2636 DCHECK_LE(characters_ - by, 4);
2637 DCHECK_LE(characters_, 4);
2638 for (
int i = 0;
i < characters_ - by;
i++) {
2639 positions_[
i] = positions_[by +
i];
2641 for (
int i = characters_ - by;
i < characters_;
i++) {
2642 positions_[
i].mask = 0;
2643 positions_[
i].value = 0;
2644 positions_[
i].determines_perfectly =
false;
2653 void QuickCheckDetails::Merge(QuickCheckDetails* other,
int from_index) {
2654 DCHECK(characters_ == other->characters_);
2655 if (other->cannot_match_) {
2658 if (cannot_match_) {
2662 for (
int i = from_index;
i < characters_;
i++) {
2663 QuickCheckDetails::Position* pos = positions(
i);
2664 QuickCheckDetails::Position* other_pos = other->positions(
i);
2665 if (pos->mask != other_pos->mask ||
2666 pos->value != other_pos->value ||
2667 !other_pos->determines_perfectly) {
2670 pos->determines_perfectly =
false;
2672 pos->mask &= other_pos->mask;
2673 pos->value &= pos->mask;
2674 other_pos->value &= pos->mask;
2675 uc16 differing_bits = (pos->value ^ other_pos->value);
2676 pos->mask &= ~differing_bits;
2677 pos->value &= pos->mask;
2685 DCHECK(!info->visited);
2686 info->visited =
true;
2689 info_->visited =
false;
2695 RegExpNode* SeqRegExpNode::FilterOneByte(
int depth) {
2696 if (info()->replacement_calculated)
return replacement();
2697 if (depth < 0)
return this;
2698 DCHECK(!info()->visited);
2700 return FilterSuccessor(depth - 1);
2703 RegExpNode* SeqRegExpNode::FilterSuccessor(
int depth) {
2704 RegExpNode* next = on_success_->FilterOneByte(depth - 1);
2705 if (next ==
nullptr)
return set_replacement(
nullptr);
2707 return set_replacement(
this);
2711 static inline bool RangeContainsLatin1Equivalents(CharacterRange range) {
2713 return range.Contains(0x039C) || range.Contains(0x03BC) ||
2714 range.Contains(0x0178);
2718 static bool RangesContainLatin1Equivalents(ZoneList<CharacterRange>* ranges) {
2719 for (
int i = 0;
i < ranges->length();
i++) {
2721 if (RangeContainsLatin1Equivalents(ranges->at(
i)))
return true;
2726 RegExpNode* TextNode::FilterOneByte(
int depth) {
2727 if (info()->replacement_calculated)
return replacement();
2728 if (depth < 0)
return this;
2729 DCHECK(!info()->visited);
2730 VisitMarker marker(info());
2731 int element_count = elements()->length();
2732 for (
int i = 0;
i < element_count;
i++) {
2733 TextElement elm = elements()->at(
i);
2734 if (elm.text_type() == TextElement::ATOM) {
2735 Vector<const uc16> quarks = elm.atom()->data();
2736 for (
int j = 0; j < quarks.length(); j++) {
2737 uint16_t c = quarks[j];
2738 if (elm.atom()->ignore_case()) {
2739 c = unibrow::Latin1::TryConvertToLatin1(c);
2741 if (c > unibrow::Latin1::kMaxChar)
return set_replacement(
nullptr);
2743 uint16_t* writable_quarks =
const_cast<uint16_t*
>(quarks.start());
2744 writable_quarks[j] = c;
2747 DCHECK(elm.text_type() == TextElement::CHAR_CLASS);
2748 RegExpCharacterClass* cc = elm.char_class();
2749 ZoneList<CharacterRange>* ranges = cc->ranges(zone());
2750 CharacterRange::Canonicalize(ranges);
2752 int range_count = ranges->length();
2753 if (cc->is_negated()) {
2754 if (range_count != 0 &&
2755 ranges->at(0).from() == 0 &&
2756 ranges->at(0).to() >= String::kMaxOneByteCharCode) {
2758 if (IgnoreCase(cc->flags()) && RangesContainLatin1Equivalents(ranges))
2760 return set_replacement(
nullptr);
2763 if (range_count == 0 ||
2764 ranges->at(0).from() > String::kMaxOneByteCharCode) {
2766 if (IgnoreCase(cc->flags()) && RangesContainLatin1Equivalents(ranges))
2768 return set_replacement(
nullptr);
2773 return FilterSuccessor(depth - 1);
2776 RegExpNode* LoopChoiceNode::FilterOneByte(
int depth) {
2777 if (info()->replacement_calculated)
return replacement();
2778 if (depth < 0)
return this;
2779 if (info()->visited)
return this;
2781 VisitMarker marker(info());
2783 RegExpNode* continue_replacement = continue_node_->FilterOneByte(depth - 1);
2786 if (continue_replacement ==
nullptr)
return set_replacement(
nullptr);
2789 return ChoiceNode::FilterOneByte(depth - 1);
2792 RegExpNode* ChoiceNode::FilterOneByte(
int depth) {
2793 if (info()->replacement_calculated)
return replacement();
2794 if (depth < 0)
return this;
2795 if (info()->visited)
return this;
2796 VisitMarker marker(info());
2797 int choice_count = alternatives_->length();
2799 for (
int i = 0;
i < choice_count;
i++) {
2800 GuardedAlternative alternative = alternatives_->at(
i);
2801 if (alternative.guards() !=
nullptr &&
2802 alternative.guards()->length() != 0) {
2803 set_replacement(
this);
2809 RegExpNode* survivor =
nullptr;
2810 for (
int i = 0;
i < choice_count;
i++) {
2811 GuardedAlternative alternative = alternatives_->at(
i);
2812 RegExpNode* replacement = alternative.node()->FilterOneByte(depth - 1);
2813 DCHECK(replacement !=
this);
2814 if (replacement !=
nullptr) {
2815 alternatives_->at(
i).set_node(replacement);
2817 survivor = replacement;
2820 if (surviving < 2)
return set_replacement(survivor);
2822 set_replacement(
this);
2823 if (surviving == choice_count) {
2828 ZoneList<GuardedAlternative>* new_alternatives =
2829 new(zone()) ZoneList<GuardedAlternative>(surviving, zone());
2830 for (
int i = 0;
i < choice_count;
i++) {
2831 RegExpNode* replacement =
2832 alternatives_->at(
i).node()->FilterOneByte(depth - 1);
2833 if (replacement !=
nullptr) {
2834 alternatives_->at(
i).set_node(replacement);
2835 new_alternatives->Add(alternatives_->at(
i), zone());
2838 alternatives_ = new_alternatives;
2842 RegExpNode* NegativeLookaroundChoiceNode::FilterOneByte(
int depth) {
2843 if (info()->replacement_calculated)
return replacement();
2844 if (depth < 0)
return this;
2845 if (info()->visited)
return this;
2846 VisitMarker marker(info());
2849 RegExpNode* node = alternatives_->at(1).node();
2850 RegExpNode* replacement = node->FilterOneByte(depth - 1);
2851 if (replacement ==
nullptr)
return set_replacement(
nullptr);
2852 alternatives_->at(1).set_node(replacement);
2854 RegExpNode* neg_node = alternatives_->at(0).node();
2855 RegExpNode* neg_replacement = neg_node->FilterOneByte(depth - 1);
2858 if (neg_replacement ==
nullptr)
return set_replacement(replacement);
2859 alternatives_->at(0).set_node(neg_replacement);
2860 return set_replacement(
this);
2864 void LoopChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details,
2865 RegExpCompiler* compiler,
2866 int characters_filled_in,
2867 bool not_at_start) {
2868 if (body_can_be_zero_length_ || info()->visited)
return;
2869 VisitMarker marker(info());
2870 return ChoiceNode::GetQuickCheckDetails(details,
2872 characters_filled_in,
2877 void LoopChoiceNode::FillInBMInfo(Isolate* isolate,
int offset,
int budget,
2878 BoyerMooreLookahead* bm,
bool not_at_start) {
2879 if (body_can_be_zero_length_ || budget <= 0) {
2880 bm->SetRest(offset);
2881 SaveBMInfo(bm, not_at_start, offset);
2884 ChoiceNode::FillInBMInfo(isolate, offset, budget - 1, bm, not_at_start);
2885 SaveBMInfo(bm, not_at_start, offset);
2889 void ChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details,
2890 RegExpCompiler* compiler,
2891 int characters_filled_in,
2892 bool not_at_start) {
2893 not_at_start = (not_at_start || not_at_start_);
2894 int choice_count = alternatives_->length();
2895 DCHECK_LT(0, choice_count);
2896 alternatives_->at(0).node()->GetQuickCheckDetails(details,
2898 characters_filled_in,
2900 for (
int i = 1;
i < choice_count;
i++) {
2901 QuickCheckDetails new_details(details->characters());
2902 RegExpNode* node = alternatives_->at(
i).node();
2903 node->GetQuickCheckDetails(&new_details, compiler,
2904 characters_filled_in,
2907 details->Merge(&new_details, characters_filled_in);
2913 static void EmitWordCheck(RegExpMacroAssembler* assembler,
2916 bool fall_through_on_word) {
2917 if (assembler->CheckSpecialCharacterClass(
2918 fall_through_on_word ?
'w' :
'W',
2919 fall_through_on_word ? non_word : word)) {
2923 assembler->CheckCharacterGT(
'z', non_word);
2924 assembler->CheckCharacterLT(
'0', non_word);
2925 assembler->CheckCharacterGT(
'a' - 1, word);
2926 assembler->CheckCharacterLT(
'9' + 1, word);
2927 assembler->CheckCharacterLT(
'A', non_word);
2928 assembler->CheckCharacterLT(
'Z' + 1, word);
2929 if (fall_through_on_word) {
2930 assembler->CheckNotCharacter(
'_', non_word);
2932 assembler->CheckCharacter(
'_', word);
2939 static void EmitHat(RegExpCompiler* compiler,
2940 RegExpNode* on_success,
2942 RegExpMacroAssembler* assembler = compiler->macro_assembler();
2945 Trace new_trace(*trace);
2946 new_trace.InvalidateCurrentCharacter();
2949 if (new_trace.cp_offset() == 0) {
2952 assembler->CheckAtStart(&ok);
2956 assembler->LoadCurrentCharacter(new_trace.cp_offset() -1,
2957 new_trace.backtrack(),
2959 if (!assembler->CheckSpecialCharacterClass(
'n',
2960 new_trace.backtrack())) {
2962 if (!compiler->one_byte()) {
2963 assembler->CheckCharacterAfterAnd(0x2028, 0xFFFE, &ok);
2965 assembler->CheckCharacter(
'\n', &ok);
2966 assembler->CheckNotCharacter(
'\r', new_trace.backtrack());
2968 assembler->Bind(&ok);
2969 on_success->Emit(compiler, &new_trace);
2974 void AssertionNode::EmitBoundaryCheck(RegExpCompiler* compiler, Trace* trace) {
2975 RegExpMacroAssembler* assembler = compiler->macro_assembler();
2976 Isolate* isolate = assembler->isolate();
2977 Trace::TriBool next_is_word_character = Trace::UNKNOWN;
2978 bool not_at_start = (trace->at_start() == Trace::FALSE_VALUE);
2979 BoyerMooreLookahead* lookahead = bm_info(not_at_start);
2980 if (lookahead ==
nullptr) {
2982 Min(kMaxLookaheadForBoyerMoore, EatsAtLeast(kMaxLookaheadForBoyerMoore,
2985 if (eats_at_least >= 1) {
2986 BoyerMooreLookahead* bm =
2987 new(zone()) BoyerMooreLookahead(eats_at_least, compiler, zone());
2988 FillInBMInfo(isolate, 0, kRecursionBudget, bm, not_at_start);
2989 if (bm->at(0)->is_non_word())
2990 next_is_word_character = Trace::FALSE_VALUE;
2991 if (bm->at(0)->is_word()) next_is_word_character = Trace::TRUE_VALUE;
2994 if (lookahead->at(0)->is_non_word())
2995 next_is_word_character = Trace::FALSE_VALUE;
2996 if (lookahead->at(0)->is_word())
2997 next_is_word_character = Trace::TRUE_VALUE;
2999 bool at_boundary = (assertion_type_ == AssertionNode::AT_BOUNDARY);
3000 if (next_is_word_character == Trace::UNKNOWN) {
3001 Label before_non_word;
3003 if (trace->characters_preloaded() != 1) {
3004 assembler->LoadCurrentCharacter(trace->cp_offset(), &before_non_word);
3007 EmitWordCheck(assembler, &before_word, &before_non_word,
false);
3009 assembler->Bind(&before_non_word);
3011 BacktrackIfPrevious(compiler, trace, at_boundary ? kIsNonWord : kIsWord);
3012 assembler->GoTo(&ok);
3014 assembler->Bind(&before_word);
3015 BacktrackIfPrevious(compiler, trace, at_boundary ? kIsWord : kIsNonWord);
3016 assembler->Bind(&ok);
3017 }
else if (next_is_word_character == Trace::TRUE_VALUE) {
3018 BacktrackIfPrevious(compiler, trace, at_boundary ? kIsWord : kIsNonWord);
3020 DCHECK(next_is_word_character == Trace::FALSE_VALUE);
3021 BacktrackIfPrevious(compiler, trace, at_boundary ? kIsNonWord : kIsWord);
3026 void AssertionNode::BacktrackIfPrevious(
3027 RegExpCompiler* compiler,
3029 AssertionNode::IfPrevious backtrack_if_previous) {
3030 RegExpMacroAssembler* assembler = compiler->macro_assembler();
3031 Trace new_trace(*trace);
3032 new_trace.InvalidateCurrentCharacter();
3034 Label fall_through, dummy;
3036 Label* non_word = backtrack_if_previous == kIsNonWord ?
3037 new_trace.backtrack() :
3039 Label* word = backtrack_if_previous == kIsNonWord ?
3041 new_trace.backtrack();
3043 if (new_trace.cp_offset() == 0) {
3046 assembler->CheckAtStart(non_word);
3050 assembler->LoadCurrentCharacter(new_trace.cp_offset() - 1, &dummy,
false);
3051 EmitWordCheck(assembler, word, non_word, backtrack_if_previous == kIsNonWord);
3053 assembler->Bind(&fall_through);
3054 on_success()->Emit(compiler, &new_trace);
3058 void AssertionNode::GetQuickCheckDetails(QuickCheckDetails* details,
3059 RegExpCompiler* compiler,
3061 bool not_at_start) {
3062 if (assertion_type_ == AT_START && not_at_start) {
3063 details->set_cannot_match();
3066 return on_success()->GetQuickCheckDetails(details,
3073 void AssertionNode::Emit(RegExpCompiler* compiler, Trace* trace) {
3074 RegExpMacroAssembler* assembler = compiler->macro_assembler();
3075 switch (assertion_type_) {
3078 assembler->CheckPosition(trace->cp_offset(), &ok);
3079 assembler->GoTo(trace->backtrack());
3080 assembler->Bind(&ok);
3084 if (trace->at_start() == Trace::FALSE_VALUE) {
3085 assembler->GoTo(trace->backtrack());
3088 if (trace->at_start() == Trace::UNKNOWN) {
3089 assembler->CheckNotAtStart(trace->cp_offset(), trace->backtrack());
3090 Trace at_start_trace = *trace;
3091 at_start_trace.set_at_start(Trace::TRUE_VALUE);
3092 on_success()->Emit(compiler, &at_start_trace);
3098 EmitHat(compiler, on_success(), trace);
3101 case AT_NON_BOUNDARY: {
3102 EmitBoundaryCheck(compiler, trace);
3106 on_success()->Emit(compiler, trace);
3110 static bool DeterminedAlready(QuickCheckDetails* quick_check,
int offset) {
3111 if (quick_check ==
nullptr)
return false;
3112 if (offset >= quick_check->characters())
return false;
3113 return quick_check->positions(offset)->determines_perfectly;
3117 static void UpdateBoundsCheck(
int index,
int* checked_up_to) {
3118 if (index > *checked_up_to) {
3119 *checked_up_to = index;
3153 void TextNode::TextEmitPass(RegExpCompiler* compiler,
3154 TextEmitPassType pass,
3157 bool first_element_checked,
3158 int* checked_up_to) {
3159 RegExpMacroAssembler* assembler = compiler->macro_assembler();
3160 Isolate* isolate = assembler->isolate();
3161 bool one_byte = compiler->one_byte();
3162 Label* backtrack = trace->backtrack();
3163 QuickCheckDetails* quick_check = trace->quick_check_performed();
3164 int element_count = elements()->length();
3165 int backward_offset = read_backward() ? -Length() : 0;
3166 for (
int i = preloaded ? 0 : element_count - 1;
i >= 0;
i--) {
3167 TextElement elm = elements()->at(
i);
3168 int cp_offset = trace->cp_offset() + elm.cp_offset() + backward_offset;
3169 if (elm.text_type() == TextElement::ATOM) {
3170 if (SkipPass(pass, elm.atom()->ignore_case()))
continue;
3171 Vector<const uc16> quarks = elm.atom()->data();
3172 for (
int j = preloaded ? 0 : quarks.length() - 1; j >= 0; j--) {
3173 if (first_element_checked &&
i == 0 && j == 0)
continue;
3174 if (DeterminedAlready(quick_check, elm.cp_offset() + j))
continue;
3175 EmitCharacterFunction* emit_function =
nullptr;
3176 uc16 quark = quarks[j];
3177 if (elm.atom()->ignore_case()) {
3181 quark = unibrow::Latin1::TryConvertToLatin1(quark);
3184 case NON_LATIN1_MATCH:
3186 if (quark > String::kMaxOneByteCharCode) {
3187 assembler->GoTo(backtrack);
3191 case NON_LETTER_CHARACTER_MATCH:
3192 emit_function = &EmitAtomNonLetter;
3194 case SIMPLE_CHARACTER_MATCH:
3195 emit_function = &EmitSimpleCharacter;
3197 case CASE_CHARACTER_MATCH:
3198 emit_function = &EmitAtomLetter;
3203 if (emit_function !=
nullptr) {
3204 bool bounds_check = *checked_up_to < cp_offset + j || read_backward();
3205 bool bound_checked =
3206 emit_function(isolate, compiler, quark, backtrack, cp_offset + j,
3207 bounds_check, preloaded);
3208 if (bound_checked) UpdateBoundsCheck(cp_offset + j, checked_up_to);
3212 DCHECK_EQ(TextElement::CHAR_CLASS, elm.text_type());
3213 if (pass == CHARACTER_CLASS_MATCH) {
3214 if (first_element_checked &&
i == 0)
continue;
3215 if (DeterminedAlready(quick_check, elm.cp_offset()))
continue;
3216 RegExpCharacterClass* cc = elm.char_class();
3217 bool bounds_check = *checked_up_to < cp_offset || read_backward();
3218 EmitCharClass(assembler, cc, one_byte, backtrack, cp_offset,
3219 bounds_check, preloaded, zone());
3220 UpdateBoundsCheck(cp_offset, checked_up_to);
3227 int TextNode::Length() {
3228 TextElement elm = elements()->last();
3229 DCHECK_LE(0, elm.cp_offset());
3230 return elm.cp_offset() + elm.length();
3233 bool TextNode::SkipPass(TextEmitPassType pass,
bool ignore_case) {
3235 return pass == SIMPLE_CHARACTER_MATCH;
3237 return pass == NON_LETTER_CHARACTER_MATCH || pass == CASE_CHARACTER_MATCH;
3241 TextNode* TextNode::CreateForCharacterRanges(Zone* zone,
3242 ZoneList<CharacterRange>* ranges,
3244 RegExpNode* on_success,
3245 JSRegExp::Flags flags) {
3246 DCHECK_NOT_NULL(ranges);
3247 ZoneList<TextElement>* elms =
new (zone) ZoneList<TextElement>(1, zone);
3248 elms->Add(TextElement::CharClass(
3249 new (zone) RegExpCharacterClass(zone, ranges, flags)),
3251 return new (zone) TextNode(elms, read_backward, on_success);
3254 TextNode* TextNode::CreateForSurrogatePair(Zone* zone, CharacterRange lead,
3255 CharacterRange trail,
3257 RegExpNode* on_success,
3258 JSRegExp::Flags flags) {
3259 ZoneList<CharacterRange>* lead_ranges = CharacterRange::List(zone, lead);
3260 ZoneList<CharacterRange>* trail_ranges = CharacterRange::List(zone, trail);
3261 ZoneList<TextElement>* elms =
new (zone) ZoneList<TextElement>(2, zone);
3262 elms->Add(TextElement::CharClass(
3263 new (zone) RegExpCharacterClass(zone, lead_ranges, flags)),
3265 elms->Add(TextElement::CharClass(
3266 new (zone) RegExpCharacterClass(zone, trail_ranges, flags)),
3268 return new (zone) TextNode(elms, read_backward, on_success);
3278 void TextNode::Emit(RegExpCompiler* compiler, Trace* trace) {
3279 LimitResult limit_result = LimitVersions(compiler, trace);
3280 if (limit_result == DONE)
return;
3281 DCHECK(limit_result == CONTINUE);
3283 if (trace->cp_offset() + Length() > RegExpMacroAssembler::kMaxCPOffset) {
3284 compiler->SetRegExpTooBig();
3288 if (compiler->one_byte()) {
3290 TextEmitPass(compiler, NON_LATIN1_MATCH,
false, trace,
false, &dummy);
3293 bool first_elt_done =
false;
3294 int bound_checked_to = trace->cp_offset() - 1;
3295 bound_checked_to += trace->bound_checked_up_to();
3299 if (trace->characters_preloaded() == 1) {
3300 for (
int pass = kFirstRealPass; pass <= kLastPass; pass++) {
3301 TextEmitPass(compiler, static_cast<TextEmitPassType>(pass),
true, trace,
3302 false, &bound_checked_to);
3304 first_elt_done =
true;
3307 for (
int pass = kFirstRealPass; pass <= kLastPass; pass++) {
3308 TextEmitPass(compiler, static_cast<TextEmitPassType>(pass),
false, trace,
3309 first_elt_done, &bound_checked_to);
3312 Trace successor_trace(*trace);
3314 successor_trace.AdvanceCurrentPositionInTrace(
3315 read_backward() ? -Length() : Length(), compiler);
3316 successor_trace.set_at_start(read_backward() ? Trace::UNKNOWN
3317 : Trace::FALSE_VALUE);
3318 RecursionCheck rc(compiler);
3319 on_success()->Emit(compiler, &successor_trace);
3323 void Trace::InvalidateCurrentCharacter() {
3324 characters_preloaded_ = 0;
3328 void Trace::AdvanceCurrentPositionInTrace(
int by, RegExpCompiler* compiler) {
3332 characters_preloaded_ = 0;
3336 quick_check_performed_.Advance(by, compiler->one_byte());
3338 if (cp_offset_ > RegExpMacroAssembler::kMaxCPOffset) {
3339 compiler->SetRegExpTooBig();
3342 bound_checked_up_to_ = Max(0, bound_checked_up_to_ - by);
3346 void TextNode::MakeCaseIndependent(Isolate* isolate,
bool is_one_byte) {
3347 int element_count = elements()->length();
3348 for (
int i = 0;
i < element_count;
i++) {
3349 TextElement elm = elements()->at(
i);
3350 if (elm.text_type() == TextElement::CHAR_CLASS) {
3351 RegExpCharacterClass* cc = elm.char_class();
3352 #ifdef V8_INTL_SUPPORT 3353 bool case_equivalents_already_added =
3354 NeedsUnicodeCaseEquivalents(cc->flags());
3356 bool case_equivalents_already_added =
false;
3358 if (IgnoreCase(cc->flags()) && !case_equivalents_already_added) {
3361 if (cc->is_standard(zone()))
continue;
3362 ZoneList<CharacterRange>* ranges = cc->ranges(zone());
3363 CharacterRange::AddCaseEquivalents(isolate, zone(), ranges,
3371 int TextNode::GreedyLoopTextLength() {
return Length(); }
3374 RegExpNode* TextNode::GetSuccessorOfOmnivorousTextNode(
3375 RegExpCompiler* compiler) {
3376 if (read_backward())
return nullptr;
3377 if (elements()->length() != 1)
return nullptr;
3378 TextElement elm = elements()->at(0);
3379 if (elm.text_type() != TextElement::CHAR_CLASS)
return nullptr;
3380 RegExpCharacterClass* node = elm.char_class();
3381 ZoneList<CharacterRange>* ranges = node->ranges(zone());
3382 CharacterRange::Canonicalize(ranges);
3383 if (node->is_negated()) {
3384 return ranges->length() == 0 ? on_success() : nullptr;
3386 if (ranges->length() != 1)
return nullptr;
3388 if (compiler->one_byte()) {
3389 max_char = String::kMaxOneByteCharCode;
3391 max_char = String::kMaxUtf16CodeUnit;
3393 return ranges->at(0).IsEverything(max_char) ? on_success() : nullptr;
3401 int ChoiceNode::GreedyLoopTextLengthForAlternative(
3402 GuardedAlternative* alternative) {
3404 RegExpNode* node = alternative->node();
3407 int recursion_depth = 0;
3408 while (node !=
this) {
3409 if (recursion_depth++ > RegExpCompiler::kMaxRecursion) {
3410 return kNodeIsTooComplexForGreedyLoops;
3412 int node_length = node->GreedyLoopTextLength();
3413 if (node_length == kNodeIsTooComplexForGreedyLoops) {
3414 return kNodeIsTooComplexForGreedyLoops;
3416 length += node_length;
3417 SeqRegExpNode* seq_node =
static_cast<SeqRegExpNode*
>(node);
3418 node = seq_node->on_success();
3420 return read_backward() ? -length : length;
3424 void LoopChoiceNode::AddLoopAlternative(GuardedAlternative alt) {
3425 DCHECK_NULL(loop_node_);
3426 AddAlternative(alt);
3427 loop_node_ = alt.node();
3431 void LoopChoiceNode::AddContinueAlternative(GuardedAlternative alt) {
3432 DCHECK_NULL(continue_node_);
3433 AddAlternative(alt);
3434 continue_node_ = alt.node();
3438 void LoopChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) {
3439 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
3440 if (trace->stop_node() ==
this) {
3443 GreedyLoopTextLengthForAlternative(&(alternatives_->at(0)));
3444 DCHECK_NE(kNodeIsTooComplexForGreedyLoops, text_length);
3447 DCHECK(trace->cp_offset() == text_length);
3448 macro_assembler->AdvanceCurrentPosition(text_length);
3449 macro_assembler->GoTo(trace->loop_label());
3452 DCHECK_NULL(trace->stop_node());
3453 if (!trace->is_trivial()) {
3454 trace->Flush(compiler,
this);
3457 ChoiceNode::Emit(compiler, trace);
3461 int ChoiceNode::CalculatePreloadCharacters(RegExpCompiler* compiler,
3462 int eats_at_least) {
3463 int preload_characters = Min(4, eats_at_least);
3464 DCHECK_LE(preload_characters, 4);
3465 if (compiler->macro_assembler()->CanReadUnaligned()) {
3466 bool one_byte = compiler->one_byte();
3471 if (preload_characters == 3) preload_characters = 2;
3473 if (preload_characters > 2) preload_characters = 2;
3476 if (preload_characters > 1) preload_characters = 1;
3478 return preload_characters;
3487 : possible_success(),
3488 expects_preload(
false),
3490 quick_check_details() { }
3491 Label possible_success;
3492 bool expects_preload;
3503 : alt_gens_(count, zone) {
3504 for (
int i = 0;
i < count &&
i < kAFew;
i++) {
3505 alt_gens_.Add(a_few_alt_gens_ +
i, zone);
3507 for (
int i = kAFew;
i < count;
i++) {
3512 for (
int i = kAFew;
i < alt_gens_.length();
i++) {
3513 delete alt_gens_[
i];
3514 alt_gens_[
i] =
nullptr;
3519 return alt_gens_[
i];
3523 static const int kAFew = 10;
3529 static const uc32 kRangeEndMarker = 0x110000;
3534 static const int kSpaceRanges[] = {
3535 '\t',
'\r' + 1,
' ',
' ' + 1, 0x00A0, 0x00A1, 0x1680,
3536 0x1681, 0x2000, 0x200B, 0x2028, 0x202A, 0x202F, 0x2030,
3537 0x205F, 0x2060, 0x3000, 0x3001, 0xFEFF, 0xFF00, kRangeEndMarker};
3538 static const int kSpaceRangeCount = arraysize(kSpaceRanges);
3540 static const int kWordRanges[] = {
3541 '0',
'9' + 1,
'A',
'Z' + 1,
'_',
'_' + 1,
'a',
'z' + 1, kRangeEndMarker};
3542 static const int kWordRangeCount = arraysize(kWordRanges);
3543 static const int kDigitRanges[] = {
'0',
'9' + 1, kRangeEndMarker};
3544 static const int kDigitRangeCount = arraysize(kDigitRanges);
3545 static const int kSurrogateRanges[] = {
3546 kLeadSurrogateStart, kLeadSurrogateStart + 1, kRangeEndMarker};
3547 static const int kSurrogateRangeCount = arraysize(kSurrogateRanges);
3548 static const int kLineTerminatorRanges[] = {
3549 0x000A, 0x000B, 0x000D, 0x000E, 0x2028, 0x202A, kRangeEndMarker};
3550 static const int kLineTerminatorRangeCount = arraysize(kLineTerminatorRanges);
3552 void BoyerMoorePositionInfo::Set(
int character) {
3553 SetInterval(Interval(character, character));
3557 void BoyerMoorePositionInfo::SetInterval(
const Interval& interval) {
3558 s_ = AddRange(s_, kSpaceRanges, kSpaceRangeCount, interval);
3559 w_ = AddRange(w_, kWordRanges, kWordRangeCount, interval);
3560 d_ = AddRange(d_, kDigitRanges, kDigitRangeCount, interval);
3562 AddRange(surrogate_, kSurrogateRanges, kSurrogateRangeCount, interval);
3563 if (interval.to() - interval.from() >= kMapSize - 1) {
3564 if (map_count_ != kMapSize) {
3565 map_count_ = kMapSize;
3566 for (
int i = 0;
i < kMapSize;
i++) map_->at(
i) =
true;
3570 for (
int i = interval.from();
i <= interval.to();
i++) {
3571 int mod_character = (
i & kMask);
3572 if (!map_->at(mod_character)) {
3574 map_->at(mod_character) =
true;
3576 if (map_count_ == kMapSize)
return;
3581 void BoyerMoorePositionInfo::SetAll() {
3582 s_ = w_ = d_ = kLatticeUnknown;
3583 if (map_count_ != kMapSize) {
3584 map_count_ = kMapSize;
3585 for (
int i = 0;
i < kMapSize;
i++) map_->at(
i) =
true;
3590 BoyerMooreLookahead::BoyerMooreLookahead(
3591 int length, RegExpCompiler* compiler, Zone* zone)
3593 compiler_(compiler) {
3594 if (compiler->one_byte()) {
3595 max_char_ = String::kMaxOneByteCharCode;
3597 max_char_ = String::kMaxUtf16CodeUnit;
3599 bitmaps_ =
new(zone) ZoneList<BoyerMoorePositionInfo*>(length, zone);
3600 for (
int i = 0;
i < length;
i++) {
3601 bitmaps_->Add(
new(zone) BoyerMoorePositionInfo(zone), zone);
3609 bool BoyerMooreLookahead::FindWorthwhileInterval(
int* from,
int* to) {
3610 int biggest_points = 0;
3613 const int kMaxMax = 32;
3614 for (
int max_number_of_chars = 4;
3615 max_number_of_chars < kMaxMax;
3616 max_number_of_chars *= 2) {
3618 FindBestInterval(max_number_of_chars, biggest_points, from, to);
3620 if (biggest_points == 0)
return false;
3631 int BoyerMooreLookahead::FindBestInterval(
3632 int max_number_of_chars,
int old_biggest_points,
int* from,
int* to) {
3633 int biggest_points = old_biggest_points;
3634 static const int kSize = RegExpMacroAssembler::kTableSize;
3635 for (
int i = 0;
i < length_; ) {
3636 while (
i < length_ && Count(
i) > max_number_of_chars)
i++;
3637 if (
i == length_)
break;
3638 int remembered_from =
i;
3639 bool union_map[kSize];
3640 for (
int j = 0; j < kSize; j++) union_map[j] =
false;
3641 while (
i < length_ && Count(
i) <= max_number_of_chars) {
3642 BoyerMoorePositionInfo* map = bitmaps_->at(
i);
3643 for (
int j = 0; j < kSize; j++) union_map[j] |= map->at(j);
3647 for (
int j = 0; j < kSize; j++) {
3654 frequency += compiler_->frequency_collator()->Frequency(j) + 1;
3662 bool in_quickcheck_range =
3663 ((
i - remembered_from < 4) ||
3664 (compiler_->one_byte() ? remembered_from <= 4 : remembered_from <= 2));
3667 int probability = (in_quickcheck_range ? kSize / 2 : kSize) - frequency;
3668 int points = (
i - remembered_from) * probability;
3669 if (points > biggest_points) {
3670 *from = remembered_from;
3672 biggest_points = points;
3675 return biggest_points;
3684 int BoyerMooreLookahead::GetSkipTable(
int min_lookahead,
3686 Handle<ByteArray> boolean_skip_table) {
3687 const int kSize = RegExpMacroAssembler::kTableSize;
3689 const int kSkipArrayEntry = 0;
3690 const int kDontSkipArrayEntry = 1;
3692 for (
int i = 0;
i < kSize;
i++) {
3693 boolean_skip_table->set(
i, kSkipArrayEntry);
3695 int skip = max_lookahead + 1 - min_lookahead;
3697 for (
int i = max_lookahead;
i >= min_lookahead;
i--) {
3698 BoyerMoorePositionInfo* map = bitmaps_->at(
i);
3699 for (
int j = 0; j < kSize; j++) {
3701 boolean_skip_table->set(j, kDontSkipArrayEntry);
3711 void BoyerMooreLookahead::EmitSkipInstructions(RegExpMacroAssembler* masm) {
3712 const int kSize = RegExpMacroAssembler::kTableSize;
3714 int min_lookahead = 0;
3715 int max_lookahead = 0;
3717 if (!FindWorthwhileInterval(&min_lookahead, &max_lookahead))
return;
3719 bool found_single_character =
false;
3720 int single_character = 0;
3721 for (
int i = max_lookahead;
i >= min_lookahead;
i--) {
3722 BoyerMoorePositionInfo* map = bitmaps_->at(
i);
3723 if (map->map_count() > 1 ||
3724 (found_single_character && map->map_count() != 0)) {
3725 found_single_character =
false;
3728 for (
int j = 0; j < kSize; j++) {
3730 found_single_character =
true;
3731 single_character = j;
3737 int lookahead_width = max_lookahead + 1 - min_lookahead;
3739 if (found_single_character && lookahead_width == 1 && max_lookahead < 3) {
3744 if (found_single_character) {
3747 masm->LoadCurrentCharacter(max_lookahead, &cont,
true);
3748 if (max_char_ > kSize) {
3749 masm->CheckCharacterAfterAnd(single_character,
3750 RegExpMacroAssembler::kTableMask,
3753 masm->CheckCharacter(single_character, &cont);
3755 masm->AdvanceCurrentPosition(lookahead_width);
3761 Factory* factory = masm->isolate()->factory();
3762 Handle<ByteArray> boolean_skip_table = factory->NewByteArray(kSize, TENURED);
3763 int skip_distance = GetSkipTable(
3764 min_lookahead, max_lookahead, boolean_skip_table);
3765 DCHECK_NE(0, skip_distance);
3769 masm->LoadCurrentCharacter(max_lookahead, &cont,
true);
3770 masm->CheckBitInTable(boolean_skip_table, &cont);
3771 masm->AdvanceCurrentPosition(skip_distance);
3851 GreedyLoopState::GreedyLoopState(
bool not_at_start) {
3852 counter_backtrack_trace_.set_backtrack(&label_);
3853 if (not_at_start) counter_backtrack_trace_.set_at_start(Trace::FALSE_VALUE);
3857 void ChoiceNode::AssertGuardsMentionRegisters(Trace* trace) {
3859 int choice_count = alternatives_->length();
3860 for (
int i = 0;
i < choice_count - 1;
i++) {
3861 GuardedAlternative alternative = alternatives_->at(
i);
3862 ZoneList<Guard*>* guards = alternative.guards();
3863 int guard_count = (guards ==
nullptr) ? 0 : guards->length();
3864 for (
int j = 0; j < guard_count; j++) {
3865 DCHECK(!trace->mentions_reg(guards->at(j)->reg()));
3872 void ChoiceNode::SetUpPreLoad(RegExpCompiler* compiler,
3873 Trace* current_trace,
3874 PreloadState* state) {
3875 if (state->eats_at_least_ == PreloadState::kEatsAtLeastNotYetInitialized) {
3877 state->eats_at_least_ =
3878 EatsAtLeast(compiler->one_byte() ? 4 : 2, kRecursionBudget,
3879 current_trace->at_start() == Trace::FALSE_VALUE);
3881 state->preload_characters_ =
3882 CalculatePreloadCharacters(compiler, state->eats_at_least_);
3884 state->preload_is_current_ =
3885 (current_trace->characters_preloaded() == state->preload_characters_);
3886 state->preload_has_checked_bounds_ = state->preload_is_current_;
3890 void ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) {
3891 int choice_count = alternatives_->length();
3893 if (choice_count == 1 && alternatives_->at(0).guards() ==
nullptr) {
3894 alternatives_->at(0).node()->Emit(compiler, trace);
3898 AssertGuardsMentionRegisters(trace);
3900 LimitResult limit_result = LimitVersions(compiler, trace);
3901 if (limit_result == DONE)
return;
3902 DCHECK(limit_result == CONTINUE);
3906 if (trace->flush_budget() == 0 && trace->actions() !=
nullptr) {
3907 trace->Flush(compiler,
this);
3911 RecursionCheck rc(compiler);
3913 PreloadState preload;
3915 GreedyLoopState greedy_loop_state(not_at_start());
3917 int text_length = GreedyLoopTextLengthForAlternative(&alternatives_->at(0));
3918 AlternativeGenerationList alt_gens(choice_count, zone());
3920 if (choice_count > 1 && text_length != kNodeIsTooComplexForGreedyLoops) {
3921 trace = EmitGreedyLoop(compiler,
3930 Label second_choice;
3931 compiler->macro_assembler()->Bind(&second_choice);
3933 preload.eats_at_least_ = EmitOptimizedUnanchoredSearch(compiler, trace);
3935 EmitChoices(compiler,
3945 int new_flush_budget = trace->flush_budget() / choice_count;
3946 for (
int i = 0;
i < choice_count;
i++) {
3947 AlternativeGeneration* alt_gen = alt_gens.at(
i);
3948 Trace new_trace(*trace);
3952 if (new_trace.actions() !=
nullptr) {
3953 new_trace.set_flush_budget(new_flush_budget);
3955 bool next_expects_preload =
3956 i == choice_count - 1 ? false : alt_gens.at(
i + 1)->expects_preload;
3957 EmitOutOfLineContinuation(compiler,
3959 alternatives_->at(
i),
3961 preload.preload_characters_,
3962 next_expects_preload);
3967 Trace* ChoiceNode::EmitGreedyLoop(RegExpCompiler* compiler,
3969 AlternativeGenerationList* alt_gens,
3970 PreloadState* preload,
3971 GreedyLoopState* greedy_loop_state,
3973 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
3981 DCHECK(trace->stop_node() ==
nullptr);
3982 macro_assembler->PushCurrentPosition();
3983 Label greedy_match_failed;
3984 Trace greedy_match_trace;
3985 if (not_at_start()) greedy_match_trace.set_at_start(Trace::FALSE_VALUE);
3986 greedy_match_trace.set_backtrack(&greedy_match_failed);
3988 macro_assembler->Bind(&loop_label);
3989 greedy_match_trace.set_stop_node(
this);
3990 greedy_match_trace.set_loop_label(&loop_label);
3991 alternatives_->at(0).node()->Emit(compiler, &greedy_match_trace);
3992 macro_assembler->Bind(&greedy_match_failed);
3994 Label second_choice;
3995 macro_assembler->Bind(&second_choice);
3997 Trace* new_trace = greedy_loop_state->counter_backtrack_trace();
3999 EmitChoices(compiler,
4005 macro_assembler->Bind(greedy_loop_state->label());
4007 macro_assembler->CheckGreedyLoop(trace->backtrack());
4009 macro_assembler->AdvanceCurrentPosition(-text_length);
4010 macro_assembler->GoTo(&second_choice);
4014 int ChoiceNode::EmitOptimizedUnanchoredSearch(RegExpCompiler* compiler,
4016 int eats_at_least = PreloadState::kEatsAtLeastNotYetInitialized;
4017 if (alternatives_->length() != 2)
return eats_at_least;
4019 GuardedAlternative alt1 = alternatives_->at(1);
4020 if (alt1.guards() !=
nullptr && alt1.guards()->length() != 0) {
4021 return eats_at_least;
4023 RegExpNode* eats_anything_node = alt1.node();
4024 if (eats_anything_node->GetSuccessorOfOmnivorousTextNode(compiler) !=
this) {
4025 return eats_at_least;
4034 DCHECK(trace->is_trivial());
4036 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
4037 Isolate* isolate = macro_assembler->isolate();
4045 BoyerMooreLookahead* bm = bm_info(
false);
4046 if (bm ==
nullptr) {
4047 eats_at_least = Min(kMaxLookaheadForBoyerMoore,
4048 EatsAtLeast(kMaxLookaheadForBoyerMoore,
4051 if (eats_at_least >= 1) {
4052 bm =
new(zone()) BoyerMooreLookahead(eats_at_least,
4055 GuardedAlternative alt0 = alternatives_->at(0);
4056 alt0.node()->FillInBMInfo(isolate, 0, kRecursionBudget, bm,
false);
4059 if (bm !=
nullptr) {
4060 bm->EmitSkipInstructions(macro_assembler);
4062 return eats_at_least;
4066 void ChoiceNode::EmitChoices(RegExpCompiler* compiler,
4067 AlternativeGenerationList* alt_gens,
4070 PreloadState* preload) {
4071 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
4072 SetUpPreLoad(compiler, trace, preload);
4076 int choice_count = alternatives_->length();
4078 int new_flush_budget = trace->flush_budget() / choice_count;
4080 for (
int i = first_choice;
i < choice_count;
i++) {
4081 bool is_last =
i == choice_count - 1;
4082 bool fall_through_on_failure = !is_last;
4083 GuardedAlternative alternative = alternatives_->at(
i);
4084 AlternativeGeneration* alt_gen = alt_gens->at(
i);
4085 alt_gen->quick_check_details.set_characters(preload->preload_characters_);
4086 ZoneList<Guard*>* guards = alternative.guards();
4087 int guard_count = (guards ==
nullptr) ? 0 : guards->length();
4088 Trace new_trace(*trace);
4089 new_trace.set_characters_preloaded(preload->preload_is_current_ ?
4090 preload->preload_characters_ :
4092 if (preload->preload_has_checked_bounds_) {
4093 new_trace.set_bound_checked_up_to(preload->preload_characters_);
4095 new_trace.quick_check_performed()->Clear();
4096 if (not_at_start_) new_trace.set_at_start(Trace::FALSE_VALUE);
4098 new_trace.set_backtrack(&alt_gen->after);
4100 alt_gen->expects_preload = preload->preload_is_current_;
4101 bool generate_full_check_inline =
false;
4102 if (compiler->optimize() &&
4103 try_to_emit_quick_check_for_alternative(
i == 0) &&
4104 alternative.node()->EmitQuickCheck(
4105 compiler, trace, &new_trace, preload->preload_has_checked_bounds_,
4106 &alt_gen->possible_success, &alt_gen->quick_check_details,
4107 fall_through_on_failure)) {
4109 preload->preload_is_current_ =
true;
4110 preload->preload_has_checked_bounds_ =
true;
4113 if (!fall_through_on_failure) {
4114 macro_assembler->Bind(&alt_gen->possible_success);
4115 new_trace.set_quick_check_performed(&alt_gen->quick_check_details);
4116 new_trace.set_characters_preloaded(preload->preload_characters_);
4117 new_trace.set_bound_checked_up_to(preload->preload_characters_);
4118 generate_full_check_inline =
true;
4120 }
else if (alt_gen->quick_check_details.cannot_match()) {
4121 if (!fall_through_on_failure) {
4122 macro_assembler->GoTo(trace->backtrack());
4131 if (
i != first_choice) {
4132 alt_gen->expects_preload =
false;
4133 new_trace.InvalidateCurrentCharacter();
4135 generate_full_check_inline =
true;
4137 if (generate_full_check_inline) {
4138 if (new_trace.actions() !=
nullptr) {
4139 new_trace.set_flush_budget(new_flush_budget);
4141 for (
int j = 0; j < guard_count; j++) {
4142 GenerateGuard(macro_assembler, guards->at(j), &new_trace);
4144 alternative.node()->Emit(compiler, &new_trace);
4145 preload->preload_is_current_ =
false;
4147 macro_assembler->Bind(&alt_gen->after);
4152 void ChoiceNode::EmitOutOfLineContinuation(RegExpCompiler* compiler,
4154 GuardedAlternative alternative,
4155 AlternativeGeneration* alt_gen,
4156 int preload_characters,
4157 bool next_expects_preload) {
4158 if (!alt_gen->possible_success.is_linked())
return;
4160 RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
4161 macro_assembler->Bind(&alt_gen->possible_success);
4162 Trace out_of_line_trace(*trace);
4163 out_of_line_trace.set_characters_preloaded(preload_characters);
4164 out_of_line_trace.set_quick_check_performed(&alt_gen->quick_check_details);
4165 if (not_at_start_) out_of_line_trace.set_at_start(Trace::FALSE_VALUE);
4166 ZoneList<Guard*>* guards = alternative.guards();
4167 int guard_count = (guards ==
nullptr) ? 0 : guards->length();
4168 if (next_expects_preload) {
4169 Label reload_current_char;
4170 out_of_line_trace.set_backtrack(&reload_current_char);
4171 for (
int j = 0; j < guard_count; j++) {
4172 GenerateGuard(macro_assembler, guards->at(j), &out_of_line_trace);
4174 alternative.node()->Emit(compiler, &out_of_line_trace);
4175 macro_assembler->Bind(&reload_current_char);
4179 macro_assembler->LoadCurrentCharacter(trace->cp_offset(),
nullptr,
false,
4180 preload_characters);
4181 macro_assembler->GoTo(&(alt_gen->after));
4183 out_of_line_trace.set_backtrack(&(alt_gen->after));
4184 for (
int j = 0; j < guard_count; j++) {
4185 GenerateGuard(macro_assembler, guards->at(j), &out_of_line_trace);
4187 alternative.node()->Emit(compiler, &out_of_line_trace);
4192 void ActionNode::Emit(RegExpCompiler* compiler, Trace* trace) {
4193 RegExpMacroAssembler* assembler = compiler->macro_assembler();
4194 LimitResult limit_result = LimitVersions(compiler, trace);
4195 if (limit_result == DONE)
return;
4196 DCHECK(limit_result == CONTINUE);
4198 RecursionCheck rc(compiler);
4200 switch (action_type_) {
4201 case STORE_POSITION: {
4202 Trace::DeferredCapture
4203 new_capture(data_.u_position_register.reg,
4204 data_.u_position_register.is_capture,
4206 Trace new_trace = *trace;
4207 new_trace.add_action(&new_capture);
4208 on_success()->Emit(compiler, &new_trace);
4211 case INCREMENT_REGISTER: {
4212 Trace::DeferredIncrementRegister
4213 new_increment(data_.u_increment_register.reg);
4214 Trace new_trace = *trace;
4215 new_trace.add_action(&new_increment);
4216 on_success()->Emit(compiler, &new_trace);
4219 case SET_REGISTER: {
4220 Trace::DeferredSetRegister
4221 new_set(data_.u_store_register.reg, data_.u_store_register.value);
4222 Trace new_trace = *trace;
4223 new_trace.add_action(&new_set);
4224 on_success()->Emit(compiler, &new_trace);
4227 case CLEAR_CAPTURES: {
4228 Trace::DeferredClearCaptures
4229 new_capture(Interval(data_.u_clear_captures.range_from,
4230 data_.u_clear_captures.range_to));
4231 Trace new_trace = *trace;
4232 new_trace.add_action(&new_capture);
4233 on_success()->Emit(compiler, &new_trace);
4236 case BEGIN_SUBMATCH:
4237 if (!trace->is_trivial()) {
4238 trace->Flush(compiler,
this);
4240 assembler->WriteCurrentPositionToRegister(
4241 data_.u_submatch.current_position_register, 0);
4242 assembler->WriteStackPointerToRegister(
4243 data_.u_submatch.stack_pointer_register);
4244 on_success()->Emit(compiler, trace);
4247 case EMPTY_MATCH_CHECK: {
4248 int start_pos_reg = data_.u_empty_match_check.start_register;
4250 int rep_reg = data_.u_empty_match_check.repetition_register;
4251 bool has_minimum = (rep_reg != RegExpCompiler::kNoRegister);
4252 bool know_dist = trace->GetStoredPosition(start_pos_reg, &stored_pos);
4253 if (know_dist && !has_minimum && stored_pos == trace->cp_offset()) {
4256 assembler->GoTo(trace->backtrack());
4257 }
else if (know_dist && stored_pos < trace->cp_offset()) {
4260 on_success()->Emit(compiler, trace);
4261 }
else if (!trace->is_trivial()) {
4262 trace->Flush(compiler,
this);
4264 Label skip_empty_check;
4268 int limit = data_.u_empty_match_check.repetition_limit;
4269 assembler->IfRegisterLT(rep_reg, limit, &skip_empty_check);
4273 assembler->IfRegisterEqPos(data_.u_empty_match_check.start_register,
4274 trace->backtrack());
4275 assembler->Bind(&skip_empty_check);
4276 on_success()->Emit(compiler, trace);
4280 case POSITIVE_SUBMATCH_SUCCESS: {
4281 if (!trace->is_trivial()) {
4282 trace->Flush(compiler,
this);
4285 assembler->ReadCurrentPositionFromRegister(
4286 data_.u_submatch.current_position_register);
4287 assembler->ReadStackPointerFromRegister(
4288 data_.u_submatch.stack_pointer_register);
4289 int clear_register_count = data_.u_submatch.clear_register_count;
4290 if (clear_register_count == 0) {
4291 on_success()->Emit(compiler, trace);
4294 int clear_registers_from = data_.u_submatch.clear_register_from;
4295 Label clear_registers_backtrack;
4296 Trace new_trace = *trace;
4297 new_trace.set_backtrack(&clear_registers_backtrack);
4298 on_success()->Emit(compiler, &new_trace);
4300 assembler->Bind(&clear_registers_backtrack);
4301 int clear_registers_to = clear_registers_from + clear_register_count - 1;
4302 assembler->ClearRegisters(clear_registers_from, clear_registers_to);
4304 DCHECK(trace->backtrack() ==
nullptr);
4305 assembler->Backtrack();
4314 void BackReferenceNode::Emit(RegExpCompiler* compiler, Trace* trace) {
4315 RegExpMacroAssembler* assembler = compiler->macro_assembler();
4316 if (!trace->is_trivial()) {
4317 trace->Flush(compiler,
this);
4321 LimitResult limit_result = LimitVersions(compiler, trace);
4322 if (limit_result == DONE)
return;
4323 DCHECK(limit_result == CONTINUE);
4325 RecursionCheck rc(compiler);
4327 DCHECK_EQ(start_reg_ + 1, end_reg_);
4328 if (IgnoreCase(flags_)) {
4329 assembler->CheckNotBackReferenceIgnoreCase(
4330 start_reg_, read_backward(), IsUnicode(flags_), trace->backtrack());
4332 assembler->CheckNotBackReference(start_reg_, read_backward(),
4333 trace->backtrack());
4336 if (read_backward()) trace->set_at_start(Trace::UNKNOWN);
4339 if (IsUnicode(flags_) && !compiler->one_byte()) {
4340 assembler->CheckNotInSurrogatePair(trace->cp_offset(), trace->backtrack());
4342 on_success()->Emit(compiler, trace);
4353 class DotPrinter:
public NodeVisitor {
4355 DotPrinter(std::ostream& os,
bool ignore_case)
4357 ignore_case_(ignore_case) {}
4358 void PrintNode(
const char* label, RegExpNode* node);
4359 void Visit(RegExpNode* node);
4360 void PrintAttributes(RegExpNode* from);
4361 void PrintOnFailure(RegExpNode* from, RegExpNode* to);
4362 #define DECLARE_VISIT(Type) \ 4363 virtual void Visit##Type(Type##Node* that); 4364 FOR_EACH_NODE_TYPE(DECLARE_VISIT)
4365 #undef DECLARE_VISIT 4372 void DotPrinter::PrintNode(
const char* label, RegExpNode* node) {
4373 os_ <<
"digraph G {\n graph [label=\"";
4374 for (
int i = 0; label[
i];
i++) {
4389 os_ <<
"}" << std::endl;
4393 void DotPrinter::Visit(RegExpNode* node) {
4394 if (node->info()->visited)
return;
4395 node->info()->visited =
true;
4400 void DotPrinter::PrintOnFailure(RegExpNode* from, RegExpNode* on_failure) {
4401 os_ <<
" n" << from <<
" -> n" << on_failure <<
" [style=dotted];\n";
4406 class TableEntryBodyPrinter {
4408 TableEntryBodyPrinter(std::ostream& os, ChoiceNode* choice)
4411 void Call(uc16 from, DispatchTable::Entry entry) {
4412 OutSet* out_set = entry.out_set();
4413 for (
unsigned i = 0;
i < OutSet::kFirstLimit;
i++) {
4414 if (out_set->Get(
i)) {
4415 os_ <<
" n" << choice() <<
":s" << from <<
"o" <<
i <<
" -> n" 4416 << choice()->alternatives()->at(
i).node() <<
";\n";
4421 ChoiceNode* choice() {
return choice_; }
4423 ChoiceNode* choice_;
4427 class TableEntryHeaderPrinter {
4429 explicit TableEntryHeaderPrinter(std::ostream& os)
4432 void Call(uc16 from, DispatchTable::Entry entry) {
4438 os_ <<
"{\\" << AsUC16(from) <<
"-\\" << AsUC16(entry.to()) <<
"|{";
4439 OutSet* out_set = entry.out_set();
4441 for (
unsigned i = 0;
i < OutSet::kFirstLimit;
i++) {
4442 if (out_set->Get(
i)) {
4443 if (priority > 0) os_ <<
"|";
4444 os_ <<
"<s" << from <<
"o" <<
i <<
"> " << priority;
4457 class AttributePrinter {
4459 explicit AttributePrinter(std::ostream& os)
4462 void PrintSeparator() {
4469 void PrintBit(
const char* name,
bool value) {
4472 os_ <<
"{" << name <<
"}";
4474 void PrintPositive(
const char* name,
int value) {
4475 if (value < 0)
return;
4477 os_ <<
"{" << name <<
"|" << value <<
"}";
4486 void DotPrinter::PrintAttributes(RegExpNode* that) {
4487 os_ <<
" a" << that <<
" [shape=Mrecord, color=grey, fontcolor=grey, " 4488 <<
"margin=0.1, fontsize=10, label=\"{";
4489 AttributePrinter printer(os_);
4490 NodeInfo* info = that->info();
4491 printer.PrintBit(
"NI", info->follows_newline_interest);
4492 printer.PrintBit(
"WI", info->follows_word_interest);
4493 printer.PrintBit(
"SI", info->follows_start_interest);
4494 Label* label = that->label();
4495 if (label->is_bound())
4496 printer.PrintPositive(
"@", label->pos());
4498 <<
" a" << that <<
" -> n" << that
4499 <<
" [style=dashed, color=grey, arrowhead=none];\n";
4503 static const bool kPrintDispatchTable =
false;
4504 void DotPrinter::VisitChoice(ChoiceNode* that) {
4505 if (kPrintDispatchTable) {
4506 os_ <<
" n" << that <<
" [shape=Mrecord, label=\"";
4507 TableEntryHeaderPrinter header_printer(os_);
4508 that->GetTable(ignore_case_)->ForEach(&header_printer);
4510 PrintAttributes(that);
4511 TableEntryBodyPrinter body_printer(os_, that);
4512 that->GetTable(ignore_case_)->ForEach(&body_printer);
4514 os_ <<
" n" << that <<
" [shape=Mrecord, label=\"?\"];\n";
4515 for (
int i = 0;
i < that->alternatives()->length();
i++) {
4516 GuardedAlternative alt = that->alternatives()->at(
i);
4517 os_ <<
" n" << that <<
" -> n" << alt.node();
4520 for (
int i = 0;
i < that->alternatives()->length();
i++) {
4521 GuardedAlternative alt = that->alternatives()->at(
i);
4522 alt.node()->Accept(
this);
4527 void DotPrinter::VisitText(TextNode* that) {
4528 Zone* zone = that->zone();
4529 os_ <<
" n" << that <<
" [label=\"";
4530 for (
int i = 0;
i < that->elements()->length();
i++) {
4531 if (
i > 0) os_ <<
" ";
4532 TextElement elm = that->elements()->at(
i);
4533 switch (elm.text_type()) {
4534 case TextElement::ATOM: {
4535 Vector<const uc16> data = elm.atom()->data();
4536 for (
int i = 0;
i < data.length();
i++) {
4537 os_ << static_cast<char>(data[
i]);
4541 case TextElement::CHAR_CLASS: {
4542 RegExpCharacterClass* node = elm.char_class();
4544 if (node->is_negated()) os_ <<
"^";
4545 for (
int j = 0; j < node->ranges(zone)->length(); j++) {
4546 CharacterRange range = node->ranges(zone)->at(j);
4547 os_ << AsUC16(range.from()) <<
"-" << AsUC16(range.to());
4556 os_ <<
"\", shape=box, peripheries=2];\n";
4557 PrintAttributes(that);
4558 os_ <<
" n" << that <<
" -> n" << that->on_success() <<
";\n";
4559 Visit(that->on_success());
4563 void DotPrinter::VisitBackReference(BackReferenceNode* that) {
4564 os_ <<
" n" << that <<
" [label=\"$" << that->start_register() <<
"..$" 4565 << that->end_register() <<
"\", shape=doubleoctagon];\n";
4566 PrintAttributes(that);
4567 os_ <<
" n" << that <<
" -> n" << that->on_success() <<
";\n";
4568 Visit(that->on_success());
4572 void DotPrinter::VisitEnd(EndNode* that) {
4573 os_ <<
" n" << that <<
" [style=bold, shape=point];\n";
4574 PrintAttributes(that);
4578 void DotPrinter::VisitAssertion(AssertionNode* that) {
4579 os_ <<
" n" << that <<
" [";
4580 switch (that->assertion_type()) {
4581 case AssertionNode::AT_END:
4582 os_ <<
"label=\"$\", shape=septagon";
4584 case AssertionNode::AT_START:
4585 os_ <<
"label=\"^\", shape=septagon";
4587 case AssertionNode::AT_BOUNDARY:
4588 os_ <<
"label=\"\\b\", shape=septagon";
4590 case AssertionNode::AT_NON_BOUNDARY:
4591 os_ <<
"label=\"\\B\", shape=septagon";
4593 case AssertionNode::AFTER_NEWLINE:
4594 os_ <<
"label=\"(?<=\\n)\", shape=septagon";
4598 PrintAttributes(that);
4599 RegExpNode* successor = that->on_success();
4600 os_ <<
" n" << that <<
" -> n" << successor <<
";\n";
4605 void DotPrinter::VisitAction(ActionNode* that) {
4606 os_ <<
" n" << that <<
" [";
4607 switch (that->action_type_) {
4608 case ActionNode::SET_REGISTER:
4609 os_ <<
"label=\"$" << that->data_.u_store_register.reg
4610 <<
":=" << that->data_.u_store_register.value <<
"\", shape=octagon";
4612 case ActionNode::INCREMENT_REGISTER:
4613 os_ <<
"label=\"$" << that->data_.u_increment_register.reg
4614 <<
"++\", shape=octagon";
4616 case ActionNode::STORE_POSITION:
4617 os_ <<
"label=\"$" << that->data_.u_position_register.reg
4618 <<
":=$pos\", shape=octagon";
4620 case ActionNode::BEGIN_SUBMATCH:
4621 os_ <<
"label=\"$" << that->data_.u_submatch.current_position_register
4622 <<
":=$pos,begin\", shape=septagon";
4624 case ActionNode::POSITIVE_SUBMATCH_SUCCESS:
4625 os_ <<
"label=\"escape\", shape=septagon";
4627 case ActionNode::EMPTY_MATCH_CHECK:
4628 os_ <<
"label=\"$" << that->data_.u_empty_match_check.start_register
4629 <<
"=$pos?,$" << that->data_.u_empty_match_check.repetition_register
4630 <<
"<" << that->data_.u_empty_match_check.repetition_limit
4631 <<
"?\", shape=septagon";
4633 case ActionNode::CLEAR_CAPTURES: {
4634 os_ <<
"label=\"clear $" << that->data_.u_clear_captures.range_from
4635 <<
" to $" << that->data_.u_clear_captures.range_to
4636 <<
"\", shape=septagon";
4641 PrintAttributes(that);
4642 RegExpNode* successor = that->on_success();
4643 os_ <<
" n" << that <<
" -> n" << successor <<
";\n";
4648 class DispatchTableDumper {
4650 explicit DispatchTableDumper(std::ostream& os) : os_(os) {}
4651 void Call(uc16 key, DispatchTable::Entry entry);
4657 void DispatchTableDumper::Call(uc16 key, DispatchTable::Entry entry) {
4658 os_ <<
"[" << AsUC16(key) <<
"-" << AsUC16(entry.to()) <<
"]: {";
4659 OutSet*
set = entry.out_set();
4661 for (
unsigned i = 0;
i < OutSet::kFirstLimit;
i++) {
4675 void DispatchTable::Dump() {
4676 OFStream os(stderr);
4677 DispatchTableDumper dumper(os);
4678 tree()->ForEach(&dumper);
4682 void RegExpEngine::DotPrint(
const char* label,
4686 DotPrinter printer(os, ignore_case);
4687 printer.PrintNode(label, node);
4697 RegExpNode* RegExpAtom::ToNode(RegExpCompiler* compiler,
4698 RegExpNode* on_success) {
4699 ZoneList<TextElement>* elms =
4700 new(compiler->zone()) ZoneList<TextElement>(1, compiler->zone());
4701 elms->Add(TextElement::Atom(
this), compiler->zone());
4702 return new (compiler->zone())
4703 TextNode(elms, compiler->read_backward(), on_success);
4707 RegExpNode* RegExpText::ToNode(RegExpCompiler* compiler,
4708 RegExpNode* on_success) {
4709 return new (compiler->zone())
4710 TextNode(elements(), compiler->read_backward(), on_success);
4714 static bool CompareInverseRanges(ZoneList<CharacterRange>* ranges,
4715 const int* special_class,
4718 DCHECK_EQ(kRangeEndMarker, special_class[length]);
4719 DCHECK_NE(0, ranges->length());
4720 DCHECK_NE(0, length);
4721 DCHECK_NE(0, special_class[0]);
4722 if (ranges->length() != (length >> 1) + 1) {
4725 CharacterRange range = ranges->at(0);
4726 if (range.from() != 0) {
4729 for (
int i = 0;
i < length;
i += 2) {
4730 if (special_class[
i] != (range.to() + 1)) {
4733 range = ranges->at((
i >> 1) + 1);
4734 if (special_class[
i+1] != range.from()) {
4738 if (range.to() != String::kMaxCodePoint) {
4745 static bool CompareRanges(ZoneList<CharacterRange>* ranges,
4746 const int* special_class,
4749 DCHECK_EQ(kRangeEndMarker, special_class[length]);
4750 if (ranges->length() * 2 != length) {
4753 for (
int i = 0;
i < length;
i += 2) {
4754 CharacterRange range = ranges->at(
i >> 1);
4755 if (range.from() != special_class[
i] ||
4756 range.to() != special_class[
i + 1] - 1) {
4764 bool RegExpCharacterClass::is_standard(Zone* zone) {
4770 if (set_.is_standard()) {
4773 if (CompareRanges(set_.ranges(zone), kSpaceRanges, kSpaceRangeCount)) {
4774 set_.set_standard_set_type(
's');
4777 if (CompareInverseRanges(set_.ranges(zone), kSpaceRanges, kSpaceRangeCount)) {
4778 set_.set_standard_set_type(
'S');
4781 if (CompareInverseRanges(set_.ranges(zone),
4782 kLineTerminatorRanges,
4783 kLineTerminatorRangeCount)) {
4784 set_.set_standard_set_type(
'.');
4787 if (CompareRanges(set_.ranges(zone),
4788 kLineTerminatorRanges,
4789 kLineTerminatorRangeCount)) {
4790 set_.set_standard_set_type(
'n');
4793 if (CompareRanges(set_.ranges(zone), kWordRanges, kWordRangeCount)) {
4794 set_.set_standard_set_type(
'w');
4797 if (CompareInverseRanges(set_.ranges(zone), kWordRanges, kWordRangeCount)) {
4798 set_.set_standard_set_type(
'W');
4805 UnicodeRangeSplitter::UnicodeRangeSplitter(Zone* zone,
4806 ZoneList<CharacterRange>* base)
4810 lead_surrogates_(nullptr),
4811 trail_surrogates_(nullptr),
4823 for (
int i = 0;
i < base->length();
i++) {
4824 table_.AddRange(base->at(
i), kBase, zone_);
4827 table_.AddRange(CharacterRange::Range(0, kLeadSurrogateStart - 1),
4828 kBmpCodePoints, zone_);
4829 table_.AddRange(CharacterRange::Range(kLeadSurrogateStart, kLeadSurrogateEnd),
4830 kLeadSurrogates, zone_);
4832 CharacterRange::Range(kTrailSurrogateStart, kTrailSurrogateEnd),
4833 kTrailSurrogates, zone_);
4835 CharacterRange::Range(kTrailSurrogateEnd + 1, kNonBmpStart - 1),
4836 kBmpCodePoints, zone_);
4837 table_.AddRange(CharacterRange::Range(kNonBmpStart, kNonBmpEnd),
4838 kNonBmpCodePoints, zone_);
4839 table_.ForEach(
this);
4843 void UnicodeRangeSplitter::Call(uc32 from, DispatchTable::Entry entry) {
4844 OutSet* outset = entry.out_set();
4845 if (!outset->Get(kBase))
return;
4846 ZoneList<CharacterRange>** target =
nullptr;
4847 if (outset->Get(kBmpCodePoints)) {
4849 }
else if (outset->Get(kLeadSurrogates)) {
4850 target = &lead_surrogates_;
4851 }
else if (outset->Get(kTrailSurrogates)) {
4852 target = &trail_surrogates_;
4854 DCHECK(outset->Get(kNonBmpCodePoints));
4857 if (*target ==
nullptr)
4858 *target =
new (zone_) ZoneList<CharacterRange>(2, zone_);
4859 (*target)->Add(CharacterRange::Range(entry.from(), entry.to()), zone_);
4862 void AddBmpCharacters(RegExpCompiler* compiler, ChoiceNode* result,
4863 RegExpNode* on_success, UnicodeRangeSplitter* splitter) {
4864 ZoneList<CharacterRange>* bmp = splitter->bmp();
4865 if (bmp ==
nullptr)
return;
4866 JSRegExp::Flags default_flags = JSRegExp::Flags();
4867 result->AddAlternative(GuardedAlternative(TextNode::CreateForCharacterRanges(
4868 compiler->zone(), bmp, compiler->read_backward(), on_success,
4872 void AddNonBmpSurrogatePairs(RegExpCompiler* compiler, ChoiceNode* result,
4873 RegExpNode* on_success,
4874 UnicodeRangeSplitter* splitter) {
4875 ZoneList<CharacterRange>* non_bmp = splitter->non_bmp();
4876 if (non_bmp ==
nullptr)
return;
4877 DCHECK(!compiler->one_byte());
4878 Zone* zone = compiler->zone();
4879 JSRegExp::Flags default_flags = JSRegExp::Flags();
4880 CharacterRange::Canonicalize(non_bmp);
4881 for (
int i = 0;
i < non_bmp->length();
i++) {
4887 uc32 from = non_bmp->at(
i).from();
4888 uc32 to = non_bmp->at(
i).to();
4889 uc16 from_l = unibrow::Utf16::LeadSurrogate(from);
4890 uc16 from_t = unibrow::Utf16::TrailSurrogate(from);
4891 uc16 to_l = unibrow::Utf16::LeadSurrogate(to);
4892 uc16 to_t = unibrow::Utf16::TrailSurrogate(to);
4893 if (from_l == to_l) {
4895 result->AddAlternative(
4896 GuardedAlternative(TextNode::CreateForSurrogatePair(
4897 zone, CharacterRange::Singleton(from_l),
4898 CharacterRange::Range(from_t, to_t), compiler->read_backward(),
4899 on_success, default_flags)));
4901 if (from_t != kTrailSurrogateStart) {
4903 result->AddAlternative(
4904 GuardedAlternative(TextNode::CreateForSurrogatePair(
4905 zone, CharacterRange::Singleton(from_l),
4906 CharacterRange::Range(from_t, kTrailSurrogateEnd),
4907 compiler->read_backward(), on_success, default_flags)));
4910 if (to_t != kTrailSurrogateEnd) {
4912 result->AddAlternative(
4913 GuardedAlternative(TextNode::CreateForSurrogatePair(
4914 zone, CharacterRange::Singleton(to_l),
4915 CharacterRange::Range(kTrailSurrogateStart, to_t),
4916 compiler->read_backward(), on_success, default_flags)));
4919 if (from_l <= to_l) {
4921 result->AddAlternative(
4922 GuardedAlternative(TextNode::CreateForSurrogatePair(
4923 zone, CharacterRange::Range(from_l, to_l),
4924 CharacterRange::Range(kTrailSurrogateStart, kTrailSurrogateEnd),
4925 compiler->read_backward(), on_success, default_flags)));
4931 RegExpNode* NegativeLookaroundAgainstReadDirectionAndMatch(
4932 RegExpCompiler* compiler, ZoneList<CharacterRange>* lookbehind,
4933 ZoneList<CharacterRange>* match, RegExpNode* on_success,
bool read_backward,
4934 JSRegExp::Flags flags) {
4935 Zone* zone = compiler->zone();
4936 RegExpNode* match_node = TextNode::CreateForCharacterRanges(
4937 zone, match, read_backward, on_success, flags);
4938 int stack_register = compiler->UnicodeLookaroundStackRegister();
4939 int position_register = compiler->UnicodeLookaroundPositionRegister();
4940 RegExpLookaround::Builder lookaround(
false, match_node, stack_register,
4942 RegExpNode* negative_match = TextNode::CreateForCharacterRanges(
4943 zone, lookbehind, !read_backward, lookaround.on_match_success(), flags);
4944 return lookaround.ForMatch(negative_match);
4947 RegExpNode* MatchAndNegativeLookaroundInReadDirection(
4948 RegExpCompiler* compiler, ZoneList<CharacterRange>* match,
4949 ZoneList<CharacterRange>* lookahead, RegExpNode* on_success,
4950 bool read_backward, JSRegExp::Flags flags) {
4951 Zone* zone = compiler->zone();
4952 int stack_register = compiler->UnicodeLookaroundStackRegister();
4953 int position_register = compiler->UnicodeLookaroundPositionRegister();
4954 RegExpLookaround::Builder lookaround(
false, on_success, stack_register,
4956 RegExpNode* negative_match = TextNode::CreateForCharacterRanges(
4957 zone, lookahead, read_backward, lookaround.on_match_success(), flags);
4958 return TextNode::CreateForCharacterRanges(
4959 zone, match, read_backward, lookaround.ForMatch(negative_match), flags);
4962 void AddLoneLeadSurrogates(RegExpCompiler* compiler, ChoiceNode* result,
4963 RegExpNode* on_success,
4964 UnicodeRangeSplitter* splitter) {
4965 JSRegExp::Flags default_flags = JSRegExp::Flags();
4966 ZoneList<CharacterRange>* lead_surrogates = splitter->lead_surrogates();
4967 if (lead_surrogates ==
nullptr)
return;
4968 Zone* zone = compiler->zone();
4970 ZoneList<CharacterRange>* trail_surrogates = CharacterRange::List(
4971 zone, CharacterRange::Range(kTrailSurrogateStart, kTrailSurrogateEnd));
4974 if (compiler->read_backward()) {
4977 match = NegativeLookaroundAgainstReadDirectionAndMatch(
4978 compiler, trail_surrogates, lead_surrogates, on_success,
true,
4983 match = MatchAndNegativeLookaroundInReadDirection(
4984 compiler, lead_surrogates, trail_surrogates, on_success,
false,
4987 result->AddAlternative(GuardedAlternative(match));
4990 void AddLoneTrailSurrogates(RegExpCompiler* compiler, ChoiceNode* result,
4991 RegExpNode* on_success,
4992 UnicodeRangeSplitter* splitter) {
4993 JSRegExp::Flags default_flags = JSRegExp::Flags();
4994 ZoneList<CharacterRange>* trail_surrogates = splitter->trail_surrogates();
4995 if (trail_surrogates ==
nullptr)
return;
4996 Zone* zone = compiler->zone();
4998 ZoneList<CharacterRange>* lead_surrogates = CharacterRange::List(
4999 zone, CharacterRange::Range(kLeadSurrogateStart, kLeadSurrogateEnd));
5002 if (compiler->read_backward()) {
5005 match = MatchAndNegativeLookaroundInReadDirection(
5006 compiler, trail_surrogates, lead_surrogates, on_success,
true,
5011 match = NegativeLookaroundAgainstReadDirectionAndMatch(
5012 compiler, lead_surrogates, trail_surrogates, on_success,
false,
5015 result->AddAlternative(GuardedAlternative(match));
5018 RegExpNode* UnanchoredAdvance(RegExpCompiler* compiler,
5019 RegExpNode* on_success) {
5021 DCHECK(!compiler->read_backward());
5022 Zone* zone = compiler->zone();
5027 ZoneList<CharacterRange>* range = CharacterRange::List(
5028 zone, CharacterRange::Range(0, String::kMaxUtf16CodeUnit));
5029 JSRegExp::Flags default_flags = JSRegExp::Flags();
5030 return TextNode::CreateForCharacterRanges(zone, range,
false, on_success,
5034 void AddUnicodeCaseEquivalents(ZoneList<CharacterRange>* ranges, Zone* zone) {
5035 #ifdef V8_INTL_SUPPORT 5036 DCHECK(CharacterRange::IsCanonical(ranges));
5044 if (ranges->length() == 1 && ranges->at(0).IsEverything(kNonBmpEnd))
return;
5047 icu::UnicodeSet
set;
5048 for (
int i = 0;
i < ranges->length();
i++) {
5049 set.add(ranges->at(
i).from(), ranges->at(
i).to());
5052 set.closeOver(USET_CASE_INSENSITIVE);
5056 set.removeAllStrings();
5057 for (
int i = 0;
i <
set.getRangeCount();
i++) {
5058 ranges->Add(CharacterRange::Range(
set.getRangeStart(
i),
set.getRangeEnd(
i)),
5062 CharacterRange::Canonicalize(ranges);
5063 #endif // V8_INTL_SUPPORT 5067 RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler,
5068 RegExpNode* on_success) {
5069 set_.Canonicalize();
5070 Zone* zone = compiler->zone();
5071 ZoneList<CharacterRange>* ranges = this->ranges(zone);
5072 if (NeedsUnicodeCaseEquivalents(flags_)) {
5073 AddUnicodeCaseEquivalents(ranges, zone);
5075 if (IsUnicode(flags_) && !compiler->one_byte() &&
5076 !contains_split_surrogate()) {
5078 ZoneList<CharacterRange>* negated =
5079 new (zone) ZoneList<CharacterRange>(2, zone);
5080 CharacterRange::Negate(ranges, negated, zone);
5083 if (ranges->length() == 0) {
5084 JSRegExp::Flags default_flags;
5085 RegExpCharacterClass* fail =
5086 new (zone) RegExpCharacterClass(zone, ranges, default_flags);
5087 return new (zone) TextNode(fail, compiler->read_backward(), on_success);
5089 if (standard_type() ==
'*') {
5090 return UnanchoredAdvance(compiler, on_success);
5092 ChoiceNode* result =
new (zone) ChoiceNode(2, zone);
5093 UnicodeRangeSplitter splitter(zone, ranges);
5094 AddBmpCharacters(compiler, result, on_success, &splitter);
5095 AddNonBmpSurrogatePairs(compiler, result, on_success, &splitter);
5096 AddLoneLeadSurrogates(compiler, result, on_success, &splitter);
5097 AddLoneTrailSurrogates(compiler, result, on_success, &splitter);
5101 return new (zone) TextNode(
this, compiler->read_backward(), on_success);
5106 int CompareFirstChar(RegExpTree*
const* a, RegExpTree*
const* b) {
5107 RegExpAtom* atom1 = (*a)->AsAtom();
5108 RegExpAtom* atom2 = (*b)->AsAtom();
5109 uc16 character1 = atom1->data().at(0);
5110 uc16 character2 = atom2->data().at(0);
5111 if (character1 < character2)
return -1;
5112 if (character1 > character2)
return 1;
5117 static unibrow::uchar Canonical(
5120 unibrow::uchar chars[unibrow::Ecma262Canonicalize::kMaxWidth];
5121 int length = canonicalize->get(c,
'\0', chars);
5122 DCHECK_LE(length, 1);
5123 unibrow::uchar canonical = c;
5124 if (length == 1) canonical = chars[0];
5129 int CompareFirstCharCaseIndependent(
5131 RegExpTree*
const* a, RegExpTree*
const* b) {
5132 RegExpAtom* atom1 = (*a)->AsAtom();
5133 RegExpAtom* atom2 = (*b)->AsAtom();
5134 unibrow::uchar character1 = atom1->data().at(0);
5135 unibrow::uchar character2 = atom2->data().at(0);
5136 if (character1 == character2)
return 0;
5137 if (character1 >=
'a' || character2 >=
'a') {
5138 character1 = Canonical(canonicalize, character1);
5139 character2 = Canonical(canonicalize, character2);
5141 return static_cast<int>(character1) - static_cast<int>(character2);
5148 bool RegExpDisjunction::SortConsecutiveAtoms(RegExpCompiler* compiler) {
5149 ZoneList<RegExpTree*>* alternatives = this->alternatives();
5150 int length = alternatives->length();
5151 bool found_consecutive_atoms =
false;
5152 for (
int i = 0;
i < length;
i++) {
5153 while (
i < length) {
5154 RegExpTree* alternative = alternatives->at(
i);
5155 if (alternative->IsAtom())
break;
5159 if (
i == length)
break;
5161 JSRegExp::Flags flags = alternatives->at(
i)->AsAtom()->flags();
5163 while (
i < length) {
5164 RegExpTree* alternative = alternatives->at(
i);
5165 if (!alternative->IsAtom())
break;
5166 if (alternative->AsAtom()->flags() != flags)
break;
5175 DCHECK_LT(first_atom, alternatives->length());
5176 DCHECK_LE(
i, alternatives->length());
5177 DCHECK_LE(first_atom,
i);
5178 if (IgnoreCase(flags)) {
5180 compiler->isolate()->regexp_macro_assembler_canonicalize();
5181 auto compare_closure =
5182 [canonicalize](RegExpTree*
const* a, RegExpTree*
const* b) {
5183 return CompareFirstCharCaseIndependent(canonicalize, a, b);
5185 alternatives->StableSort(compare_closure, first_atom,
i - first_atom);
5187 alternatives->StableSort(CompareFirstChar, first_atom,
i - first_atom);
5189 if (
i - first_atom > 1) found_consecutive_atoms =
true;
5191 return found_consecutive_atoms;
5196 void RegExpDisjunction::RationalizeConsecutiveAtoms(RegExpCompiler* compiler) {
5197 Zone* zone = compiler->zone();
5198 ZoneList<RegExpTree*>* alternatives = this->alternatives();
5199 int length = alternatives->length();
5203 while (
i < length) {
5204 RegExpTree* alternative = alternatives->at(
i);
5205 if (!alternative->IsAtom()) {
5206 alternatives->at(write_posn++) = alternatives->at(
i);
5210 RegExpAtom*
const atom = alternative->AsAtom();
5211 JSRegExp::Flags flags = atom->flags();
5212 unibrow::uchar common_prefix = atom->data().at(0);
5213 int first_with_prefix =
i;
5214 int prefix_length = atom->length();
5216 while (
i < length) {
5217 alternative = alternatives->at(
i);
5218 if (!alternative->IsAtom())
break;
5219 RegExpAtom*
const atom = alternative->AsAtom();
5220 if (atom->flags() != flags)
break;
5221 unibrow::uchar new_prefix = atom->data().at(0);
5222 if (new_prefix != common_prefix) {
5223 if (!IgnoreCase(flags))
break;
5225 compiler->isolate()->regexp_macro_assembler_canonicalize();
5226 new_prefix = Canonical(canonicalize, new_prefix);
5227 common_prefix = Canonical(canonicalize, common_prefix);
5228 if (new_prefix != common_prefix)
break;
5230 prefix_length = Min(prefix_length, atom->length());
5233 if (
i > first_with_prefix + 2) {
5239 int run_length =
i - first_with_prefix;
5240 RegExpAtom*
const atom = alternatives->at(first_with_prefix)->AsAtom();
5241 for (
int j = 1; j < run_length && prefix_length > 1; j++) {
5242 RegExpAtom* old_atom =
5243 alternatives->at(j + first_with_prefix)->AsAtom();
5244 for (
int k = 1; k < prefix_length; k++) {
5245 if (atom->data().at(k) != old_atom->data().at(k)) {
5251 RegExpAtom* prefix =
new (zone)
5252 RegExpAtom(atom->data().SubVector(0, prefix_length), flags);
5253 ZoneList<RegExpTree*>* pair =
new (zone) ZoneList<RegExpTree*>(2, zone);
5254 pair->Add(prefix, zone);
5255 ZoneList<RegExpTree*>* suffixes =
5256 new (zone) ZoneList<RegExpTree*>(run_length, zone);
5257 for (
int j = 0; j < run_length; j++) {
5258 RegExpAtom* old_atom =
5259 alternatives->at(j + first_with_prefix)->AsAtom();
5260 int len = old_atom->length();
5261 if (len == prefix_length) {
5262 suffixes->Add(
new (zone) RegExpEmpty(), zone);
5264 RegExpTree* suffix =
new (zone) RegExpAtom(
5265 old_atom->data().SubVector(prefix_length, old_atom->length()),
5267 suffixes->Add(suffix, zone);
5270 pair->Add(
new (zone) RegExpDisjunction(suffixes), zone);
5271 alternatives->at(write_posn++) =
new (zone) RegExpAlternative(pair);
5274 for (
int j = first_with_prefix; j <
i; j++) {
5275 alternatives->at(write_posn++) = alternatives->at(j);
5279 alternatives->Rewind(write_posn);
5284 void RegExpDisjunction::FixSingleCharacterDisjunctions(
5285 RegExpCompiler* compiler) {
5286 Zone* zone = compiler->zone();
5287 ZoneList<RegExpTree*>* alternatives = this->alternatives();
5288 int length = alternatives->length();
5292 while (
i < length) {
5293 RegExpTree* alternative = alternatives->at(
i);
5294 if (!alternative->IsAtom()) {
5295 alternatives->at(write_posn++) = alternatives->at(
i);
5299 RegExpAtom*
const atom = alternative->AsAtom();
5300 if (atom->length() != 1) {
5301 alternatives->at(write_posn++) = alternatives->at(
i);
5305 JSRegExp::Flags flags = atom->flags();
5306 DCHECK_IMPLIES(IsUnicode(flags),
5307 !unibrow::Utf16::IsLeadSurrogate(atom->data().at(0)));
5308 bool contains_trail_surrogate =
5309 unibrow::Utf16::IsTrailSurrogate(atom->data().at(0));
5310 int first_in_run =
i;
5314 while (
i < length) {
5315 alternative = alternatives->at(
i);
5316 if (!alternative->IsAtom())
break;
5317 RegExpAtom*
const atom = alternative->AsAtom();
5318 if (atom->length() != 1)
break;
5319 if (atom->flags() != flags)
break;
5320 DCHECK_IMPLIES(IsUnicode(flags),
5321 !unibrow::Utf16::IsLeadSurrogate(atom->data().at(0)));
5322 contains_trail_surrogate |=
5323 unibrow::Utf16::IsTrailSurrogate(atom->data().at(0));
5326 if (
i > first_in_run + 1) {
5328 int run_length =
i - first_in_run;
5329 ZoneList<CharacterRange>* ranges =
5330 new (zone) ZoneList<CharacterRange>(2, zone);
5331 for (
int j = 0; j < run_length; j++) {
5332 RegExpAtom* old_atom = alternatives->at(j + first_in_run)->AsAtom();
5333 DCHECK_EQ(old_atom->length(), 1);
5334 ranges->Add(CharacterRange::Singleton(old_atom->data().at(0)), zone);
5336 RegExpCharacterClass::CharacterClassFlags character_class_flags;
5337 if (IsUnicode(flags) && contains_trail_surrogate) {
5338 character_class_flags = RegExpCharacterClass::CONTAINS_SPLIT_SURROGATE;
5340 alternatives->at(write_posn++) =
new (zone)
5341 RegExpCharacterClass(zone, ranges, flags, character_class_flags);
5344 for (
int j = first_in_run; j <
i; j++) {
5345 alternatives->at(write_posn++) = alternatives->at(j);
5349 alternatives->Rewind(write_posn);
5353 RegExpNode* RegExpDisjunction::ToNode(RegExpCompiler* compiler,
5354 RegExpNode* on_success) {
5355 ZoneList<RegExpTree*>* alternatives = this->alternatives();
5357 if (alternatives->length() > 2) {
5358 bool found_consecutive_atoms = SortConsecutiveAtoms(compiler);
5359 if (found_consecutive_atoms) RationalizeConsecutiveAtoms(compiler);
5360 FixSingleCharacterDisjunctions(compiler);
5361 if (alternatives->length() == 1) {
5362 return alternatives->at(0)->ToNode(compiler, on_success);
5366 int length = alternatives->length();
5368 ChoiceNode* result =
5369 new(compiler->zone()) ChoiceNode(length, compiler->zone());
5370 for (
int i = 0;
i < length;
i++) {
5371 GuardedAlternative alternative(alternatives->at(
i)->ToNode(compiler,
5373 result->AddAlternative(alternative);
5379 RegExpNode* RegExpQuantifier::ToNode(RegExpCompiler* compiler,
5380 RegExpNode* on_success) {
5381 return ToNode(min(),
5394 static const int kMaxExpansionFactor = 6;
5396 : compiler_(compiler),
5397 saved_expansion_factor_(compiler->current_expansion_factor()),
5398 ok_to_expand_(saved_expansion_factor_ <= kMaxExpansionFactor) {
5399 DCHECK_LT(0, factor);
5400 if (ok_to_expand_) {
5401 if (factor > kMaxExpansionFactor) {
5403 ok_to_expand_ =
false;
5404 compiler->set_current_expansion_factor(kMaxExpansionFactor + 1);
5406 int new_factor = saved_expansion_factor_ * factor;
5407 ok_to_expand_ = (new_factor <= kMaxExpansionFactor);
5408 compiler->set_current_expansion_factor(new_factor);
5414 compiler_->set_current_expansion_factor(saved_expansion_factor_);
5417 bool ok_to_expand() {
return ok_to_expand_; }
5421 int saved_expansion_factor_;
5428 RegExpNode* RegExpQuantifier::ToNode(
int min,
5434 bool not_at_start) {
5455 static const int kMaxUnrolledMinMatches = 3;
5456 static const int kMaxUnrolledMaxMatches = 3;
5457 if (max == 0)
return on_success;
5458 bool body_can_be_empty = (body->min_match() == 0);
5459 int body_start_reg = RegExpCompiler::kNoRegister;
5460 Interval capture_registers = body->CaptureRegisters();
5461 bool needs_capture_clearing = !capture_registers.is_empty();
5462 Zone* zone = compiler->zone();
5464 if (body_can_be_empty) {
5465 body_start_reg = compiler->AllocateRegister();
5466 }
else if (compiler->optimize() && !needs_capture_clearing) {
5470 RegExpExpansionLimiter limiter(
5471 compiler, min + ((max != min) ? 1 : 0));
5472 if (min > 0 && min <= kMaxUnrolledMinMatches && limiter.ok_to_expand()) {
5473 int new_max = (max == kInfinity) ? max : max - min;
5476 RegExpNode* answer = ToNode(
5477 0, new_max, is_greedy, body, compiler, on_success,
true);
5481 for (
int i = 0;
i < min;
i++) {
5482 answer = body->ToNode(compiler, answer);
5487 if (max <= kMaxUnrolledMaxMatches && min == 0) {
5489 RegExpExpansionLimiter limiter(compiler, max);
5490 if (limiter.ok_to_expand()) {
5492 RegExpNode* answer = on_success;
5493 for (
int i = 0;
i < max;
i++) {
5494 ChoiceNode* alternation =
new(zone) ChoiceNode(2, zone);
5496 alternation->AddAlternative(
5497 GuardedAlternative(body->ToNode(compiler, answer)));
5498 alternation->AddAlternative(GuardedAlternative(on_success));
5500 alternation->AddAlternative(GuardedAlternative(on_success));
5501 alternation->AddAlternative(
5502 GuardedAlternative(body->ToNode(compiler, answer)));
5504 answer = alternation;
5505 if (not_at_start && !compiler->read_backward()) {
5506 alternation->set_not_at_start();
5513 bool has_min = min > 0;
5514 bool has_max = max < RegExpTree::kInfinity;
5515 bool needs_counter = has_min || has_max;
5516 int reg_ctr = needs_counter
5517 ? compiler->AllocateRegister()
5518 : RegExpCompiler::kNoRegister;
5519 LoopChoiceNode* center =
new (zone)
5520 LoopChoiceNode(body->min_match() == 0, compiler->read_backward(), zone);
5521 if (not_at_start && !compiler->read_backward()) center->set_not_at_start();
5522 RegExpNode* loop_return = needs_counter
5523 ?
static_cast<RegExpNode*
>(ActionNode::IncrementRegister(reg_ctr, center))
5524 : static_cast<RegExpNode*>(center);
5525 if (body_can_be_empty) {
5528 loop_return = ActionNode::EmptyMatchCheck(body_start_reg,
5533 RegExpNode* body_node = body->ToNode(compiler, loop_return);
5534 if (body_can_be_empty) {
5537 body_node = ActionNode::StorePosition(body_start_reg,
false, body_node);
5539 if (needs_capture_clearing) {
5541 body_node = ActionNode::ClearCaptures(capture_registers, body_node);
5543 GuardedAlternative body_alt(body_node);
5546 new(zone) Guard(reg_ctr, Guard::LT, max);
5547 body_alt.AddGuard(body_guard, zone);
5549 GuardedAlternative rest_alt(on_success);
5551 Guard* rest_guard =
new(compiler->zone()) Guard(reg_ctr, Guard::GEQ, min);
5552 rest_alt.AddGuard(rest_guard, zone);
5555 center->AddLoopAlternative(body_alt);
5556 center->AddContinueAlternative(rest_alt);
5558 center->AddContinueAlternative(rest_alt);
5559 center->AddLoopAlternative(body_alt);
5561 if (needs_counter) {
5562 return ActionNode::SetRegister(reg_ctr, 0, center);
5571 RegExpNode* BoundaryAssertionAsLookaround(RegExpCompiler* compiler,
5572 RegExpNode* on_success,
5573 RegExpAssertion::AssertionType type,
5574 JSRegExp::Flags flags) {
5575 DCHECK(NeedsUnicodeCaseEquivalents(flags));
5576 Zone* zone = compiler->zone();
5577 ZoneList<CharacterRange>* word_range =
5578 new (zone) ZoneList<CharacterRange>(2, zone);
5579 CharacterRange::AddClassEscape(
'w', word_range,
true, zone);
5580 int stack_register = compiler->UnicodeLookaroundStackRegister();
5581 int position_register = compiler->UnicodeLookaroundPositionRegister();
5582 ChoiceNode* result =
new (zone) ChoiceNode(2, zone);
5585 for (
int i = 0;
i < 2;
i++) {
5586 bool lookbehind_for_word =
i == 0;
5587 bool lookahead_for_word =
5588 (type == RegExpAssertion::BOUNDARY) ^ lookbehind_for_word;
5590 RegExpLookaround::Builder lookbehind(lookbehind_for_word, on_success,
5591 stack_register, position_register);
5592 RegExpNode* backward = TextNode::CreateForCharacterRanges(
5593 zone, word_range,
true, lookbehind.on_match_success(), flags);
5595 RegExpLookaround::Builder lookahead(lookahead_for_word,
5596 lookbehind.ForMatch(backward),
5597 stack_register, position_register);
5598 RegExpNode* forward = TextNode::CreateForCharacterRanges(
5599 zone, word_range,
false, lookahead.on_match_success(), flags);
5600 result->AddAlternative(GuardedAlternative(lookahead.ForMatch(forward)));
5606 RegExpNode* RegExpAssertion::ToNode(RegExpCompiler* compiler,
5607 RegExpNode* on_success) {
5609 Zone* zone = compiler->zone();
5611 switch (assertion_type()) {
5613 return AssertionNode::AfterNewline(on_success);
5614 case START_OF_INPUT:
5615 return AssertionNode::AtStart(on_success);
5617 return NeedsUnicodeCaseEquivalents(flags_)
5618 ? BoundaryAssertionAsLookaround(compiler, on_success, BOUNDARY,
5620 : AssertionNode::AtBoundary(on_success);
5622 return NeedsUnicodeCaseEquivalents(flags_)
5623 ? BoundaryAssertionAsLookaround(compiler, on_success,
5624 NON_BOUNDARY, flags_)
5625 : AssertionNode::AtNonBoundary(on_success);
5627 return AssertionNode::AtEnd(on_success);
5632 int stack_pointer_register = compiler->AllocateRegister();
5633 int position_register = compiler->AllocateRegister();
5635 ChoiceNode* result =
new(zone) ChoiceNode(2, zone);
5637 ZoneList<CharacterRange>* newline_ranges =
5638 new(zone) ZoneList<CharacterRange>(3, zone);
5639 CharacterRange::AddClassEscape(
'n', newline_ranges,
false, zone);
5640 JSRegExp::Flags default_flags = JSRegExp::Flags();
5641 RegExpCharacterClass* newline_atom =
5642 new (zone) RegExpCharacterClass(
'n', default_flags);
5643 TextNode* newline_matcher =
new (zone) TextNode(
5644 newline_atom,
false, ActionNode::PositiveSubmatchSuccess(
5645 stack_pointer_register, position_register,
5650 RegExpNode* end_of_line = ActionNode::BeginSubmatch(
5651 stack_pointer_register,
5655 GuardedAlternative eol_alternative(end_of_line);
5656 result->AddAlternative(eol_alternative);
5657 GuardedAlternative end_alternative(AssertionNode::AtEnd(on_success));
5658 result->AddAlternative(end_alternative);
5668 RegExpNode* RegExpBackReference::ToNode(RegExpCompiler* compiler,
5669 RegExpNode* on_success) {
5670 return new (compiler->zone())
5671 BackReferenceNode(RegExpCapture::StartRegister(index()),
5672 RegExpCapture::EndRegister(index()), flags_,
5673 compiler->read_backward(), on_success);
5677 RegExpNode* RegExpEmpty::ToNode(RegExpCompiler* compiler,
5678 RegExpNode* on_success) {
5683 RegExpLookaround::Builder::Builder(
bool is_positive, RegExpNode* on_success,
5684 int stack_pointer_register,
5685 int position_register,
5686 int capture_register_count,
5687 int capture_register_start)
5688 : is_positive_(is_positive),
5689 on_success_(on_success),
5690 stack_pointer_register_(stack_pointer_register),
5691 position_register_(position_register) {
5693 on_match_success_ = ActionNode::PositiveSubmatchSuccess(
5694 stack_pointer_register, position_register, capture_register_count,
5695 capture_register_start, on_success_);
5697 Zone* zone = on_success_->zone();
5698 on_match_success_ =
new (zone) NegativeSubmatchSuccess(
5699 stack_pointer_register, position_register, capture_register_count,
5700 capture_register_start, zone);
5705 RegExpNode* RegExpLookaround::Builder::ForMatch(RegExpNode* match) {
5707 return ActionNode::BeginSubmatch(stack_pointer_register_,
5708 position_register_, match);
5710 Zone* zone = on_success_->zone();
5716 ChoiceNode* choice_node =
new (zone) NegativeLookaroundChoiceNode(
5717 GuardedAlternative(match), GuardedAlternative(on_success_), zone);
5718 return ActionNode::BeginSubmatch(stack_pointer_register_,
5719 position_register_, choice_node);
5724 RegExpNode* RegExpLookaround::ToNode(RegExpCompiler* compiler,
5725 RegExpNode* on_success) {
5726 int stack_pointer_register = compiler->AllocateRegister();
5727 int position_register = compiler->AllocateRegister();
5729 const int registers_per_capture = 2;
5730 const int register_of_first_capture = 2;
5731 int register_count = capture_count_ * registers_per_capture;
5732 int register_start =
5733 register_of_first_capture + capture_from_ * registers_per_capture;
5736 bool was_reading_backward = compiler->read_backward();
5737 compiler->set_read_backward(type() == LOOKBEHIND);
5738 Builder builder(is_positive(), on_success, stack_pointer_register,
5739 position_register, register_count, register_start);
5740 RegExpNode* match = body_->ToNode(compiler, builder.on_match_success());
5741 result = builder.ForMatch(match);
5742 compiler->set_read_backward(was_reading_backward);
5747 RegExpNode* RegExpCapture::ToNode(RegExpCompiler* compiler,
5748 RegExpNode* on_success) {
5749 return ToNode(body(), index(), compiler, on_success);
5753 RegExpNode* RegExpCapture::ToNode(RegExpTree* body,
5755 RegExpCompiler* compiler,
5756 RegExpNode* on_success) {
5757 DCHECK_NOT_NULL(body);
5758 int start_reg = RegExpCapture::StartRegister(index);
5759 int end_reg = RegExpCapture::EndRegister(index);
5760 if (compiler->read_backward()) std::swap(start_reg, end_reg);
5761 RegExpNode* store_end = ActionNode::StorePosition(end_reg,
true, on_success);
5762 RegExpNode* body_node = body->ToNode(compiler, store_end);
5763 return ActionNode::StorePosition(start_reg,
true, body_node);
5767 RegExpNode* RegExpAlternative::ToNode(RegExpCompiler* compiler,
5768 RegExpNode* on_success) {
5769 ZoneList<RegExpTree*>* children = nodes();
5770 RegExpNode* current = on_success;
5771 if (compiler->read_backward()) {
5772 for (
int i = 0;
i < children->length();
i++) {
5773 current = children->at(
i)->ToNode(compiler, current);
5776 for (
int i = children->length() - 1;
i >= 0;
i--) {
5777 current = children->at(
i)->ToNode(compiler, current);
5784 static void AddClass(
const int* elmv,
5786 ZoneList<CharacterRange>* ranges,
5789 DCHECK_EQ(kRangeEndMarker, elmv[elmc]);
5790 for (
int i = 0;
i < elmc;
i += 2) {
5791 DCHECK(elmv[
i] < elmv[
i + 1]);
5792 ranges->Add(CharacterRange::Range(elmv[
i], elmv[
i + 1] - 1), zone);
5797 static void AddClassNegated(
const int *elmv,
5799 ZoneList<CharacterRange>* ranges,
5802 DCHECK_EQ(kRangeEndMarker, elmv[elmc]);
5803 DCHECK_NE(0x0000, elmv[0]);
5804 DCHECK_NE(String::kMaxCodePoint, elmv[elmc - 1]);
5806 for (
int i = 0;
i < elmc;
i += 2) {
5807 DCHECK(last <= elmv[
i] - 1);
5808 DCHECK(elmv[
i] < elmv[
i + 1]);
5809 ranges->Add(CharacterRange::Range(last, elmv[
i] - 1), zone);
5812 ranges->Add(CharacterRange::Range(last, String::kMaxCodePoint), zone);
5815 void CharacterRange::AddClassEscape(
char type, ZoneList<CharacterRange>* ranges,
5816 bool add_unicode_case_equivalents,
5818 if (add_unicode_case_equivalents && (type ==
'w' || type ==
'W')) {
5822 ZoneList<CharacterRange>* new_ranges =
5823 new (zone) ZoneList<CharacterRange>(2, zone);
5824 AddClass(kWordRanges, kWordRangeCount, new_ranges, zone);
5825 AddUnicodeCaseEquivalents(new_ranges, zone);
5827 ZoneList<CharacterRange>* negated =
5828 new (zone) ZoneList<CharacterRange>(2, zone);
5829 CharacterRange::Negate(new_ranges, negated, zone);
5830 new_ranges = negated;
5832 ranges->AddAll(*new_ranges, zone);
5835 AddClassEscape(type, ranges, zone);
5838 void CharacterRange::AddClassEscape(
char type, ZoneList<CharacterRange>* ranges,
5842 AddClass(kSpaceRanges, kSpaceRangeCount, ranges, zone);
5845 AddClassNegated(kSpaceRanges, kSpaceRangeCount, ranges, zone);
5848 AddClass(kWordRanges, kWordRangeCount, ranges, zone);
5851 AddClassNegated(kWordRanges, kWordRangeCount, ranges, zone);
5854 AddClass(kDigitRanges, kDigitRangeCount, ranges, zone);
5857 AddClassNegated(kDigitRanges, kDigitRangeCount, ranges, zone);
5860 AddClassNegated(kLineTerminatorRanges,
5861 kLineTerminatorRangeCount,
5869 ranges->Add(CharacterRange::Everything(), zone);
5874 AddClass(kLineTerminatorRanges,
5875 kLineTerminatorRangeCount,
5885 Vector<const int> CharacterRange::GetWordBounds() {
5886 return Vector<const int>(kWordRanges, kWordRangeCount - 1);
5890 void CharacterRange::AddCaseEquivalents(Isolate* isolate, Zone* zone,
5891 ZoneList<CharacterRange>* ranges,
5893 CharacterRange::Canonicalize(ranges);
5894 int range_count = ranges->length();
5895 for (
int i = 0;
i < range_count;
i++) {
5896 CharacterRange range = ranges->at(
i);
5897 uc32 bottom = range.from();
5898 if (bottom > String::kMaxUtf16CodeUnit)
continue;
5899 uc32 top = Min(range.to(), String::kMaxUtf16CodeUnit);
5901 if (bottom >= kLeadSurrogateStart && top <= kTrailSurrogateEnd)
continue;
5902 if (is_one_byte && !RangeContainsLatin1Equivalents(range)) {
5903 if (bottom > String::kMaxOneByteCharCode)
continue;
5904 if (top > String::kMaxOneByteCharCode) top = String::kMaxOneByteCharCode;
5906 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
5907 if (top == bottom) {
5909 int length = isolate->jsregexp_uncanonicalize()->get(bottom,
'\0', chars);
5910 for (
int i = 0;
i < length;
i++) {
5911 uc32 chr = chars[
i];
5912 if (chr != bottom) {
5913 ranges->Add(CharacterRange::Singleton(chars[
i]), zone);
5933 unibrow::uchar equivalents[unibrow::Ecma262UnCanonicalize::kMaxWidth];
5935 while (pos <= top) {
5937 isolate->jsregexp_canonrange()->get(pos,
'\0', equivalents);
5942 DCHECK_EQ(1, length);
5943 block_end = equivalents[0];
5945 int end = (block_end > top) ? top : block_end;
5946 length = isolate->jsregexp_uncanonicalize()->get(block_end,
'\0',
5948 for (
int i = 0;
i < length;
i++) {
5949 uc32 c = equivalents[
i];
5950 uc32 range_from = c - (block_end - pos);
5951 uc32 range_to = c - (block_end - end);
5952 if (!(bottom <= range_from && range_to <= top)) {
5953 ranges->Add(CharacterRange::Range(range_from, range_to), zone);
5963 bool CharacterRange::IsCanonical(ZoneList<CharacterRange>* ranges) {
5964 DCHECK_NOT_NULL(ranges);
5965 int n = ranges->length();
5966 if (n <= 1)
return true;
5967 int max = ranges->at(0).to();
5968 for (
int i = 1;
i < n;
i++) {
5969 CharacterRange next_range = ranges->at(
i);
5970 if (next_range.from() <= max + 1)
return false;
5971 max = next_range.to();
5977 ZoneList<CharacterRange>* CharacterSet::ranges(Zone* zone) {
5978 if (ranges_ ==
nullptr) {
5979 ranges_ =
new(zone) ZoneList<CharacterRange>(2, zone);
5980 CharacterRange::AddClassEscape(standard_set_type_, ranges_,
false, zone);
5988 static void MoveRanges(ZoneList<CharacterRange>* list,
5994 for (
int i = count - 1;
i >= 0;
i--) {
5995 list->at(to +
i) = list->at(from +
i);
5998 for (
int i = 0;
i < count;
i++) {
5999 list->at(to +
i) = list->at(from +
i);
6005 static int InsertRangeInCanonicalList(ZoneList<CharacterRange>* list,
6007 CharacterRange insert) {
6013 uc32 from = insert.from();
6014 uc32 to = insert.to();
6016 int end_pos = count;
6017 for (
int i = count - 1;
i >= 0;
i--) {
6018 CharacterRange current = list->at(
i);
6019 if (current.from() > to + 1) {
6021 }
else if (current.to() + 1 < from) {
6034 if (start_pos == end_pos) {
6036 if (start_pos < count) {
6037 MoveRanges(list, start_pos, start_pos + 1, count - start_pos);
6039 list->at(start_pos) = insert;
6042 if (start_pos + 1 == end_pos) {
6044 CharacterRange to_replace = list->at(start_pos);
6045 int new_from = Min(to_replace.from(), from);
6046 int new_to = Max(to_replace.to(), to);
6047 list->at(start_pos) = CharacterRange::Range(new_from, new_to);
6053 int new_from = Min(list->at(start_pos).from(), from);
6054 int new_to = Max(list->at(end_pos - 1).to(), to);
6055 if (end_pos < count) {
6056 MoveRanges(list, end_pos, start_pos + 1, count - end_pos);
6058 list->at(start_pos) = CharacterRange::Range(new_from, new_to);
6059 return count - (end_pos - start_pos) + 1;
6063 void CharacterSet::Canonicalize() {
6066 if (ranges_ ==
nullptr)
return;
6067 CharacterRange::Canonicalize(ranges_);
6071 void CharacterRange::Canonicalize(ZoneList<CharacterRange>* character_ranges) {
6072 if (character_ranges->length() <= 1)
return;
6075 int n = character_ranges->length();
6076 int max = character_ranges->at(0).to();
6079 CharacterRange current = character_ranges->at(
i);
6080 if (current.from() <= max + 1) {
6095 int num_canonical =
i;
6097 num_canonical = InsertRangeInCanonicalList(character_ranges,
6099 character_ranges->at(read));
6102 character_ranges->Rewind(num_canonical);
6104 DCHECK(CharacterRange::IsCanonical(character_ranges));
6108 void CharacterRange::Negate(ZoneList<CharacterRange>* ranges,
6109 ZoneList<CharacterRange>* negated_ranges,
6111 DCHECK(CharacterRange::IsCanonical(ranges));
6112 DCHECK_EQ(0, negated_ranges->length());
6113 int range_count = ranges->length();
6116 if (range_count > 0 && ranges->at(0).from() == 0) {
6117 from = ranges->at(0).to() + 1;
6120 while (
i < range_count) {
6121 CharacterRange range = ranges->at(
i);
6122 negated_ranges->Add(CharacterRange::Range(from, range.from() - 1), zone);
6123 from = range.to() + 1;
6126 if (from < String::kMaxCodePoint) {
6127 negated_ranges->Add(CharacterRange::Range(from, String::kMaxCodePoint),
6137 OutSet* OutSet::Extend(
unsigned value, Zone* zone) {
6140 if (successors(zone) !=
nullptr) {
6141 for (
int i = 0;
i < successors(zone)->length();
i++) {
6142 OutSet* successor = successors(zone)->at(
i);
6143 if (successor->Get(value))
6147 successors_ =
new(zone) ZoneList<OutSet*>(2, zone);
6149 OutSet* result =
new(zone) OutSet(first_, remaining_);
6150 result->Set(value, zone);
6151 successors(zone)->Add(result, zone);
6156 void OutSet::Set(
unsigned value, Zone *zone) {
6157 if (value < kFirstLimit) {
6158 first_ |= (1 << value);
6160 if (remaining_ ==
nullptr)
6161 remaining_ =
new(zone) ZoneList<unsigned>(1, zone);
6162 if (remaining_->is_empty() || !remaining_->Contains(value))
6163 remaining_->Add(value, zone);
6168 bool OutSet::Get(
unsigned value)
const {
6169 if (value < kFirstLimit) {
6170 return (first_ & (1 << value)) != 0;
6171 }
else if (remaining_ ==
nullptr) {
6174 return remaining_->Contains(value);
6179 const uc32 DispatchTable::Config::kNoKey = unibrow::Utf8::kBadChar;
6182 void DispatchTable::AddRange(CharacterRange full_range,
int value,
6184 CharacterRange current = full_range;
6185 if (tree()->is_empty()) {
6187 ZoneSplayTree<Config>::Locator loc;
6188 bool inserted = tree()->Insert(current.from(), &loc);
6191 loc.set_value(Entry(current.from(), current.to(),
6192 empty()->Extend(value, zone)));
6197 ZoneSplayTree<Config>::Locator loc;
6198 if (tree()->FindGreatestLessThan(current.from(), &loc)) {
6199 Entry* entry = &loc.value();
6204 if (entry->from() < current.from() && entry->to() >= current.from()) {
6207 CharacterRange left =
6208 CharacterRange::Range(entry->from(), current.from() - 1);
6209 CharacterRange right = CharacterRange::Range(current.from(), entry->to());
6212 entry->set_to(left.to());
6216 ZoneSplayTree<Config>::Locator loc;
6217 bool inserted = tree()->Insert(right.from(), &loc);
6220 loc.set_value(Entry(right.from(),
6225 while (current.is_valid()) {
6226 if (tree()->FindLeastGreaterThan(current.from(), &loc) &&
6227 (loc.value().from() <= current.to()) &&
6228 (loc.value().to() >= current.from())) {
6229 Entry* entry = &loc.value();
6233 if (current.from() < entry->from()) {
6234 ZoneSplayTree<Config>::Locator ins;
6235 bool inserted = tree()->Insert(current.from(), &ins);
6238 ins.set_value(Entry(current.from(),
6240 empty()->Extend(value, zone)));
6241 current.set_from(entry->from());
6243 DCHECK_EQ(current.from(), entry->from());
6246 if (entry->to() > current.to()) {
6247 ZoneSplayTree<Config>::Locator ins;
6248 bool inserted = tree()->Insert(current.to() + 1, &ins);
6251 ins.set_value(Entry(current.to() + 1,
6254 entry->set_to(current.to());
6256 DCHECK(entry->to() <= current.to());
6260 entry->AddValue(value, zone);
6261 DCHECK(entry->to() + 1 > current.from());
6262 current.set_from(entry->to() + 1);
6265 ZoneSplayTree<Config>::Locator ins;
6266 bool inserted = tree()->Insert(current.from(), &ins);
6269 ins.set_value(Entry(current.from(),
6271 empty()->Extend(value, zone)));
6278 OutSet* DispatchTable::Get(uc32 value) {
6279 ZoneSplayTree<Config>::Locator loc;
6280 if (!tree()->FindGreatestLessThan(value, &loc))
6282 Entry* entry = &loc.value();
6283 if (value <= entry->to())
6284 return entry->out_set();
6294 void Analysis::EnsureAnalyzed(RegExpNode* that) {
6295 StackLimitCheck check(isolate());
6296 if (check.HasOverflowed()) {
6297 fail(
"Stack overflow");
6300 if (that->info()->been_analyzed || that->info()->being_analyzed)
6302 that->info()->being_analyzed =
true;
6304 that->info()->being_analyzed =
false;
6305 that->info()->been_analyzed =
true;
6309 void Analysis::VisitEnd(EndNode* that) {
6314 void TextNode::CalculateOffsets() {
6315 int element_count = elements()->length();
6319 for (
int i = 0;
i < element_count;
i++) {
6320 TextElement& elm = elements()->at(
i);
6321 elm.set_cp_offset(cp_offset);
6322 cp_offset += elm.length();
6327 void Analysis::VisitText(TextNode* that) {
6328 that->MakeCaseIndependent(isolate(), is_one_byte_);
6329 EnsureAnalyzed(that->on_success());
6330 if (!has_failed()) {
6331 that->CalculateOffsets();
6336 void Analysis::VisitAction(ActionNode* that) {
6337 RegExpNode* target = that->on_success();
6338 EnsureAnalyzed(target);
6339 if (!has_failed()) {
6342 that->info()->AddFromFollowing(target->info());
6347 void Analysis::VisitChoice(ChoiceNode* that) {
6348 NodeInfo* info = that->info();
6349 for (
int i = 0;
i < that->alternatives()->length();
i++) {
6350 RegExpNode* node = that->alternatives()->at(
i).node();
6351 EnsureAnalyzed(node);
6352 if (has_failed())
return;
6355 info->AddFromFollowing(node->info());
6360 void Analysis::VisitLoopChoice(LoopChoiceNode* that) {
6361 NodeInfo* info = that->info();
6362 for (
int i = 0;
i < that->alternatives()->length();
i++) {
6363 RegExpNode* node = that->alternatives()->at(
i).node();
6364 if (node != that->loop_node()) {
6365 EnsureAnalyzed(node);
6366 if (has_failed())
return;
6367 info->AddFromFollowing(node->info());
6372 EnsureAnalyzed(that->loop_node());
6373 if (!has_failed()) {
6374 info->AddFromFollowing(that->loop_node()->info());
6379 void Analysis::VisitBackReference(BackReferenceNode* that) {
6380 EnsureAnalyzed(that->on_success());
6384 void Analysis::VisitAssertion(AssertionNode* that) {
6385 EnsureAnalyzed(that->on_success());
6389 void BackReferenceNode::FillInBMInfo(Isolate* isolate,
int offset,
int budget,
6390 BoyerMooreLookahead* bm,
6391 bool not_at_start) {
6394 bm->SetRest(offset);
6395 SaveBMInfo(bm, not_at_start, offset);
6399 STATIC_ASSERT(BoyerMoorePositionInfo::kMapSize ==
6400 RegExpMacroAssembler::kTableSize);
6403 void ChoiceNode::FillInBMInfo(Isolate* isolate,
int offset,
int budget,
6404 BoyerMooreLookahead* bm,
bool not_at_start) {
6405 ZoneList<GuardedAlternative>* alts = alternatives();
6406 budget = (budget - 1) / alts->length();
6407 for (
int i = 0;
i < alts->length();
i++) {
6408 GuardedAlternative& alt = alts->at(
i);
6409 if (alt.guards() !=
nullptr && alt.guards()->length() != 0) {
6410 bm->SetRest(offset);
6411 SaveBMInfo(bm, not_at_start, offset);
6414 alt.node()->FillInBMInfo(isolate, offset, budget, bm, not_at_start);
6416 SaveBMInfo(bm, not_at_start, offset);
6420 void TextNode::FillInBMInfo(Isolate* isolate,
int initial_offset,
int budget,
6421 BoyerMooreLookahead* bm,
bool not_at_start) {
6422 if (initial_offset >= bm->length())
return;
6423 int offset = initial_offset;
6424 int max_char = bm->max_char();
6425 for (
int i = 0;
i < elements()->length();
i++) {
6426 if (offset >= bm->length()) {
6427 if (initial_offset == 0) set_bm_info(not_at_start, bm);
6430 TextElement text = elements()->at(
i);
6431 if (text.text_type() == TextElement::ATOM) {
6432 RegExpAtom* atom = text.atom();
6433 for (
int j = 0; j < atom->length(); j++, offset++) {
6434 if (offset >= bm->length()) {
6435 if (initial_offset == 0) set_bm_info(not_at_start, bm);
6438 uc16 character = atom->data()[j];
6439 if (IgnoreCase(atom->flags())) {
6440 unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
6441 int length = GetCaseIndependentLetters(
6442 isolate, character, bm->max_char() == String::kMaxOneByteCharCode,
6444 for (
int j = 0; j < length; j++) {
6445 bm->Set(offset, chars[j]);
6448 if (character <= max_char) bm->Set(offset, character);
6452 DCHECK_EQ(TextElement::CHAR_CLASS, text.text_type());
6453 RegExpCharacterClass* char_class = text.char_class();
6454 ZoneList<CharacterRange>* ranges = char_class->ranges(zone());
6455 if (char_class->is_negated()) {
6458 for (
int k = 0; k < ranges->length(); k++) {
6459 CharacterRange& range = ranges->at(k);
6460 if (range.from() > max_char)
continue;
6461 int to = Min(max_char, static_cast<int>(range.to()));
6462 bm->SetInterval(offset, Interval(range.from(), to));
6468 if (offset >= bm->length()) {
6469 if (initial_offset == 0) set_bm_info(not_at_start, bm);
6472 on_success()->FillInBMInfo(isolate, offset, budget - 1, bm,
6474 if (initial_offset == 0) set_bm_info(not_at_start, bm);
6482 void DispatchTableConstructor::VisitEnd(EndNode* that) {
6483 AddRange(CharacterRange::Everything());
6487 void DispatchTableConstructor::BuildTable(ChoiceNode* node) {
6488 node->set_being_calculated(
true);
6489 ZoneList<GuardedAlternative>* alternatives = node->alternatives();
6490 for (
int i = 0;
i < alternatives->length();
i++) {
6491 set_choice_index(
i);
6492 alternatives->at(
i).node()->Accept(
this);
6494 node->set_being_calculated(
false);
6501 : constructor_(constructor) { }
6509 constructor_->AddRange(CharacterRange::Range(from, entry.to()));
6513 void DispatchTableConstructor::VisitChoice(ChoiceNode* node) {
6514 if (node->being_calculated())
6516 DispatchTable* table = node->GetTable(ignore_case_);
6517 AddDispatchRange adder(
this);
6518 table->ForEach(&adder);
6522 void DispatchTableConstructor::VisitBackReference(BackReferenceNode* that) {
6525 AddRange(CharacterRange::Everything());
6529 void DispatchTableConstructor::VisitAssertion(AssertionNode* that) {
6530 RegExpNode* target = that->on_success();
6531 target->Accept(
this);
6535 static int CompareRangeByFrom(
const CharacterRange* a,
6536 const CharacterRange* b) {
6537 return Compare<uc16>(a->from(), b->from());
6541 void DispatchTableConstructor::AddInverse(ZoneList<CharacterRange>* ranges) {
6542 ranges->Sort(CompareRangeByFrom);
6544 for (
int i = 0;
i < ranges->length();
i++) {
6545 CharacterRange range = ranges->at(
i);
6546 if (last < range.from())
6547 AddRange(CharacterRange::Range(last, range.from() - 1));
6548 if (range.to() >= last) {
6549 if (range.to() == String::kMaxCodePoint) {
6552 last = range.to() + 1;
6556 AddRange(CharacterRange::Range(last, String::kMaxCodePoint));
6560 void DispatchTableConstructor::VisitText(TextNode* that) {
6561 TextElement elm = that->elements()->at(0);
6562 switch (elm.text_type()) {
6563 case TextElement::ATOM: {
6564 uc16 c = elm.atom()->data()[0];
6565 AddRange(CharacterRange::Range(c, c));
6568 case TextElement::CHAR_CLASS: {
6569 RegExpCharacterClass* tree = elm.char_class();
6570 ZoneList<CharacterRange>* ranges = tree->ranges(that->zone());
6571 if (tree->is_negated()) {
6574 for (
int i = 0;
i < ranges->length();
i++)
6575 AddRange(ranges->at(
i));
6586 void DispatchTableConstructor::VisitAction(ActionNode* that) {
6587 RegExpNode* target = that->on_success();
6588 target->Accept(
this);
6591 RegExpNode* OptionallyStepBackToLeadSurrogate(RegExpCompiler* compiler,
6592 RegExpNode* on_success,
6593 JSRegExp::Flags flags) {
6596 DCHECK(!compiler->read_backward());
6597 Zone* zone = compiler->zone();
6598 ZoneList<CharacterRange>* lead_surrogates = CharacterRange::List(
6599 zone, CharacterRange::Range(kLeadSurrogateStart, kLeadSurrogateEnd));
6600 ZoneList<CharacterRange>* trail_surrogates = CharacterRange::List(
6601 zone, CharacterRange::Range(kTrailSurrogateStart, kTrailSurrogateEnd));
6603 ChoiceNode* optional_step_back =
new (zone) ChoiceNode(2, zone);
6605 int stack_register = compiler->UnicodeLookaroundStackRegister();
6606 int position_register = compiler->UnicodeLookaroundPositionRegister();
6607 RegExpNode* step_back = TextNode::CreateForCharacterRanges(
6608 zone, lead_surrogates,
true, on_success, flags);
6609 RegExpLookaround::Builder builder(
true, step_back, stack_register,
6611 RegExpNode* match_trail = TextNode::CreateForCharacterRanges(
6612 zone, trail_surrogates,
false, builder.on_match_success(), flags);
6614 optional_step_back->AddAlternative(
6615 GuardedAlternative(builder.ForMatch(match_trail)));
6616 optional_step_back->AddAlternative(GuardedAlternative(on_success));
6618 return optional_step_back;
6622 RegExpEngine::CompilationResult RegExpEngine::Compile(
6623 Isolate* isolate, Zone* zone, RegExpCompileData* data,
6624 JSRegExp::Flags flags, Handle<String> pattern,
6625 Handle<String> sample_subject,
bool is_one_byte) {
6626 if ((data->capture_count + 1) * 2 - 1 > RegExpMacroAssembler::kMaxRegister) {
6627 return IrregexpRegExpTooBig(isolate);
6629 bool is_sticky = IsSticky(flags);
6630 bool is_global = IsGlobal(flags);
6631 bool is_unicode = IsUnicode(flags);
6632 RegExpCompiler compiler(isolate, zone, data->capture_count, is_one_byte);
6634 if (compiler.optimize())
6635 compiler.set_optimize(!TooMuchRegExpCode(isolate, pattern));
6638 static const int kSampleSize = 128;
6640 sample_subject = String::Flatten(isolate, sample_subject);
6641 int chars_sampled = 0;
6642 int half_way = (sample_subject->length() - kSampleSize) / 2;
6643 for (
int i = Max(0, half_way);
6644 i < sample_subject->length() && chars_sampled < kSampleSize;
6645 i++, chars_sampled++) {
6646 compiler.frequency_collator()->CountCharacter(sample_subject->Get(
i));
6650 RegExpNode* captured_body = RegExpCapture::ToNode(data->tree,
6654 RegExpNode* node = captured_body;
6655 bool is_end_anchored = data->tree->IsAnchoredAtEnd();
6656 bool is_start_anchored = data->tree->IsAnchoredAtStart();
6657 int max_length = data->tree->max_match();
6658 if (!is_start_anchored && !is_sticky) {
6661 JSRegExp::Flags default_flags = JSRegExp::Flags();
6662 RegExpNode* loop_node = RegExpQuantifier::ToNode(
6663 0, RegExpTree::kInfinity,
false,
6664 new (zone) RegExpCharacterClass(
'*', default_flags), &compiler,
6665 captured_body, data->contains_anchor);
6667 if (data->contains_anchor) {
6670 ChoiceNode* first_step_node =
new(zone) ChoiceNode(2, zone);
6671 first_step_node->AddAlternative(GuardedAlternative(captured_body));
6672 first_step_node->AddAlternative(GuardedAlternative(
new (zone) TextNode(
6673 new (zone) RegExpCharacterClass(
'*', default_flags),
false,
6675 node = first_step_node;
6681 node = node->FilterOneByte(RegExpCompiler::kMaxRecursion);
6684 if (node !=
nullptr) {
6685 node = node->FilterOneByte(RegExpCompiler::kMaxRecursion);
6687 }
else if (is_unicode && (is_global || is_sticky)) {
6688 node = OptionallyStepBackToLeadSurrogate(&compiler, node, flags);
6691 if (node ==
nullptr) node =
new (zone) EndNode(EndNode::BACKTRACK, zone);
6693 Analysis analysis(isolate, is_one_byte);
6694 analysis.EnsureAnalyzed(node);
6695 if (analysis.has_failed()) {
6696 const char* error_message = analysis.error_message();
6697 return CompilationResult(isolate, error_message);
6701 #ifndef V8_INTERPRETED_REGEXP 6704 NativeRegExpMacroAssembler::Mode mode =
6705 is_one_byte ? NativeRegExpMacroAssembler::LATIN1
6706 : NativeRegExpMacroAssembler::UC16;
6708 #if V8_TARGET_ARCH_IA32 6709 RegExpMacroAssemblerIA32 macro_assembler(isolate, zone, mode,
6710 (data->capture_count + 1) * 2);
6711 #elif V8_TARGET_ARCH_X64 6712 RegExpMacroAssemblerX64 macro_assembler(isolate, zone, mode,
6713 (data->capture_count + 1) * 2);
6714 #elif V8_TARGET_ARCH_ARM 6715 RegExpMacroAssemblerARM macro_assembler(isolate, zone, mode,
6716 (data->capture_count + 1) * 2);
6717 #elif V8_TARGET_ARCH_ARM64 6718 RegExpMacroAssemblerARM64 macro_assembler(isolate, zone, mode,
6719 (data->capture_count + 1) * 2);
6720 #elif V8_TARGET_ARCH_S390 6721 RegExpMacroAssemblerS390 macro_assembler(isolate, zone, mode,
6722 (data->capture_count + 1) * 2);
6723 #elif V8_TARGET_ARCH_PPC 6724 RegExpMacroAssemblerPPC macro_assembler(isolate, zone, mode,
6725 (data->capture_count + 1) * 2);
6726 #elif V8_TARGET_ARCH_MIPS 6727 RegExpMacroAssemblerMIPS macro_assembler(isolate, zone, mode,
6728 (data->capture_count + 1) * 2);
6729 #elif V8_TARGET_ARCH_MIPS64 6730 RegExpMacroAssemblerMIPS macro_assembler(isolate, zone, mode,
6731 (data->capture_count + 1) * 2);
6733 #error "Unsupported architecture" 6736 #else // V8_INTERPRETED_REGEXP 6738 EmbeddedVector<byte, 1024> codes;
6739 RegExpMacroAssemblerIrregexp macro_assembler(isolate, codes, zone);
6740 #endif // V8_INTERPRETED_REGEXP 6742 macro_assembler.set_slow_safe(TooMuchRegExpCode(isolate, pattern));
6746 static const int kMaxBacksearchLimit = 1024;
6747 if (is_end_anchored && !is_start_anchored && !is_sticky &&
6748 max_length < kMaxBacksearchLimit) {
6749 macro_assembler.SetCurrentPositionFromEnd(max_length);
6753 RegExpMacroAssembler::GlobalMode mode = RegExpMacroAssembler::GLOBAL;
6754 if (data->tree->min_match() > 0) {
6755 mode = RegExpMacroAssembler::GLOBAL_NO_ZERO_LENGTH_CHECK;
6756 }
else if (is_unicode) {
6757 mode = RegExpMacroAssembler::GLOBAL_UNICODE;
6759 macro_assembler.set_global_mode(mode);
6762 return compiler.Assemble(isolate, ¯o_assembler, node, data->capture_count,
6766 bool RegExpEngine::TooMuchRegExpCode(Isolate* isolate, Handle<String> pattern) {
6767 Heap* heap = isolate->heap();
6768 bool too_much = pattern->length() > RegExpImpl::kRegExpTooLargeToOptimize;
6769 if (heap->isolate()->total_regexp_code_generated() >
6770 RegExpImpl::kRegExpCompiledLimit &&
6771 heap->CommittedMemoryExecutable() >
6772 RegExpImpl::kRegExpExecutableMemoryLimit) {
6778 Object* RegExpResultsCache::Lookup(Heap* heap, String key_string,
6779 Object* key_pattern,
6780 FixedArray* last_match_cache,
6781 ResultsCacheType type) {
6783 if (!key_string->IsInternalizedString())
return Smi::kZero;
6784 if (type == STRING_SPLIT_SUBSTRINGS) {
6785 DCHECK(key_pattern->IsString());
6786 if (!key_pattern->IsInternalizedString())
return Smi::kZero;
6787 cache = heap->string_split_cache();
6789 DCHECK(type == REGEXP_MULTIPLE_INDICES);
6790 DCHECK(key_pattern->IsFixedArray());
6791 cache = heap->regexp_multiple_cache();
6794 uint32_t hash = key_string->Hash();
6795 uint32_t index = ((hash & (kRegExpResultsCacheSize - 1)) &
6796 ~(kArrayEntriesPerCacheEntry - 1));
6797 if (cache->get(index + kStringOffset) != key_string ||
6798 cache->get(index + kPatternOffset) != key_pattern) {
6800 ((index + kArrayEntriesPerCacheEntry) & (kRegExpResultsCacheSize - 1));
6801 if (cache->get(index + kStringOffset) != key_string ||
6802 cache->get(index + kPatternOffset) != key_pattern) {
6807 *last_match_cache = FixedArray::cast(cache->get(index + kLastMatchOffset));
6808 return cache->get(index + kArrayOffset);
6811 void RegExpResultsCache::Enter(Isolate* isolate, Handle<String> key_string,
6812 Handle<Object> key_pattern,
6813 Handle<FixedArray> value_array,
6814 Handle<FixedArray> last_match_cache,
6815 ResultsCacheType type) {
6816 Factory* factory = isolate->factory();
6817 Handle<FixedArray> cache;
6818 if (!key_string->IsInternalizedString())
return;
6819 if (type == STRING_SPLIT_SUBSTRINGS) {
6820 DCHECK(key_pattern->IsString());
6821 if (!key_pattern->IsInternalizedString())
return;
6822 cache = factory->string_split_cache();
6824 DCHECK(type == REGEXP_MULTIPLE_INDICES);
6825 DCHECK(key_pattern->IsFixedArray());
6826 cache = factory->regexp_multiple_cache();
6829 uint32_t hash = key_string->Hash();
6830 uint32_t index = ((hash & (kRegExpResultsCacheSize - 1)) &
6831 ~(kArrayEntriesPerCacheEntry - 1));
6832 if (cache->get(index + kStringOffset) == Smi::kZero) {
6833 cache->set(index + kStringOffset, *key_string);
6834 cache->set(index + kPatternOffset, *key_pattern);
6835 cache->set(index + kArrayOffset, *value_array);
6836 cache->set(index + kLastMatchOffset, *last_match_cache);
6839 ((index + kArrayEntriesPerCacheEntry) & (kRegExpResultsCacheSize - 1));
6840 if (cache->get(index2 + kStringOffset) == Smi::kZero) {
6841 cache->set(index2 + kStringOffset, *key_string);
6842 cache->set(index2 + kPatternOffset, *key_pattern);
6843 cache->set(index2 + kArrayOffset, *value_array);
6844 cache->set(index2 + kLastMatchOffset, *last_match_cache);
6846 cache->set(index2 + kStringOffset, Smi::kZero);
6847 cache->set(index2 + kPatternOffset, Smi::kZero);
6848 cache->set(index2 + kArrayOffset, Smi::kZero);
6849 cache->set(index2 + kLastMatchOffset, Smi::kZero);
6850 cache->set(index + kStringOffset, *key_string);
6851 cache->set(index + kPatternOffset, *key_pattern);
6852 cache->set(index + kArrayOffset, *value_array);
6853 cache->set(index + kLastMatchOffset, *last_match_cache);
6858 if (type == STRING_SPLIT_SUBSTRINGS && value_array->length() < 100) {
6859 for (
int i = 0;
i < value_array->length();
i++) {
6860 Handle<String> str(String::cast(value_array->get(
i)), isolate);
6861 Handle<String> internalized_str = factory->InternalizeString(str);
6862 value_array->set(
i, *internalized_str);
6866 value_array->set_map_no_write_barrier(
6867 ReadOnlyRoots(isolate).fixed_cow_array_map());
6870 void RegExpResultsCache::Clear(FixedArray cache) {
6871 for (
int i = 0;
i < kRegExpResultsCacheSize;
i++) {
6872 cache->set(
i, Smi::kZero);