5 #ifndef V8_REGEXP_JSREGEXP_H_ 6 #define V8_REGEXP_JSREGEXP_H_ 8 #include "src/allocation.h" 9 #include "src/isolate.h" 10 #include "src/objects/js-regexp.h" 11 #include "src/regexp/regexp-ast.h" 12 #include "src/regexp/regexp-macro-assembler.h" 19 class RegExpMacroAssembler;
22 class BoyerMooreLookahead;
24 inline bool IgnoreCase(JSRegExp::Flags flags) {
25 return (flags & JSRegExp::kIgnoreCase) != 0;
28 inline bool IsUnicode(JSRegExp::Flags flags) {
29 return (flags & JSRegExp::kUnicode) != 0;
32 inline bool IsSticky(JSRegExp::Flags flags) {
33 return (flags & JSRegExp::kSticky) != 0;
36 inline bool IsGlobal(JSRegExp::Flags flags) {
37 return (flags & JSRegExp::kGlobal) != 0;
40 inline bool DotAll(JSRegExp::Flags flags) {
41 return (flags & JSRegExp::kDotAll) != 0;
44 inline bool Multiline(JSRegExp::Flags flags) {
45 return (flags & JSRegExp::kMultiline) != 0;
48 inline bool NeedsUnicodeCaseEquivalents(JSRegExp::Flags flags) {
51 return IsUnicode(flags) && IgnoreCase(flags);
57 static bool UsesNativeRegExp() {
58 #ifdef V8_INTERPRETED_REGEXP 87 int capture_register_count);
101 enum IrregexpResult { RE_FAILURE = 0, RE_SUCCESS = 1, RE_EXCEPTION = -1 };
149 V8_INLINE int32_t* FetchNext();
151 V8_INLINE int32_t* LastSuccessfulMatch();
153 V8_INLINE
bool HasException() {
return num_matches_ < 0; }
156 int AdvanceZeroLength(
int last_index);
160 int current_match_index_;
161 int registers_per_match_;
163 int32_t* register_array_;
164 int register_array_size_;
171 static int IrregexpMaxRegisterCount(
FixedArray re);
172 static void SetIrregexpMaxRegisterCount(
FixedArray re,
int value);
173 static void SetIrregexpCaptureNameMap(
FixedArray re,
175 static int IrregexpNumberOfCaptures(
FixedArray re);
176 static int IrregexpNumberOfRegisters(
FixedArray re);
185 static const size_t kRegExpExecutableMemoryLimit = 16 * MB;
186 static const size_t kRegExpCompiledLimit = 1 * MB;
187 static const int kRegExpTooLargeToOptimize = 20 * KB;
192 static inline bool EnsureCompiledIrregexp(
Isolate* isolate,
201 enum ElementInSetsRelation {
213 OutSet() : first_(0), remaining_(
nullptr), successors_(
nullptr) {}
215 bool Get(
unsigned value)
const;
216 static const unsigned kFirstLimit = 32;
222 void Set(
unsigned value,
Zone* zone);
230 : first_(first), remaining_(remaining), successors_(
nullptr) {}
246 Entry() : from_(0), to_(0), out_set_(
nullptr) {}
248 : from_(from), to_(to), out_set_(out_set) {
251 uc32 from() {
return from_; }
252 uc32 to() {
return to_; }
253 void set_to(uc32 value) { to_ = value; }
254 void AddValue(
int value,
Zone* zone) {
255 out_set_ = out_set_->Extend(value, zone);
257 OutSet* out_set() {
return out_set_; }
268 static const uc32 kNoKey;
269 static const Entry NoValue() {
return Value(); }
270 static inline int Compare(uc32 a, uc32 b) {
284 template <
typename Callback>
285 void ForEach(Callback* callback) {
286 return tree()->ForEach(callback);
292 OutSet* empty() {
return &empty_; }
294 ZoneSplayTree<Config>* tree() {
return &tree_; }
295 ZoneSplayTree<Config> tree_;
311 static const int kBase = 0;
313 static const int kBmpCodePoints = 1;
314 static const int kLeadSurrogates = 2;
315 static const int kTrailSurrogates = 3;
316 static const int kNonBmpCodePoints = 4;
327 #define FOR_EACH_NODE_TYPE(VISIT) \ 331 VISIT(BackReference) \ 343 : being_analyzed(
false),
344 been_analyzed(
false),
345 follows_word_interest(
false),
346 follows_newline_interest(
false),
347 follows_start_interest(
false),
350 replacement_calculated(
false) { }
355 return (at_end == that->at_end) &&
356 (follows_word_interest == that->follows_word_interest) &&
357 (follows_newline_interest == that->follows_newline_interest) &&
358 (follows_start_interest == that->follows_start_interest);
363 void AddFromPreceding(
NodeInfo* that) {
364 at_end |= that->at_end;
365 follows_word_interest |= that->follows_word_interest;
366 follows_newline_interest |= that->follows_newline_interest;
367 follows_start_interest |= that->follows_start_interest;
370 bool HasLookbehind() {
371 return follows_word_interest ||
372 follows_newline_interest ||
373 follows_start_interest;
378 void AddFromFollowing(
NodeInfo* that) {
379 follows_word_interest |= that->follows_word_interest;
380 follows_newline_interest |= that->follows_newline_interest;
381 follows_start_interest |= that->follows_start_interest;
384 void ResetCompilationState() {
385 being_analyzed =
false;
386 been_analyzed =
false;
389 bool being_analyzed: 1;
390 bool been_analyzed: 1;
394 bool follows_word_interest: 1;
395 bool follows_newline_interest: 1;
396 bool follows_start_interest: 1;
400 bool replacement_calculated: 1;
412 cannot_match_(
false) { }
414 : characters_(characters),
417 cannot_match_(
false) { }
418 bool Rationalize(
bool one_byte);
422 void Advance(
int by,
bool one_byte);
424 bool cannot_match() {
return cannot_match_; }
425 void set_cannot_match() { cannot_match_ =
true; }
427 Position() : mask(0), value(0), determines_perfectly(
false) { }
430 bool determines_perfectly;
432 int characters() {
return characters_; }
433 void set_characters(
int characters) { characters_ = characters; }
434 Position* positions(
int index) {
436 DCHECK_GT(characters_, index);
437 return positions_ + index;
446 Position positions_[4];
456 extern int kUninitializedRegExpNodePlaceHolder;
462 : replacement_(
nullptr),
463 on_work_list_(
false),
466 bm_info_[0] = bm_info_[1] =
nullptr;
479 virtual int EatsAtLeast(
int still_to_find,
int budget,
bool not_at_start) = 0;
484 Trace* bounds_check_trace,
486 bool preload_has_checked_bounds,
487 Label* on_possible_success,
489 bool fall_through_on_failure);
496 int characters_filled_in,
497 bool not_at_start) = 0;
498 static const int kNodeIsTooComplexForGreedyLoops = kMinInt;
499 virtual int GreedyLoopTextLength() {
return kNodeIsTooComplexForGreedyLoops; }
502 virtual RegExpNode* GetSuccessorOfOmnivorousTextNode(
512 static const int kRecursionBudget = 200;
514 virtual void FillInBMInfo(
Isolate* isolate,
int offset,
int budget,
522 virtual RegExpNode* FilterOneByte(
int depth) {
return this; }
525 DCHECK(info()->replacement_calculated);
529 info()->replacement_calculated =
true;
530 replacement_ = replacement;
539 if (offset == 0) set_bm_info(not_at_start, bm);
542 Label* label() {
return &label_; }
548 static const int kMaxCopiesCodeGenerated = 10;
550 bool on_work_list() {
return on_work_list_; }
551 void set_on_work_list(
bool value) { on_work_list_ = value; }
556 return bm_info_[not_at_start ? 1 : 0];
559 Zone* zone()
const {
return zone_; }
562 enum LimitResult { DONE, CONTINUE };
568 bm_info_[not_at_start ? 1 : 0] = bm;
572 static const int kFirstCharBudget = 10;
591 :
RegExpNode(on_success->zone()), on_success_(on_success) { }
592 RegExpNode* on_success() {
return on_success_; }
593 void set_on_success(
RegExpNode* node) { on_success_ = node; }
594 RegExpNode* FilterOneByte(
int depth)
override;
595 void FillInBMInfo(
Isolate* isolate,
int offset,
int budget,
597 on_success_->FillInBMInfo(isolate, offset, budget - 1, bm, not_at_start);
598 if (offset == 0) set_bm_info(not_at_start, bm);
616 POSITIVE_SUBMATCH_SUCCESS,
626 static ActionNode* BeginSubmatch(
int stack_pointer_reg,
629 static ActionNode* PositiveSubmatchSuccess(
int stack_pointer_reg,
631 int clear_capture_count,
632 int clear_capture_from,
634 static ActionNode* EmptyMatchCheck(
int start_register,
635 int repetition_register,
636 int repetition_limit,
640 int EatsAtLeast(
int still_to_find,
int budget,
bool not_at_start)
override;
643 bool not_at_start)
override {
644 return on_success()->GetQuickCheckDetails(
645 details, compiler, filled_in, not_at_start);
647 void FillInBMInfo(
Isolate* isolate,
int offset,
int budget,
649 ActionType action_type() {
return action_type_; }
651 int GreedyLoopTextLength()
override {
652 return kNodeIsTooComplexForGreedyLoops;
663 } u_increment_register;
667 } u_position_register;
669 int stack_pointer_register;
670 int current_position_register;
671 int clear_register_count;
672 int clear_register_from;
676 int repetition_register;
677 int repetition_limit;
678 } u_empty_match_check;
686 action_type_(action_type) { }
687 ActionType action_type_;
688 friend class DotPrinter;
696 :
SeqRegExpNode(on_success), elms_(elms), read_backward_(read_backward) {}
701 read_backward_(read_backward) {
702 elms_->Add(TextElement::CharClass(that), zone());
719 int EatsAtLeast(
int still_to_find,
int budget,
bool not_at_start)
override;
722 bool not_at_start)
override;
724 bool read_backward() {
return read_backward_; }
725 void MakeCaseIndependent(
Isolate* isolate,
bool is_one_byte);
726 int GreedyLoopTextLength()
override;
729 void FillInBMInfo(
Isolate* isolate,
int offset,
int budget,
731 void CalculateOffsets();
732 RegExpNode* FilterOneByte(
int depth)
override;
735 enum TextEmitPassType {
737 SIMPLE_CHARACTER_MATCH,
738 NON_LETTER_CHARACTER_MATCH,
739 CASE_CHARACTER_MATCH,
740 CHARACTER_CLASS_MATCH
742 static bool SkipPass(TextEmitPassType pass,
bool ignore_case);
743 static const int kFirstRealPass = SIMPLE_CHARACTER_MATCH;
744 static const int kLastPass = CHARACTER_CLASS_MATCH;
746 TextEmitPassType pass,
749 bool first_element_checked,
767 return new(on_success->zone())
AssertionNode(AT_END, on_success);
770 return new(on_success->zone())
AssertionNode(AT_START, on_success);
773 return new(on_success->zone())
AssertionNode(AT_BOUNDARY, on_success);
776 return new(on_success->zone())
AssertionNode(AT_NON_BOUNDARY, on_success);
779 return new(on_success->zone())
AssertionNode(AFTER_NEWLINE, on_success);
783 int EatsAtLeast(
int still_to_find,
int budget,
bool not_at_start)
override;
786 bool not_at_start)
override;
787 void FillInBMInfo(
Isolate* isolate,
int offset,
int budget,
789 AssertionType assertion_type() {
return assertion_type_; }
793 enum IfPrevious { kIsNonWord, kIsWord };
796 IfPrevious backtrack_if_previous);
799 AssertionType assertion_type_;
808 start_reg_(start_reg),
811 read_backward_(read_backward) {}
813 int start_register() {
return start_reg_; }
814 int end_register() {
return end_reg_; }
815 bool read_backward() {
return read_backward_; }
817 int EatsAtLeast(
int still_to_find,
int recursion_depth,
818 bool not_at_start)
override;
821 bool not_at_start)
override {
824 void FillInBMInfo(
Isolate* isolate,
int offset,
int budget,
837 enum Action { ACCEPT, BACKTRACK, NEGATIVE_SUBMATCH_SUCCESS };
841 int EatsAtLeast(
int still_to_find,
int recursion_depth,
842 bool not_at_start)
override {
847 bool not_at_start)
override {
851 void FillInBMInfo(
Isolate* isolate,
int offset,
int budget,
866 int clear_capture_count,
867 int clear_capture_start,
869 :
EndNode(NEGATIVE_SUBMATCH_SUCCESS, zone),
870 stack_pointer_register_(stack_pointer_reg),
871 current_position_register_(position_reg),
872 clear_capture_count_(clear_capture_count),
873 clear_capture_start_(clear_capture_start) { }
877 int stack_pointer_register_;
878 int current_position_register_;
879 int clear_capture_count_;
880 int clear_capture_start_;
886 enum Relation { LT, GEQ };
887 Guard(
int reg, Relation op,
int value)
891 int reg() {
return reg_; }
892 Relation op() {
return op_; }
893 int value() {
return value_; }
905 : node_(node), guards_(
nullptr) {}
908 void set_node(
RegExpNode* node) { node_ = node; }
924 alternatives_(
new (zone)
927 not_at_start_(
false),
928 being_calculated_(
false) {}
931 alternatives()->Add(node, zone());
936 int EatsAtLeast(
int still_to_find,
int budget,
bool not_at_start)
override;
937 int EatsAtLeastHelper(
int still_to_find,
943 bool not_at_start)
override;
944 void FillInBMInfo(
Isolate* isolate,
int offset,
int budget,
947 bool being_calculated() {
return being_calculated_; }
948 bool not_at_start() {
return not_at_start_; }
949 void set_not_at_start() { not_at_start_ =
true; }
950 void set_being_calculated(
bool b) { being_calculated_ = b; }
951 virtual bool try_to_emit_quick_check_for_alternative(
bool is_first) {
954 RegExpNode* FilterOneByte(
int depth)
override;
955 virtual bool read_backward() {
return false; }
967 int CalculatePreloadCharacters(
RegExpCompiler* compiler,
int eats_at_least);
972 int preload_characters,
973 bool next_expects_preload);
975 Trace* current_trace,
977 void AssertGuardsMentionRegisters(
Trace* trace);
994 bool being_calculated_;
1004 AddAlternative(this_must_fail);
1005 AddAlternative(then_do_this);
1007 int EatsAtLeast(
int still_to_find,
int budget,
bool not_at_start)
override;
1010 bool not_at_start)
override;
1011 void FillInBMInfo(
Isolate* isolate,
int offset,
int budget,
1013 alternatives_->at(1).node()->FillInBMInfo(isolate, offset, budget - 1, bm,
1015 if (offset == 0) set_bm_info(not_at_start, bm);
1022 bool try_to_emit_quick_check_for_alternative(
bool is_first)
override {
1025 RegExpNode* FilterOneByte(
int depth)
override;
1033 loop_node_(
nullptr),
1034 continue_node_(
nullptr),
1035 body_can_be_zero_length_(body_can_be_zero_length),
1036 read_backward_(read_backward) {}
1040 int EatsAtLeast(
int still_to_find,
int budget,
bool not_at_start)
override;
1043 bool not_at_start)
override;
1044 void FillInBMInfo(
Isolate* isolate,
int offset,
int budget,
1046 RegExpNode* loop_node() {
return loop_node_; }
1047 RegExpNode* continue_node() {
return continue_node_; }
1048 bool body_can_be_zero_length() {
return body_can_be_zero_length_; }
1049 bool read_backward()
override {
return read_backward_; }
1051 RegExpNode* FilterOneByte(
int depth)
override;
1058 ChoiceNode::AddAlternative(node);
1063 bool body_can_be_zero_length_;
1064 bool read_backward_;
1093 enum ContainedInLattice {
1101 inline ContainedInLattice Combine(ContainedInLattice a, ContainedInLattice b) {
1102 return static_cast<ContainedInLattice
>(a | b);
1106 ContainedInLattice AddRange(ContainedInLattice a,
1109 Interval new_range);
1120 surrogate_(kNotYet) {
1121 for (
int i = 0;
i < kMapSize;
i++) {
1122 map_->Add(
false, zone);
1126 bool& at(
int i) {
return map_->at(
i); }
1128 static const int kMapSize = 128;
1129 static const int kMask = kMapSize - 1;
1131 int map_count()
const {
return map_count_; }
1133 void Set(
int character);
1134 void SetInterval(
const Interval& interval);
1136 bool is_non_word() {
return w_ == kLatticeOut; }
1137 bool is_word() {
return w_ == kLatticeIn; }
1142 ContainedInLattice w_;
1143 ContainedInLattice s_;
1144 ContainedInLattice d_;
1145 ContainedInLattice surrogate_;
1153 int length() {
return length_; }
1154 int max_char() {
return max_char_; }
1157 int Count(
int map_number) {
1158 return bitmaps_->at(map_number)->map_count();
1163 void Set(
int map_number,
int character) {
1164 if (character > max_char_)
return;
1166 info->Set(character);
1169 void SetInterval(
int map_number,
const Interval& interval) {
1170 if (interval.from() > max_char_)
return;
1172 if (interval.to() > max_char_) {
1173 info->SetInterval(
Interval(interval.from(), max_char_));
1175 info->SetInterval(interval);
1179 void SetAll(
int map_number) {
1180 bitmaps_->at(map_number)->SetAll();
1183 void SetRest(
int from_map) {
1184 for (
int i = from_map;
i < length_;
i++) SetAll(
i);
1199 int GetSkipTable(
int min_lookahead,
1202 bool FindWorthwhileInterval(
int* from,
int* to);
1203 int FindBestInterval(
1204 int max_number_of_chars,
int old_biggest_points,
int* from,
int* to);
1224 UNKNOWN = -1, FALSE_VALUE = 0, TRUE_VALUE = 1
1230 : action_type_(action_type), reg_(reg), next_(
nullptr) {}
1232 bool Mentions(
int reg);
1233 int reg() {
return reg_; }
1234 ActionNode::ActionType action_type() {
return action_type_; }
1236 ActionNode::ActionType action_type_;
1246 cp_offset_(trace->cp_offset()),
1247 is_capture_(is_capture) { }
1248 int cp_offset() {
return cp_offset_; }
1249 bool is_capture() {
return is_capture_; }
1253 void set_cp_offset(
int cp_offset) { cp_offset_ = cp_offset; }
1261 int value() {
return value_; }
1271 Interval range() {
return range_; }
1285 backtrack_(nullptr),
1286 stop_node_(nullptr),
1287 loop_label_(nullptr),
1288 characters_preloaded_(0),
1289 bound_checked_up_to_(0),
1291 at_start_(UNKNOWN) {}
1298 int cp_offset() {
return cp_offset_; }
1299 DeferredAction* actions() {
return actions_; }
1311 return backtrack_ ==
nullptr && actions_ ==
nullptr && cp_offset_ == 0 &&
1312 characters_preloaded_ == 0 && bound_checked_up_to_ == 0 &&
1313 quick_check_performed_.characters() == 0 && at_start_ == UNKNOWN;
1315 TriBool at_start() {
return at_start_; }
1316 void set_at_start(TriBool at_start) { at_start_ = at_start; }
1317 Label* backtrack() {
return backtrack_; }
1318 Label* loop_label() {
return loop_label_; }
1319 RegExpNode* stop_node() {
return stop_node_; }
1320 int characters_preloaded() {
return characters_preloaded_; }
1321 int bound_checked_up_to() {
return bound_checked_up_to_; }
1322 int flush_budget() {
return flush_budget_; }
1323 QuickCheckDetails* quick_check_performed() {
return &quick_check_performed_; }
1324 bool mentions_reg(
int reg);
1328 bool GetStoredPosition(
int reg,
int* cp_offset);
1331 void add_action(DeferredAction* new_action) {
1332 DCHECK(new_action->next_ ==
nullptr);
1333 new_action->next_ = actions_;
1334 actions_ = new_action;
1336 void set_backtrack(Label* backtrack) { backtrack_ = backtrack; }
1337 void set_stop_node(RegExpNode* node) { stop_node_ = node; }
1338 void set_loop_label(Label* label) { loop_label_ = label; }
1339 void set_characters_preloaded(
int count) { characters_preloaded_ = count; }
1340 void set_bound_checked_up_to(
int to) { bound_checked_up_to_ = to; }
1341 void set_flush_budget(
int to) { flush_budget_ = to; }
1342 void set_quick_check_performed(QuickCheckDetails* d) {
1343 quick_check_performed_ = *d;
1345 void InvalidateCurrentCharacter();
1346 void AdvanceCurrentPositionInTrace(
int by, RegExpCompiler* compiler);
1349 int FindAffectedRegisters(OutSet* affected_registers, Zone* zone);
1350 void PerformDeferredActions(RegExpMacroAssembler* macro,
1352 const OutSet& affected_registers,
1353 OutSet* registers_to_pop,
1354 OutSet* registers_to_clear,
1356 void RestoreAffectedRegisters(RegExpMacroAssembler* macro,
1358 const OutSet& registers_to_pop,
1359 const OutSet& registers_to_clear);
1361 DeferredAction* actions_;
1363 RegExpNode* stop_node_;
1365 int characters_preloaded_;
1366 int bound_checked_up_to_;
1367 QuickCheckDetails quick_check_performed_;
1377 Label* label() {
return &label_; }
1378 Trace* counter_backtrack_trace() {
return &counter_backtrack_trace_; }
1382 Trace counter_backtrack_trace_;
1387 static const int kEatsAtLeastNotYetInitialized = -1;
1388 bool preload_is_current_;
1389 bool preload_has_checked_bounds_;
1390 int preload_characters_;
1393 eats_at_least_ = kEatsAtLeastNotYetInitialized;
1401 #define DECLARE_VISIT(Type) \ 1402 virtual void Visit##Type(Type##Node* that) = 0; 1403 FOR_EACH_NODE_TYPE(DECLARE_VISIT)
1404 #undef DECLARE_VISIT 1405 virtual void VisitLoopChoice(
LoopChoiceNode* that) { VisitChoice(that); }
1417 ignore_case_(ignore_case),
1423 table()->AddRange(range, choice_index_, zone_);
1428 #define DECLARE_VISIT(Type) \ 1429 virtual void Visit##Type(Type##Node* that); 1430 FOR_EACH_NODE_TYPE(DECLARE_VISIT)
1431 #undef DECLARE_VISIT 1434 void set_choice_index(
int value) { choice_index_ = value; }
1459 : isolate_(isolate), is_one_byte_(is_one_byte), error_message_(
nullptr) {}
1462 #define DECLARE_VISIT(Type) void Visit##Type(Type##Node* that) override; 1463 FOR_EACH_NODE_TYPE(DECLARE_VISIT)
1464 #undef DECLARE_VISIT 1467 bool has_failed() {
return error_message_ !=
nullptr; }
1468 const char* error_message() {
1469 DCHECK(error_message_ !=
nullptr);
1470 return error_message_;
1472 void fail(
const char* error_message) {
1473 error_message_ = error_message;
1476 Isolate* isolate()
const {
return isolate_; }
1481 const char* error_message_;
1483 DISALLOW_IMPLICIT_CONSTRUCTORS(
Analysis);
1492 contains_anchor(
false),
1497 bool contains_anchor;
1509 : code(code), num_registers(registers) {}
1510 const char*
const error_message =
nullptr;
1512 int const num_registers = 0;
1524 static void DotPrint(
const char* label,
RegExpNode* node,
bool ignore_case);
1530 enum ResultsCacheType { REGEXP_MULTIPLE_INDICES, STRING_SPLIT_SUBSTRINGS };
1542 static const int kRegExpResultsCacheSize = 0x100;
1545 static const int kArrayEntriesPerCacheEntry = 4;
1546 static const int kStringOffset = 0;
1547 static const int kPatternOffset = 1;
1548 static const int kArrayOffset = 2;
1549 static const int kLastMatchOffset = 3;
1555 #endif // V8_REGEXP_JSREGEXP_H_