5 #ifndef V8_REGEXP_REGEXP_PARSER_H_ 6 #define V8_REGEXP_REGEXP_PARSER_H_ 8 #include "src/objects.h" 9 #include "src/objects/js-regexp.h" 10 #include "src/regexp/regexp-ast.h" 11 #include "src/zone/zone.h" 16 struct RegExpCompileData;
24 template <
typename T,
int initial_size>
32 void Add(T* value,
Zone* zone) {
33 if (last_ !=
nullptr) {
34 if (list_ ==
nullptr) {
37 list_->Add(last_, zone);
43 DCHECK(last_ !=
nullptr);
48 DCHECK(last_ !=
nullptr);
50 if ((list_ !=
nullptr) && (list_->length() > 0))
51 last_ = list_->RemoveLast();
58 DCHECK((0 <=
i) && (
i < length()));
59 if (list_ ==
nullptr) {
63 if (
i == list_->length()) {
64 DCHECK(last_ !=
nullptr);
78 int length = (list_ ==
nullptr) ? 0 : list_->length();
79 return length + ((last_ ==
nullptr) ? 0 : 1);
83 if (list_ ==
nullptr) {
86 if (last_ !=
nullptr) {
87 list_->Add(last_, zone);
103 void AddCharacter(uc16 character);
104 void AddUnicodeCharacter(uc32 character);
105 void AddEscapedUnicodeCharacter(uc32 character);
110 void AddCharacterClassForDesugaring(uc32 c);
114 void NewAlternative();
115 bool AddQuantifierToAtom(
int min,
int max,
116 RegExpQuantifier::QuantifierType
type);
// Returns true when the JSRegExp::kIgnoreCase bit is set in flags_.
122 bool ignore_case()
const {
return (flags_ & JSRegExp::kIgnoreCase) != 0; }
// Returns true when the JSRegExp::kMultiline bit is set in flags_.
123 bool multiline()
const {
return (flags_ & JSRegExp::kMultiline) != 0; }
// Returns true when the JSRegExp::kDotAll bit is set in flags_.
124 bool dotall()
const {
return (flags_ & JSRegExp::kDotAll) != 0; }
127 static const uc16 kNoPendingSurrogate = 0;
128 void AddLeadSurrogate(uc16 lead_surrogate);
129 void AddTrailSurrogate(uc16 trail_surrogate);
130 void FlushPendingSurrogate();
131 void FlushCharacters();
134 bool NeedsDesugaringForIgnoreCase(uc32 c);
// Returns the stored zone_ pointer without modifying this object.
135 Zone* zone()
const {
return zone_; }
// Returns true when the JSRegExp::kUnicode bit is set in flags_.
136 bool unicode()
const {
return (flags_ & JSRegExp::kUnicode) != 0; }
142 uc16 pending_surrogate_;
147 enum { ADD_NONE, ADD_CHAR, ADD_TERM, ADD_ASSERT, ADD_ATOM } last_added_;
148 #define LAST(x) last_added_ = x; 168 bool ParseIntervalQuantifier(
int* min_out,
int* max_out);
172 uc32 ParseClassCharacterEscape();
176 bool ParseHexEscape(
int length, uc32* value);
177 bool ParseUnicodeEscape(uc32* value);
178 bool ParseUnlimitedLengthHexNumber(
int max_value, uc32* value);
180 bool ParsePropertyClassName(std::vector<char>* name_1,
181 std::vector<char>* name_2);
183 const std::vector<char>& name_1,
184 const std::vector<char>& name_2);
186 RegExpTree* GetPropertySequence(
const std::vector<char>& name_1);
189 uc32 ParseOctalLiteral();
195 bool ParseBackReferenceIndex(
int* index_out);
200 bool add_unicode_case_equivalents, uc32* char_out,
201 bool* is_class_escape);
203 char ParseClassEscape();
207 void Advance(
int dist);
// Returns the current value of contains_anchor_. Const-qualified because it
// only reads the member, so it can also be called through a const object.
213 bool contains_anchor() const {
return contains_anchor_; }
// Sets contains_anchor_ to true. This setter takes no argument and never
// assigns false, so it cannot be used to clear the flag.
214 void set_contains_anchor() { contains_anchor_ =
true; }
// Returns the current value of captures_started_. Const-qualified because it
// only reads the member, so it can also be called through a const object.
215 int captures_started() const {
return captures_started_; }
// Returns next_pos_ - 1, i.e. one less than the stored next position.
// Const-qualified because it only reads next_pos_.
216 int position() const {
return next_pos_ - 1; }
// Returns the current value of failed_. Const-qualified because it only
// reads the member, so it can also be called through a const object.
217 bool failed() const {
return failed_; }
// Returns true when the JSRegExp::kUnicode bit is set in top_level_flags_.
220 bool unicode()
const {
return (top_level_flags_ & JSRegExp::kUnicode) != 0; }
222 static bool IsSyntaxCharacterOrSlash(uc32 c);
224 static const int kMaxCaptures = 1 << 16;
225 static const uc32 kEndMarker = (1 << 21);
228 enum SubexpressionType {
239 RegExpParserState(RegExpParserState* previous_state,
240 SubexpressionType group_type,
241 RegExpLookaround::Type lookaround_type,
242 int disjunction_capture_index,
245 : previous_state_(previous_state),
247 group_type_(group_type),
248 lookaround_type_(lookaround_type),
249 disjunction_capture_index_(disjunction_capture_index),
250 capture_name_(capture_name) {}
// Returns the stored previous_state_ pointer. IsSubexpression() compares the
// same member against nullptr, so the result may be null.
252 RegExpParserState* previous_state()
const {
return previous_state_; }
// Returns true when previous_state_ is non-null. Const-qualified to match the
// other read-only accessors on this state object (previous_state(),
// group_type(), ...), so it can be queried through a const pointer/reference.
253 bool IsSubexpression() const {
return previous_state_ !=
nullptr; }
// Returns the SubexpressionType stored in group_type_.
257 SubexpressionType group_type()
const {
return group_type_; }
// Returns the RegExpLookaround::Type stored in lookaround_type_.
259 RegExpLookaround::Type lookaround_type()
const {
return lookaround_type_; }
// Returns disjunction_capture_index_. Note the accessor name differs from
// the member it exposes.
263 int capture_index()
const {
return disjunction_capture_index_; }
// Returns true when capture_name_ is non-null.
268 bool IsNamedCapture()
const {
return capture_name_ !=
nullptr; }
271 bool IsInsideCaptureGroup(
int index);
277 RegExpParserState*
const previous_state_;
281 const SubexpressionType group_type_;
283 const RegExpLookaround::Type lookaround_type_;
285 const int disjunction_capture_index_;
303 RegExpParserState* state);
304 RegExpParserState* ParseOpenParenthesis(RegExpParserState* state);
309 void PatchNamedBackReferences();
315 bool HasNamedCaptures();
// Returns the stored isolate_ pointer. Const-qualified, like the zone()
// accessor declared next to it, because it only reads the member.
317 Isolate* isolate() const {
return isolate_; }
// Returns the stored zone_ pointer without modifying this object.
318 Zone* zone()
const {
return zone_; }
// Returns the current value of current_. Const-qualified because it only
// reads the member, so it can also be called through a const object.
320 uc32 current() const {
return current_; }
// Returns the current value of has_more_. Const-qualified because it only
// reads the member, so it can also be called through a const object.
321 bool has_more() const {
return has_more_; }
// Returns true when next_pos_ is strictly less than in()->length().
// NOTE(review): this accessor is not const-qualified; in() is not visible
// here, so confirm whether it is callable from a const method before adding
// const.
322 bool has_next() {
return next_pos_ < in()->length(); }
324 template <
bool update_position>
327 void ScanForCaptures();
342 int captures_started_;
346 bool contains_anchor_;
347 bool is_scanned_for_captures_;
348 bool has_named_captures_;
355 #endif // V8_REGEXP_REGEXP_PARSER_H_