V8 API Reference, 7.2.502.16 (for Deno 0.2.4)
regexp-ast.cc
1 // Copyright 2016 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "src/ostreams.h"
6 #include "src/regexp/regexp-ast.h"
7 
8 namespace v8 {
9 namespace internal {
10 
11 #define MAKE_ACCEPT(Name) \
12  void* RegExp##Name::Accept(RegExpVisitor* visitor, void* data) { \
13  return visitor->Visit##Name(this, data); \
14  }
15 FOR_EACH_REG_EXP_TREE_TYPE(MAKE_ACCEPT)
16 #undef MAKE_ACCEPT
17 
18 #define MAKE_TYPE_CASE(Name) \
19  RegExp##Name* RegExpTree::As##Name() { return nullptr; } \
20  bool RegExpTree::Is##Name() { return false; }
21 FOR_EACH_REG_EXP_TREE_TYPE(MAKE_TYPE_CASE)
22 #undef MAKE_TYPE_CASE
23 
24 #define MAKE_TYPE_CASE(Name) \
25  RegExp##Name* RegExp##Name::As##Name() { return this; } \
26  bool RegExp##Name::Is##Name() { return true; }
27 FOR_EACH_REG_EXP_TREE_TYPE(MAKE_TYPE_CASE)
28 #undef MAKE_TYPE_CASE
29 
30 
31 static Interval ListCaptureRegisters(ZoneList<RegExpTree*>* children) {
32  Interval result = Interval::Empty();
33  for (int i = 0; i < children->length(); i++)
34  result = result.Union(children->at(i)->CaptureRegisters());
35  return result;
36 }
37 
38 
39 Interval RegExpAlternative::CaptureRegisters() {
40  return ListCaptureRegisters(nodes());
41 }
42 
43 
44 Interval RegExpDisjunction::CaptureRegisters() {
45  return ListCaptureRegisters(alternatives());
46 }
47 
48 
49 Interval RegExpLookaround::CaptureRegisters() {
50  return body()->CaptureRegisters();
51 }
52 
53 
54 Interval RegExpCapture::CaptureRegisters() {
55  Interval self(StartRegister(index()), EndRegister(index()));
56  return self.Union(body()->CaptureRegisters());
57 }
58 
59 
60 Interval RegExpQuantifier::CaptureRegisters() {
61  return body()->CaptureRegisters();
62 }
63 
64 
65 bool RegExpAssertion::IsAnchoredAtStart() {
66  return assertion_type() == RegExpAssertion::START_OF_INPUT;
67 }
68 
69 
70 bool RegExpAssertion::IsAnchoredAtEnd() {
71  return assertion_type() == RegExpAssertion::END_OF_INPUT;
72 }
73 
74 
75 bool RegExpAlternative::IsAnchoredAtStart() {
76  ZoneList<RegExpTree*>* nodes = this->nodes();
77  for (int i = 0; i < nodes->length(); i++) {
78  RegExpTree* node = nodes->at(i);
79  if (node->IsAnchoredAtStart()) {
80  return true;
81  }
82  if (node->max_match() > 0) {
83  return false;
84  }
85  }
86  return false;
87 }
88 
89 
90 bool RegExpAlternative::IsAnchoredAtEnd() {
91  ZoneList<RegExpTree*>* nodes = this->nodes();
92  for (int i = nodes->length() - 1; i >= 0; i--) {
93  RegExpTree* node = nodes->at(i);
94  if (node->IsAnchoredAtEnd()) {
95  return true;
96  }
97  if (node->max_match() > 0) {
98  return false;
99  }
100  }
101  return false;
102 }
103 
104 
105 bool RegExpDisjunction::IsAnchoredAtStart() {
106  ZoneList<RegExpTree*>* alternatives = this->alternatives();
107  for (int i = 0; i < alternatives->length(); i++) {
108  if (!alternatives->at(i)->IsAnchoredAtStart()) return false;
109  }
110  return true;
111 }
112 
113 
114 bool RegExpDisjunction::IsAnchoredAtEnd() {
115  ZoneList<RegExpTree*>* alternatives = this->alternatives();
116  for (int i = 0; i < alternatives->length(); i++) {
117  if (!alternatives->at(i)->IsAnchoredAtEnd()) return false;
118  }
119  return true;
120 }
121 
122 
123 bool RegExpLookaround::IsAnchoredAtStart() {
124  return is_positive() && type() == LOOKAHEAD && body()->IsAnchoredAtStart();
125 }
126 
127 
128 bool RegExpCapture::IsAnchoredAtStart() { return body()->IsAnchoredAtStart(); }
129 
130 
131 bool RegExpCapture::IsAnchoredAtEnd() { return body()->IsAnchoredAtEnd(); }
132 
133 
134 // Convert regular expression trees to a simple sexp representation.
135 // This representation should be different from the input grammar
136 // in as many cases as possible, to make it more difficult for incorrect
137 // parses to look as correct ones which is likely if the input and
138 // output formats are alike.
139 class RegExpUnparser final : public RegExpVisitor {
140  public:
141  RegExpUnparser(std::ostream& os, Zone* zone) : os_(os), zone_(zone) {}
142  void VisitCharacterRange(CharacterRange that);
143 #define MAKE_CASE(Name) void* Visit##Name(RegExp##Name*, void* data) override;
144  FOR_EACH_REG_EXP_TREE_TYPE(MAKE_CASE)
145 #undef MAKE_CASE
146  private:
147  std::ostream& os_;
148  Zone* zone_;
149 };
150 
151 
152 void* RegExpUnparser::VisitDisjunction(RegExpDisjunction* that, void* data) {
153  os_ << "(|";
154  for (int i = 0; i < that->alternatives()->length(); i++) {
155  os_ << " ";
156  that->alternatives()->at(i)->Accept(this, data);
157  }
158  os_ << ")";
159  return nullptr;
160 }
161 
162 
163 void* RegExpUnparser::VisitAlternative(RegExpAlternative* that, void* data) {
164  os_ << "(:";
165  for (int i = 0; i < that->nodes()->length(); i++) {
166  os_ << " ";
167  that->nodes()->at(i)->Accept(this, data);
168  }
169  os_ << ")";
170  return nullptr;
171 }
172 
173 
174 void RegExpUnparser::VisitCharacterRange(CharacterRange that) {
175  os_ << AsUC32(that.from());
176  if (!that.IsSingleton()) {
177  os_ << "-" << AsUC32(that.to());
178  }
179 }
180 
181 
182 void* RegExpUnparser::VisitCharacterClass(RegExpCharacterClass* that,
183  void* data) {
184  if (that->is_negated()) os_ << "^";
185  os_ << "[";
186  for (int i = 0; i < that->ranges(zone_)->length(); i++) {
187  if (i > 0) os_ << " ";
188  VisitCharacterRange(that->ranges(zone_)->at(i));
189  }
190  os_ << "]";
191  return nullptr;
192 }
193 
194 
195 void* RegExpUnparser::VisitAssertion(RegExpAssertion* that, void* data) {
196  switch (that->assertion_type()) {
197  case RegExpAssertion::START_OF_INPUT:
198  os_ << "@^i";
199  break;
200  case RegExpAssertion::END_OF_INPUT:
201  os_ << "@$i";
202  break;
203  case RegExpAssertion::START_OF_LINE:
204  os_ << "@^l";
205  break;
206  case RegExpAssertion::END_OF_LINE:
207  os_ << "@$l";
208  break;
209  case RegExpAssertion::BOUNDARY:
210  os_ << "@b";
211  break;
212  case RegExpAssertion::NON_BOUNDARY:
213  os_ << "@B";
214  break;
215  }
216  return nullptr;
217 }
218 
219 
220 void* RegExpUnparser::VisitAtom(RegExpAtom* that, void* data) {
221  os_ << "'";
222  Vector<const uc16> chardata = that->data();
223  for (int i = 0; i < chardata.length(); i++) {
224  os_ << AsUC16(chardata[i]);
225  }
226  os_ << "'";
227  return nullptr;
228 }
229 
230 
231 void* RegExpUnparser::VisitText(RegExpText* that, void* data) {
232  if (that->elements()->length() == 1) {
233  that->elements()->at(0).tree()->Accept(this, data);
234  } else {
235  os_ << "(!";
236  for (int i = 0; i < that->elements()->length(); i++) {
237  os_ << " ";
238  that->elements()->at(i).tree()->Accept(this, data);
239  }
240  os_ << ")";
241  }
242  return nullptr;
243 }
244 
245 
246 void* RegExpUnparser::VisitQuantifier(RegExpQuantifier* that, void* data) {
247  os_ << "(# " << that->min() << " ";
248  if (that->max() == RegExpTree::kInfinity) {
249  os_ << "- ";
250  } else {
251  os_ << that->max() << " ";
252  }
253  os_ << (that->is_greedy() ? "g " : that->is_possessive() ? "p " : "n ");
254  that->body()->Accept(this, data);
255  os_ << ")";
256  return nullptr;
257 }
258 
259 
260 void* RegExpUnparser::VisitCapture(RegExpCapture* that, void* data) {
261  os_ << "(^ ";
262  that->body()->Accept(this, data);
263  os_ << ")";
264  return nullptr;
265 }
266 
267 void* RegExpUnparser::VisitGroup(RegExpGroup* that, void* data) {
268  os_ << "(?: ";
269  that->body()->Accept(this, data);
270  os_ << ")";
271  return nullptr;
272 }
273 
274 void* RegExpUnparser::VisitLookaround(RegExpLookaround* that, void* data) {
275  os_ << "(";
276  os_ << (that->type() == RegExpLookaround::LOOKAHEAD ? "->" : "<-");
277  os_ << (that->is_positive() ? " + " : " - ");
278  that->body()->Accept(this, data);
279  os_ << ")";
280  return nullptr;
281 }
282 
283 
284 void* RegExpUnparser::VisitBackReference(RegExpBackReference* that,
285  void* data) {
286  os_ << "(<- " << that->index() << ")";
287  return nullptr;
288 }
289 
290 
291 void* RegExpUnparser::VisitEmpty(RegExpEmpty* that, void* data) {
292  os_ << '%';
293  return nullptr;
294 }
295 
296 
297 std::ostream& RegExpTree::Print(std::ostream& os, Zone* zone) { // NOLINT
298  RegExpUnparser unparser(os, zone);
299  Accept(&unparser, nullptr);
300  return os;
301 }
302 
303 
304 RegExpDisjunction::RegExpDisjunction(ZoneList<RegExpTree*>* alternatives)
305  : alternatives_(alternatives) {
306  DCHECK_LT(1, alternatives->length());
307  RegExpTree* first_alternative = alternatives->at(0);
308  min_match_ = first_alternative->min_match();
309  max_match_ = first_alternative->max_match();
310  for (int i = 1; i < alternatives->length(); i++) {
311  RegExpTree* alternative = alternatives->at(i);
312  min_match_ = Min(min_match_, alternative->min_match());
313  max_match_ = Max(max_match_, alternative->max_match());
314  }
315 }
316 
317 
318 static int IncreaseBy(int previous, int increase) {
319  if (RegExpTree::kInfinity - previous < increase) {
320  return RegExpTree::kInfinity;
321  } else {
322  return previous + increase;
323  }
324 }
325 
326 
327 RegExpAlternative::RegExpAlternative(ZoneList<RegExpTree*>* nodes)
328  : nodes_(nodes) {
329  DCHECK_LT(1, nodes->length());
330  min_match_ = 0;
331  max_match_ = 0;
332  for (int i = 0; i < nodes->length(); i++) {
333  RegExpTree* node = nodes->at(i);
334  int node_min_match = node->min_match();
335  min_match_ = IncreaseBy(min_match_, node_min_match);
336  int node_max_match = node->max_match();
337  max_match_ = IncreaseBy(max_match_, node_max_match);
338  }
339 }
340 
341 
342 } // namespace internal
343 } // namespace v8
Definition: libplatform.h:13