7 #ifdef V8_INTERPRETED_REGEXP 9 #include "src/regexp/interpreter-irregexp.h" 11 #include "src/ast/ast.h" 12 #include "src/objects-inl.h" 13 #include "src/regexp/bytecodes-irregexp.h" 14 #include "src/regexp/jsregexp.h" 15 #include "src/regexp/regexp-macro-assembler.h" 17 #include "src/utils.h" 19 #ifdef V8_INTL_SUPPORT 20 #include "unicode/uchar.h" 21 #endif // V8_INTL_SUPPORT 28 static bool BackRefMatchesNoCase(Isolate* isolate,
int from,
int current,
29 int len, Vector<const uc16> subject,
32 reinterpret_cast<Address
>(
const_cast<uc16*
>(&subject.at(from)));
34 reinterpret_cast<Address
>(
const_cast<uc16*
>(&subject.at(current)));
35 size_t length = len * kUC16Size;
36 return RegExpMacroAssembler::CaseInsensitiveCompareUC16(
37 offset_a, offset_b, length, unicode ?
nullptr : isolate) == 1;
41 static bool BackRefMatchesNoCase(Isolate* isolate,
int from,
int current,
42 int len, Vector<const uint8_t> subject,
45 for (
int i = 0;
i < len;
i++) {
46 unsigned int old_char = subject[from++];
47 unsigned int new_char = subject[current++];
48 if (old_char == new_char)
continue;
52 if (old_char != new_char)
return false;
54 if (!(old_char -
'a' <=
'z' -
'a') &&
55 !(old_char - 224 <= 254 - 224 && old_char != 247)) {
64 static void TraceInterpreter(
const byte* code_base,
70 const char* bytecode_name) {
71 if (FLAG_trace_regexp_bytecodes) {
72 bool printable = (current_char < 127 && current_char >= 32);
75 "pc = %02x, sp = %d, curpos = %d, curchar = %08x (%c), bc = %s" :
76 "pc = %02x, sp = %d, curpos = %d, curchar = %08x .%c., bc = %s";
82 printable ? current_char :
'.',
84 for (
int i = 0;
i < bytecode_length;
i++) {
85 printf(
", %02x", pc[
i]);
88 for (
int i = 1;
i < bytecode_length;
i++) {
89 unsigned char b = pc[
i];
90 if (b < 127 && b >= 32) {
101 #define BYTECODE(name) \ 103 TraceInterpreter(code_base, \ 105 static_cast<int>(backtrack_sp - backtrack_stack_base), \ 108 BC_##name##_LENGTH, \ 111 #define BYTECODE(name) \ 116 static int32_t Load32Aligned(
const byte* pc) {
117 DCHECK_EQ(0, reinterpret_cast<intptr_t>(pc) & 3);
118 return *
reinterpret_cast<const int32_t *
>(pc);
122 static int32_t Load16Aligned(
const byte* pc) {
123 DCHECK_EQ(0, reinterpret_cast<intptr_t>(pc) & 1);
124 return *
reinterpret_cast<const uint16_t *
>(pc);
132 class BacktrackStack {
134 BacktrackStack() { data_ = NewArray<int>(kBacktrackStackSize); }
140 int* data()
const {
return data_; }
142 int max_size()
const {
return kBacktrackStackSize; }
145 static const int kBacktrackStackSize = 10000;
149 DISALLOW_COPY_AND_ASSIGN(BacktrackStack);
153 template <
typename Char>
154 static RegExpImpl::IrregexpResult RawMatch(Isolate* isolate,
155 const byte* code_base,
156 Vector<const Char> subject,
160 const byte* pc = code_base;
164 BacktrackStack backtrack_stack;
165 int* backtrack_stack_base = backtrack_stack.data();
166 int* backtrack_sp = backtrack_stack_base;
167 int backtrack_stack_space = backtrack_stack.max_size();
169 if (FLAG_trace_regexp_bytecodes) {
170 PrintF(
"\n\nStart bytecode interpreter\n\n");
174 int32_t insn = Load32Aligned(pc);
175 switch (insn & BYTECODE_MASK) {
179 if (--backtrack_stack_space < 0) {
180 return RegExpImpl::RE_EXCEPTION;
182 *backtrack_sp++ = current;
183 pc += BC_PUSH_CP_LENGTH;
186 if (--backtrack_stack_space < 0) {
187 return RegExpImpl::RE_EXCEPTION;
189 *backtrack_sp++ = Load32Aligned(pc + 4);
190 pc += BC_PUSH_BT_LENGTH;
192 BYTECODE(PUSH_REGISTER)
193 if (--backtrack_stack_space < 0) {
194 return RegExpImpl::RE_EXCEPTION;
196 *backtrack_sp++ = registers[insn >> BYTECODE_SHIFT];
197 pc += BC_PUSH_REGISTER_LENGTH;
199 BYTECODE(SET_REGISTER)
200 registers[insn >> BYTECODE_SHIFT] = Load32Aligned(pc + 4);
201 pc += BC_SET_REGISTER_LENGTH;
203 BYTECODE(ADVANCE_REGISTER)
204 registers[insn >> BYTECODE_SHIFT] += Load32Aligned(pc + 4);
205 pc += BC_ADVANCE_REGISTER_LENGTH;
207 BYTECODE(SET_REGISTER_TO_CP)
208 registers[insn >> BYTECODE_SHIFT] = current + Load32Aligned(pc + 4);
209 pc += BC_SET_REGISTER_TO_CP_LENGTH;
211 BYTECODE(SET_CP_TO_REGISTER)
212 current = registers[insn >> BYTECODE_SHIFT];
213 pc += BC_SET_CP_TO_REGISTER_LENGTH;
215 BYTECODE(SET_REGISTER_TO_SP)
216 registers[insn >> BYTECODE_SHIFT] =
217 static_cast<int>(backtrack_sp - backtrack_stack_base);
218 pc += BC_SET_REGISTER_TO_SP_LENGTH;
220 BYTECODE(SET_SP_TO_REGISTER)
221 backtrack_sp = backtrack_stack_base + registers[insn >> BYTECODE_SHIFT];
222 backtrack_stack_space = backtrack_stack.max_size() -
223 static_cast<int>(backtrack_sp - backtrack_stack_base);
224 pc += BC_SET_SP_TO_REGISTER_LENGTH;
227 backtrack_stack_space++;
229 current = *backtrack_sp;
230 pc += BC_POP_CP_LENGTH;
233 backtrack_stack_space++;
235 pc = code_base + *backtrack_sp;
237 BYTECODE(POP_REGISTER)
238 backtrack_stack_space++;
240 registers[insn >> BYTECODE_SHIFT] = *backtrack_sp;
241 pc += BC_POP_REGISTER_LENGTH;
244 return RegExpImpl::RE_FAILURE;
246 return RegExpImpl::RE_SUCCESS;
248 current += insn >> BYTECODE_SHIFT;
249 pc += BC_ADVANCE_CP_LENGTH;
252 pc = code_base + Load32Aligned(pc + 4);
254 BYTECODE(ADVANCE_CP_AND_GOTO)
255 current += insn >> BYTECODE_SHIFT;
256 pc = code_base + Load32Aligned(pc + 4);
258 BYTECODE(CHECK_GREEDY)
259 if (current == backtrack_sp[-1]) {
261 backtrack_stack_space++;
262 pc = code_base + Load32Aligned(pc + 4);
264 pc += BC_CHECK_GREEDY_LENGTH;
267 BYTECODE(LOAD_CURRENT_CHAR) {
268 int pos = current + (insn >> BYTECODE_SHIFT);
269 if (pos >= subject.length() || pos < 0) {
270 pc = code_base + Load32Aligned(pc + 4);
272 current_char = subject[pos];
273 pc += BC_LOAD_CURRENT_CHAR_LENGTH;
277 BYTECODE(LOAD_CURRENT_CHAR_UNCHECKED) {
278 int pos = current + (insn >> BYTECODE_SHIFT);
279 current_char = subject[pos];
280 pc += BC_LOAD_CURRENT_CHAR_UNCHECKED_LENGTH;
283 BYTECODE(LOAD_2_CURRENT_CHARS) {
284 int pos = current + (insn >> BYTECODE_SHIFT);
285 if (pos + 2 > subject.length() || pos < 0) {
286 pc = code_base + Load32Aligned(pc + 4);
288 Char next = subject[pos + 1];
290 (subject[pos] | (next << (kBitsPerByte *
sizeof(Char))));
291 pc += BC_LOAD_2_CURRENT_CHARS_LENGTH;
295 BYTECODE(LOAD_2_CURRENT_CHARS_UNCHECKED) {
296 int pos = current + (insn >> BYTECODE_SHIFT);
297 Char next = subject[pos + 1];
298 current_char = (subject[pos] | (next << (kBitsPerByte *
sizeof(Char))));
299 pc += BC_LOAD_2_CURRENT_CHARS_UNCHECKED_LENGTH;
302 BYTECODE(LOAD_4_CURRENT_CHARS) {
303 DCHECK_EQ(1,
sizeof(Char));
304 int pos = current + (insn >> BYTECODE_SHIFT);
305 if (pos + 4 > subject.length() || pos < 0) {
306 pc = code_base + Load32Aligned(pc + 4);
308 Char next1 = subject[pos + 1];
309 Char next2 = subject[pos + 2];
310 Char next3 = subject[pos + 3];
311 current_char = (subject[pos] |
315 pc += BC_LOAD_4_CURRENT_CHARS_LENGTH;
319 BYTECODE(LOAD_4_CURRENT_CHARS_UNCHECKED) {
320 DCHECK_EQ(1,
sizeof(Char));
321 int pos = current + (insn >> BYTECODE_SHIFT);
322 Char next1 = subject[pos + 1];
323 Char next2 = subject[pos + 2];
324 Char next3 = subject[pos + 3];
325 current_char = (subject[pos] |
329 pc += BC_LOAD_4_CURRENT_CHARS_UNCHECKED_LENGTH;
332 BYTECODE(CHECK_4_CHARS) {
334 if (c == current_char) {
335 pc = code_base + Load32Aligned(pc + 8);
337 pc += BC_CHECK_4_CHARS_LENGTH;
341 BYTECODE(CHECK_CHAR) {
342 uint32_t c = (insn >> BYTECODE_SHIFT);
343 if (c == current_char) {
344 pc = code_base + Load32Aligned(pc + 4);
346 pc += BC_CHECK_CHAR_LENGTH;
350 BYTECODE(CHECK_NOT_4_CHARS) {
352 if (c != current_char) {
353 pc = code_base + Load32Aligned(pc + 8);
355 pc += BC_CHECK_NOT_4_CHARS_LENGTH;
359 BYTECODE(CHECK_NOT_CHAR) {
360 uint32_t c = (insn >> BYTECODE_SHIFT);
361 if (c != current_char) {
362 pc = code_base + Load32Aligned(pc + 4);
364 pc += BC_CHECK_NOT_CHAR_LENGTH;
368 BYTECODE(AND_CHECK_4_CHARS) {
370 if (c == (current_char & Load32Aligned(pc + 8))) {
371 pc = code_base + Load32Aligned(pc + 12);
373 pc += BC_AND_CHECK_4_CHARS_LENGTH;
377 BYTECODE(AND_CHECK_CHAR) {
378 uint32_t c = (insn >> BYTECODE_SHIFT);
379 if (c == (current_char & Load32Aligned(pc + 4))) {
380 pc = code_base + Load32Aligned(pc + 8);
382 pc += BC_AND_CHECK_CHAR_LENGTH;
386 BYTECODE(AND_CHECK_NOT_4_CHARS) {
388 if (c != (current_char & Load32Aligned(pc + 8))) {
389 pc = code_base + Load32Aligned(pc + 12);
391 pc += BC_AND_CHECK_NOT_4_CHARS_LENGTH;
395 BYTECODE(AND_CHECK_NOT_CHAR) {
396 uint32_t c = (insn >> BYTECODE_SHIFT);
397 if (c != (current_char & Load32Aligned(pc + 4))) {
398 pc = code_base + Load32Aligned(pc + 8);
400 pc += BC_AND_CHECK_NOT_CHAR_LENGTH;
404 BYTECODE(MINUS_AND_CHECK_NOT_CHAR) {
405 uint32_t c = (insn >> BYTECODE_SHIFT);
406 uint32_t minus = Load16Aligned(pc + 4);
407 uint32_t mask = Load16Aligned(pc + 6);
408 if (c != ((current_char - minus) & mask)) {
409 pc = code_base + Load32Aligned(pc + 8);
411 pc += BC_MINUS_AND_CHECK_NOT_CHAR_LENGTH;
415 BYTECODE(CHECK_CHAR_IN_RANGE) {
416 uint32_t from = Load16Aligned(pc + 4);
417 uint32_t to = Load16Aligned(pc + 6);
418 if (from <= current_char && current_char <= to) {
419 pc = code_base + Load32Aligned(pc + 8);
421 pc += BC_CHECK_CHAR_IN_RANGE_LENGTH;
425 BYTECODE(CHECK_CHAR_NOT_IN_RANGE) {
426 uint32_t from = Load16Aligned(pc + 4);
427 uint32_t to = Load16Aligned(pc + 6);
428 if (from > current_char || current_char > to) {
429 pc = code_base + Load32Aligned(pc + 8);
431 pc += BC_CHECK_CHAR_NOT_IN_RANGE_LENGTH;
435 BYTECODE(CHECK_BIT_IN_TABLE) {
436 int mask = RegExpMacroAssembler::kTableMask;
437 byte b = pc[8 + ((current_char & mask) >> kBitsPerByteLog2)];
438 int bit = (current_char & (kBitsPerByte - 1));
439 if ((b & (1 << bit)) != 0) {
440 pc = code_base + Load32Aligned(pc + 4);
442 pc += BC_CHECK_BIT_IN_TABLE_LENGTH;
447 uint32_t limit = (insn >> BYTECODE_SHIFT);
448 if (current_char < limit) {
449 pc = code_base + Load32Aligned(pc + 4);
451 pc += BC_CHECK_LT_LENGTH;
456 uint32_t limit = (insn >> BYTECODE_SHIFT);
457 if (current_char > limit) {
458 pc = code_base + Load32Aligned(pc + 4);
460 pc += BC_CHECK_GT_LENGTH;
464 BYTECODE(CHECK_REGISTER_LT)
465 if (registers[insn >> BYTECODE_SHIFT] < Load32Aligned(pc + 4)) {
466 pc = code_base + Load32Aligned(pc + 8);
468 pc += BC_CHECK_REGISTER_LT_LENGTH;
471 BYTECODE(CHECK_REGISTER_GE)
472 if (registers[insn >> BYTECODE_SHIFT] >= Load32Aligned(pc + 4)) {
473 pc = code_base + Load32Aligned(pc + 8);
475 pc += BC_CHECK_REGISTER_GE_LENGTH;
478 BYTECODE(CHECK_REGISTER_EQ_POS)
479 if (registers[insn >> BYTECODE_SHIFT] == current) {
480 pc = code_base + Load32Aligned(pc + 4);
482 pc += BC_CHECK_REGISTER_EQ_POS_LENGTH;
485 BYTECODE(CHECK_NOT_REGS_EQUAL)
486 if (registers[insn >> BYTECODE_SHIFT] ==
487 registers[Load32Aligned(pc + 4)]) {
488 pc += BC_CHECK_NOT_REGS_EQUAL_LENGTH;
490 pc = code_base + Load32Aligned(pc + 8);
493 BYTECODE(CHECK_NOT_BACK_REF) {
494 int from = registers[insn >> BYTECODE_SHIFT];
495 int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from;
496 if (from >= 0 && len > 0) {
497 if (current + len > subject.length() ||
498 CompareChars(&subject[from], &subject[current], len) != 0) {
499 pc = code_base + Load32Aligned(pc + 4);
504 pc += BC_CHECK_NOT_BACK_REF_LENGTH;
507 BYTECODE(CHECK_NOT_BACK_REF_BACKWARD) {
508 int from = registers[insn >> BYTECODE_SHIFT];
509 int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from;
510 if (from >= 0 && len > 0) {
511 if (current - len < 0 ||
512 CompareChars(&subject[from], &subject[current - len], len) != 0) {
513 pc = code_base + Load32Aligned(pc + 4);
518 pc += BC_CHECK_NOT_BACK_REF_BACKWARD_LENGTH;
521 BYTECODE(CHECK_NOT_BACK_REF_NO_CASE_UNICODE)
523 BYTECODE(CHECK_NOT_BACK_REF_NO_CASE) {
525 (insn & BYTECODE_MASK) == BC_CHECK_NOT_BACK_REF_NO_CASE_UNICODE;
526 int from = registers[insn >> BYTECODE_SHIFT];
527 int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from;
528 if (from >= 0 && len > 0) {
529 if (current + len > subject.length() ||
530 !BackRefMatchesNoCase(isolate, from, current, len, subject,
532 pc = code_base + Load32Aligned(pc + 4);
537 pc += BC_CHECK_NOT_BACK_REF_NO_CASE_LENGTH;
540 BYTECODE(CHECK_NOT_BACK_REF_NO_CASE_UNICODE_BACKWARD)
542 BYTECODE(CHECK_NOT_BACK_REF_NO_CASE_BACKWARD) {
543 bool unicode = (insn & BYTECODE_MASK) ==
544 BC_CHECK_NOT_BACK_REF_NO_CASE_UNICODE_BACKWARD;
545 int from = registers[insn >> BYTECODE_SHIFT];
546 int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from;
547 if (from >= 0 && len > 0) {
548 if (current - len < 0 ||
549 !BackRefMatchesNoCase(isolate, from, current - len, len, subject,
551 pc = code_base + Load32Aligned(pc + 4);
556 pc += BC_CHECK_NOT_BACK_REF_NO_CASE_BACKWARD_LENGTH;
559 BYTECODE(CHECK_AT_START)
561 pc = code_base + Load32Aligned(pc + 4);
563 pc += BC_CHECK_AT_START_LENGTH;
566 BYTECODE(CHECK_NOT_AT_START)
567 if (current + (insn >> BYTECODE_SHIFT) == 0) {
568 pc += BC_CHECK_NOT_AT_START_LENGTH;
570 pc = code_base + Load32Aligned(pc + 4);
573 BYTECODE(SET_CURRENT_POSITION_FROM_END) {
574 int by =
static_cast<uint32_t>(insn) >> BYTECODE_SHIFT;
575 if (subject.length() - current > by) {
576 current = subject.length() - by;
577 current_char = subject[current - 1];
579 pc += BC_SET_CURRENT_POSITION_FROM_END_LENGTH;
590 RegExpImpl::IrregexpResult IrregexpInterpreter::Match(
592 Handle<ByteArray> code_array,
593 Handle<String> subject,
595 int start_position) {
596 DCHECK(subject->IsFlat());
598 DisallowHeapAllocation no_gc;
599 const byte* code_base = code_array->GetDataStartAddress();
600 uc16 previous_char =
'\n';
601 String::FlatContent subject_content = subject->GetFlatContent();
602 if (subject_content.IsOneByte()) {
603 Vector<const uint8_t> subject_vector = subject_content.ToOneByteVector();
604 if (start_position != 0) previous_char = subject_vector[start_position - 1];
605 return RawMatch(isolate,
612 DCHECK(subject_content.IsTwoByte());
613 Vector<const uc16> subject_vector = subject_content.ToUC16Vector();
614 if (start_position != 0) previous_char = subject_vector[start_position - 1];
615 return RawMatch(isolate,
627 #endif // V8_INTERPRETED_REGEXP