V8 API Reference, 7.2.502.16 (for Deno 0.2.4)
regexp-macro-assembler-ia32.cc
1 // Copyright 2012 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #if V8_TARGET_ARCH_IA32
6 
7 #include "src/regexp/ia32/regexp-macro-assembler-ia32.h"
8 
9 #include "src/assembler-inl.h"
10 #include "src/log.h"
11 #include "src/macro-assembler.h"
12 #include "src/objects-inl.h"
13 #include "src/regexp/regexp-macro-assembler.h"
14 #include "src/regexp/regexp-stack.h"
15 #include "src/unicode.h"
16 
17 namespace v8 {
18 namespace internal {
19 
20 #ifndef V8_INTERPRETED_REGEXP
21 /*
22  * This assembler uses the following register assignment convention
23  * - edx : Current character. Must be loaded using LoadCurrentCharacter
24  * before using any of the dispatch methods. Temporarily stores the
25  * index of capture start after a matching pass for a global regexp.
26  * - edi : Current position in input, as negative offset from end of string.
27  * Please notice that this is the byte offset, not the character offset!
28  * - esi : end of input (points to byte after last character in input).
29  * - ebp : Frame pointer. Used to access arguments, local variables and
30  * RegExp registers.
31  * - esp : Points to tip of C stack.
32  * - ecx : Points to tip of backtrack stack
33  *
34  * The registers eax and ebx are free to use for computations.
35  *
36  * Each call to a public method should retain this convention.
37  * The stack will have the following structure:
38  * - Isolate* isolate (address of the current isolate)
39  * - direct_call (if 1, direct call from JavaScript code, if 0
40  * call through the runtime system)
41  * - stack_area_base (high end of the memory area to use as
42  * backtracking stack)
43  * - capture array size (may fit multiple sets of matches)
44  * - int* capture_array (int[num_saved_registers_], for output).
45  * - end of input (address of end of string)
46  * - start of input (address of first character in string)
47  * - start index (character index of start)
48  * - String input_string (location of a handle containing the string)
49  * --- frame alignment (if applicable) ---
50  * - return address
51  * ebp-> - old ebp
52  * - backup of caller esi
53  * - backup of caller edi
54  * - backup of caller ebx
55  * - success counter (only for global regexps to count matches).
56  * - Offset of location before start of input (effectively character
57  * string start - 1). Used to initialize capture registers to a
58  * non-position.
59  * - register 0 ebp[-4] (only positions must be stored in the first
60  * - register 1 ebp[-8] num_saved_registers_ registers)
61  * - ...
62  *
63  * The first num_saved_registers_ registers are initialized to point to
64  * "character -1" in the string (i.e., char_size() bytes before the first
65  * character of the string). The remaining registers start out as garbage.
66  *
67  * The data up to the return address must be placed there by the calling
68  * code, by calling the code entry as cast to a function with the signature:
69  * int (*match)(String input_string,
70  * int start_index,
71  * Address start,
72  * Address end,
73  * int* capture_output_array,
74  * int num_capture_registers,
75  * byte* stack_area_base,
76  * bool direct_call = false,
77  * Isolate* isolate);
78  */
79 
// Shorthand used by every emitter below: `__ insn(...)` issues `insn` through
// ACCESS_MASM(masm_). NOTE(review): ACCESS_MASM is defined outside this file;
// presumably it expands to a member access on masm_ - confirm.
80 #define __ ACCESS_MASM(masm_)
81 
82 RegExpMacroAssemblerIA32::RegExpMacroAssemblerIA32(Isolate* isolate, Zone* zone,
83  Mode mode,
84  int registers_to_save)
85  : NativeRegExpMacroAssembler(isolate, zone),
86  masm_(new MacroAssembler(isolate, nullptr, kRegExpCodeSize,
87  CodeObjectRequired::kYes)),
88  mode_(mode),
89  num_registers_(registers_to_save),
90  num_saved_registers_(registers_to_save),
91  entry_label_(),
92  start_label_(),
93  success_label_(),
94  backtrack_label_(),
95  exit_label_() {
96  // Irregexp code clobbers ebx and spills/restores it at all boundaries.
97  masm_->set_root_array_available(false);
98 
99  DCHECK_EQ(0, registers_to_save % 2);
100  __ jmp(&entry_label_); // We'll write the entry code later.
101  __ bind(&start_label_); // And then continue from here.
102 }
103 
104 
105 RegExpMacroAssemblerIA32::~RegExpMacroAssemblerIA32() {
106  delete masm_;
107  // Unuse labels in case we throw away the assembler without calling GetCode.
108  entry_label_.Unuse();
109  start_label_.Unuse();
110  success_label_.Unuse();
111  backtrack_label_.Unuse();
112  exit_label_.Unuse();
113  check_preempt_label_.Unuse();
114  stack_overflow_label_.Unuse();
115 }
116 
117 
118 int RegExpMacroAssemblerIA32::stack_limit_slack() {
119  return RegExpStack::kStackLimitSlack;
120 }
121 
122 
123 void RegExpMacroAssemblerIA32::AdvanceCurrentPosition(int by) {
124  if (by != 0) {
125  __ add(edi, Immediate(by * char_size()));
126  }
127 }
128 
129 
130 void RegExpMacroAssemblerIA32::AdvanceRegister(int reg, int by) {
131  DCHECK_LE(0, reg);
132  DCHECK_GT(num_registers_, reg);
133  if (by != 0) {
134  __ add(register_location(reg), Immediate(by));
135  }
136 }
137 
138 
139 void RegExpMacroAssemblerIA32::Backtrack() {
140  CheckPreemption();
141  // Pop Code offset from backtrack stack, add Code and jump to location.
142  Pop(ebx);
143  __ add(ebx, Immediate(masm_->CodeObject()));
144  __ jmp(ebx);
145 }
146 
147 
148 void RegExpMacroAssemblerIA32::Bind(Label* label) {
149  __ bind(label);
150 }
151 
152 
153 void RegExpMacroAssemblerIA32::CheckCharacter(uint32_t c, Label* on_equal) {
154  __ cmp(current_character(), c);
155  BranchOrBacktrack(equal, on_equal);
156 }
157 
158 
159 void RegExpMacroAssemblerIA32::CheckCharacterGT(uc16 limit, Label* on_greater) {
160  __ cmp(current_character(), limit);
161  BranchOrBacktrack(greater, on_greater);
162 }
163 
164 
165 void RegExpMacroAssemblerIA32::CheckAtStart(Label* on_at_start) {
166  __ lea(eax, Operand(edi, -char_size()));
167  __ cmp(eax, Operand(ebp, kStringStartMinusOne));
168  BranchOrBacktrack(equal, on_at_start);
169 }
170 
171 
172 void RegExpMacroAssemblerIA32::CheckNotAtStart(int cp_offset,
173  Label* on_not_at_start) {
174  __ lea(eax, Operand(edi, -char_size() + cp_offset * char_size()));
175  __ cmp(eax, Operand(ebp, kStringStartMinusOne));
176  BranchOrBacktrack(not_equal, on_not_at_start);
177 }
178 
179 
180 void RegExpMacroAssemblerIA32::CheckCharacterLT(uc16 limit, Label* on_less) {
181  __ cmp(current_character(), limit);
182  BranchOrBacktrack(less, on_less);
183 }
184 
185 
186 void RegExpMacroAssemblerIA32::CheckGreedyLoop(Label* on_equal) {
187  Label fallthrough;
188  __ cmp(edi, Operand(backtrack_stackpointer(), 0));
189  __ j(not_equal, &fallthrough);
190  __ add(backtrack_stackpointer(), Immediate(kPointerSize)); // Pop.
191  BranchOrBacktrack(no_condition, on_equal);
192  __ bind(&fallthrough);
193 }
194 
195 
// Emits code that compares the input at the current position with the capture
// delimited by registers start_reg (start) and start_reg + 1 (end), ignoring
// case.
//
//  - start_reg:     index of the register holding the capture start; the
//                   capture end is read from the following register.
//  - read_backward: when true the text *before* the current position is
//                   compared and the position moves backwards on success.
//  - unicode:       only consulted when V8_INTL_SUPPORT is defined; it then
//                   makes the isolate argument of the C helper 0.
//  - on_no_match:   target (via BranchOrBacktrack) when the input is too short
//                   or the texts differ.
//
// A zero-length (empty or cleared) capture matches trivially. On success edi
// is moved past the matched text (or to its start when reading backwards).
// LATIN1 subjects are compared by an inline loop; UC16 subjects call the
// re_case_insensitive_compare_uc16 C function.
196 void RegExpMacroAssemblerIA32::CheckNotBackReferenceIgnoreCase(
197  int start_reg, bool read_backward, bool unicode, Label* on_no_match) {
198  Label fallthrough;
199  __ mov(edx, register_location(start_reg)); // Index of start of capture
200  __ mov(ebx, register_location(start_reg + 1)); // Index of end of capture
201  __ sub(ebx, edx); // Length of capture.
202 
203  // At this point, the capture registers are either both set or both cleared.
204  // If the capture length is zero, then the capture is either empty or cleared.
205  // Fall through in both cases.
 // The jump consumes the zero flag set by the sub above.
206  __ j(equal, &fallthrough);
207 
208  // Check that there are sufficient characters left in the input.
209  if (read_backward) {
 // Fail unless the current position lies more than the capture length past
 // the "string start - 1" offset.
210  __ mov(eax, Operand(ebp, kStringStartMinusOne));
211  __ add(eax, ebx);
212  __ cmp(edi, eax);
213  BranchOrBacktrack(less_equal, on_no_match);
214  } else {
 // edi is a negative offset from the end of input, so position + length > 0
 // means the capture would extend past the end. The branch uses the flags
 // produced by the add.
215  __ mov(eax, edi);
216  __ add(eax, ebx);
217  BranchOrBacktrack(greater, on_no_match);
218  }
219 
220  if (mode_ == LATIN1) {
221  Label success;
222  Label fail;
223  Label loop_increment;
224  // Save register contents to make the registers available below.
225  __ push(edi);
226  __ push(backtrack_stackpointer());
227  // After this, the eax, ecx, and edi registers are available.
228 
 // Turn the end-relative offsets into absolute addresses (esi = end of input).
229  __ add(edx, esi); // Start of capture
230  __ add(edi, esi); // Start of text to match against capture.
231  if (read_backward) {
232  __ sub(edi, ebx); // Offset by length when matching backwards.
233  }
234  __ add(ebx, edi); // End of text to match against capture.
235 
236  Label loop;
237  __ bind(&loop);
 // Fast path: the two bytes are identical.
238  __ movzx_b(eax, Operand(edi, 0));
239  __ cmpb_al(Operand(edx, 0));
240  __ j(equal, &loop_increment);
241 
242  // Mismatch, try case-insensitive match (converting letters to lower-case).
243  __ or_(eax, 0x20); // Convert match character to lower-case.
244  __ lea(ecx, Operand(eax, -'a'));
245  __ cmp(ecx, static_cast<int32_t>('z' - 'a')); // Is eax a lowercase letter?
246  Label convert_capture;
247  __ j(below_equal, &convert_capture); // In range 'a'-'z'.
248  // Latin-1: Check for values in range [224,254] but not 247.
249  __ sub(ecx, Immediate(224 - 'a'));
250  __ cmp(ecx, Immediate(254 - 224));
251  __ j(above, &fail); // Weren't Latin-1 letters.
252  __ cmp(ecx, Immediate(247 - 224)); // Check for 247.
253  __ j(equal, &fail);
254  __ bind(&convert_capture);
255  // Also convert capture character.
256  __ movzx_b(ecx, Operand(edx, 0));
257  __ or_(ecx, 0x20);
258 
259  __ cmp(eax, ecx);
260  __ j(not_equal, &fail);
261 
262  __ bind(&loop_increment);
263  // Increment pointers into match and capture strings.
264  __ add(edx, Immediate(1));
265  __ add(edi, Immediate(1));
266  // Compare to end of match, and loop if not done.
267  __ cmp(edi, ebx);
268  __ j(below, &loop);
269  __ jmp(&success);
270 
271  __ bind(&fail);
272  // Restore original values before failing.
 // Pops mirror the two pushes above, in reverse order.
273  __ pop(backtrack_stackpointer());
274  __ pop(edi);
275  BranchOrBacktrack(no_condition, on_no_match);
276 
277  __ bind(&success);
278  // Restore original value before continuing.
279  __ pop(backtrack_stackpointer());
280  // Drop original value of character position.
281  __ add(esp, Immediate(kPointerSize));
282  // Compute new value of character position after the matched part.
 // edi is an absolute address here; subtracting esi makes it end-relative again.
283  __ sub(edi, esi);
284  if (read_backward) {
285  // Subtract match length if we matched backward.
286  __ add(edi, register_location(start_reg));
287  __ sub(edi, register_location(start_reg + 1));
288  }
289  } else {
290  DCHECK(mode_ == UC16);
291  // Save registers before calling C function.
292  __ push(esi);
293  __ push(edi);
294  __ push(backtrack_stackpointer());
295  __ push(ebx);
296 
297  static const int argument_count = 4;
298  __ PrepareCallCFunction(argument_count, ecx);
299  // Put arguments into allocated stack area, last argument highest on stack.
300  // Parameters are
301  // Address byte_offset1 - Address captured substring's start.
302  // Address byte_offset2 - Address of current character position.
303  // size_t byte_length - length of capture in bytes(!)
304 // Isolate* isolate or 0 if unicode flag.
305 
306  // Set isolate.
307 #ifdef V8_INTL_SUPPORT
308  if (unicode) {
309  __ mov(Operand(esp, 3 * kPointerSize), Immediate(0));
310  } else // NOLINT
311 #endif // V8_INTL_SUPPORT
312  {
313  __ mov(Operand(esp, 3 * kPointerSize),
314  Immediate(ExternalReference::isolate_address(isolate())));
315  }
316  // Set byte_length.
317  __ mov(Operand(esp, 2 * kPointerSize), ebx);
318  // Set byte_offset2.
319  // Found by adding negative string-end offset of current position (edi)
320  // to end of string.
321  __ add(edi, esi);
322  if (read_backward) {
323  __ sub(edi, ebx); // Offset by length when matching backwards.
324  }
325  __ mov(Operand(esp, 1 * kPointerSize), edi);
326  // Set byte_offset1.
327  // Start of capture, where edx already holds string-end negative offset.
328  __ add(edx, esi);
329  __ mov(Operand(esp, 0 * kPointerSize), edx);
330 
331  {
332  AllowExternalCallThatCantCauseGC scope(masm_);
333  ExternalReference compare =
334  ExternalReference::re_case_insensitive_compare_uc16(isolate());
335  __ CallCFunction(compare, argument_count);
336  }
337  // Pop original values before reacting on result value.
 // Reverse order of the four pushes above; edi and ebx are the original
 // end-relative position and the capture length again afterwards.
338  __ pop(ebx);
339  __ pop(backtrack_stackpointer());
340  __ pop(edi);
341  __ pop(esi);
342 
343  // Check if function returned non-zero for success or zero for failure.
344  __ or_(eax, eax);
345  BranchOrBacktrack(zero, on_no_match);
346  // On success, advance position by length of capture.
347  if (read_backward) {
348  __ sub(edi, ebx);
349  } else {
350  __ add(edi, ebx);
351  }
352  }
353  __ bind(&fallthrough);
354 }
355 
356 
// Emits code that compares the input at the current position, exactly
// (case-sensitively), with the capture delimited by registers start_reg
// (start) and start_reg + 1 (end).
//
//  - start_reg:     index of the register holding the capture start; the
//                   capture end is read from the following register.
//  - read_backward: when true the text *before* the current position is
//                   compared and the position moves backwards on success.
//  - on_no_match:   target (via BranchOrBacktrack) when the input is too short
//                   or the texts differ.
//
// A zero-length (empty or cleared) capture matches trivially. On success edi
// is moved past the matched text (or to its start when reading backwards).
// The comparison loop steps by char_size() and handles both LATIN1 and UC16.
357 void RegExpMacroAssemblerIA32::CheckNotBackReference(int start_reg,
358  bool read_backward,
359  Label* on_no_match) {
360  Label fallthrough;
361  Label success;
362  Label fail;
363 
364  // Find length of back-referenced capture.
365  __ mov(edx, register_location(start_reg));
366  __ mov(eax, register_location(start_reg + 1));
367  __ sub(eax, edx); // Length to check.
368 
369  // At this point, the capture registers are either both set or both cleared.
370  // If the capture length is zero, then the capture is either empty or cleared.
371  // Fall through in both cases.
 // The jump consumes the zero flag set by the sub above.
372  __ j(equal, &fallthrough);
373 
374  // Check that there are sufficient characters left in the input.
375  if (read_backward) {
 // Fail unless the current position lies more than the capture length past
 // the "string start - 1" offset.
376  __ mov(ebx, Operand(ebp, kStringStartMinusOne));
377  __ add(ebx, eax);
378  __ cmp(edi, ebx);
379  BranchOrBacktrack(less_equal, on_no_match);
380  } else {
 // edi is a negative offset from the end of input, so position + length > 0
 // means the capture would extend past the end. The branch uses the flags
 // produced by the add.
381  __ mov(ebx, edi);
382  __ add(ebx, eax);
383  BranchOrBacktrack(greater, on_no_match);
384  }
385 
386  // Save register to make it available below.
387  __ push(backtrack_stackpointer());
388 
389  // Compute pointers to match string and capture string
390  __ add(edx, esi); // Start of capture.
391  __ lea(ebx, Operand(esi, edi, times_1, 0)); // Start of match.
392  if (read_backward) {
393  __ sub(ebx, eax); // Offset by length when matching backwards.
394  }
395  __ lea(ecx, Operand(eax, ebx, times_1, 0)); // End of match
396 
397  Label loop;
398  __ bind(&loop);
 // Compare one character of the capture (edx) with one of the input (ebx).
399  if (mode_ == LATIN1) {
400  __ movzx_b(eax, Operand(edx, 0));
401  __ cmpb_al(Operand(ebx, 0));
402  } else {
403  DCHECK(mode_ == UC16);
404  __ movzx_w(eax, Operand(edx, 0));
405  __ cmpw_ax(Operand(ebx, 0));
406  }
407  __ j(not_equal, &fail);
408  // Increment pointers into capture and match string.
409  __ add(edx, Immediate(char_size()));
410  __ add(ebx, Immediate(char_size()));
411  // Check if we have reached end of match area.
412  __ cmp(ebx, ecx);
413  __ j(below, &loop);
414  __ jmp(&success);
415 
416  __ bind(&fail);
417  // Restore backtrack stackpointer.
418  __ pop(backtrack_stackpointer());
419  BranchOrBacktrack(no_condition, on_no_match);
420 
421  __ bind(&success);
422  // Move current character position to position after match.
 // ecx still holds the absolute end-of-match address; subtracting esi makes
 // the position end-relative again. The backtrack stack pointer saved above
 // is popped only after ecx has been consumed.
423  __ mov(edi, ecx);
424  __ sub(edi, esi);
425  if (read_backward) {
426  // Subtract match length if we matched backward.
427  __ add(edi, register_location(start_reg));
428  __ sub(edi, register_location(start_reg + 1));
429  }
430  // Restore backtrack stackpointer.
431  __ pop(backtrack_stackpointer());
432 
433  __ bind(&fallthrough);
434 }
435 
436 
437 void RegExpMacroAssemblerIA32::CheckNotCharacter(uint32_t c,
438  Label* on_not_equal) {
439  __ cmp(current_character(), c);
440  BranchOrBacktrack(not_equal, on_not_equal);
441 }
442 
443 
444 void RegExpMacroAssemblerIA32::CheckCharacterAfterAnd(uint32_t c,
445  uint32_t mask,
446  Label* on_equal) {
447  if (c == 0) {
448  __ test(current_character(), Immediate(mask));
449  } else {
450  __ mov(eax, mask);
451  __ and_(eax, current_character());
452  __ cmp(eax, c);
453  }
454  BranchOrBacktrack(equal, on_equal);
455 }
456 
457 
458 void RegExpMacroAssemblerIA32::CheckNotCharacterAfterAnd(uint32_t c,
459  uint32_t mask,
460  Label* on_not_equal) {
461  if (c == 0) {
462  __ test(current_character(), Immediate(mask));
463  } else {
464  __ mov(eax, mask);
465  __ and_(eax, current_character());
466  __ cmp(eax, c);
467  }
468  BranchOrBacktrack(not_equal, on_not_equal);
469 }
470 
471 
472 void RegExpMacroAssemblerIA32::CheckNotCharacterAfterMinusAnd(
473  uc16 c,
474  uc16 minus,
475  uc16 mask,
476  Label* on_not_equal) {
477  DCHECK_GT(String::kMaxUtf16CodeUnit, minus);
478  __ lea(eax, Operand(current_character(), -minus));
479  if (c == 0) {
480  __ test(eax, Immediate(mask));
481  } else {
482  __ and_(eax, mask);
483  __ cmp(eax, c);
484  }
485  BranchOrBacktrack(not_equal, on_not_equal);
486 }
487 
488 
489 void RegExpMacroAssemblerIA32::CheckCharacterInRange(
490  uc16 from,
491  uc16 to,
492  Label* on_in_range) {
493  __ lea(eax, Operand(current_character(), -from));
494  __ cmp(eax, to - from);
495  BranchOrBacktrack(below_equal, on_in_range);
496 }
497 
498 
499 void RegExpMacroAssemblerIA32::CheckCharacterNotInRange(
500  uc16 from,
501  uc16 to,
502  Label* on_not_in_range) {
503  __ lea(eax, Operand(current_character(), -from));
504  __ cmp(eax, to - from);
505  BranchOrBacktrack(above, on_not_in_range);
506 }
507 
508 
509 void RegExpMacroAssemblerIA32::CheckBitInTable(
510  Handle<ByteArray> table,
511  Label* on_bit_set) {
512  __ mov(eax, Immediate(table));
513  Register index = current_character();
514  if (mode_ != LATIN1 || kTableMask != String::kMaxOneByteCharCode) {
515  __ mov(ebx, kTableSize - 1);
516  __ and_(ebx, current_character());
517  index = ebx;
518  }
519  __ cmpb(FieldOperand(eax, index, times_1, ByteArray::kHeaderSize),
520  Immediate(0));
521  BranchOrBacktrack(not_equal, on_bit_set);
522 }
523 
524 
525 bool RegExpMacroAssemblerIA32::CheckSpecialCharacterClass(uc16 type,
526  Label* on_no_match) {
527  // Range checks (c in min..max) are generally implemented by an unsigned
528  // (c - min) <= (max - min) check
529  switch (type) {
530  case 's':
531  // Match space-characters
532  if (mode_ == LATIN1) {
533  // One byte space characters are '\t'..'\r', ' ' and \u00a0.
534  Label success;
535  __ cmp(current_character(), ' ');
536  __ j(equal, &success, Label::kNear);
537  // Check range 0x09..0x0D
538  __ lea(eax, Operand(current_character(), -'\t'));
539  __ cmp(eax, '\r' - '\t');
540  __ j(below_equal, &success, Label::kNear);
541  // \u00a0 (NBSP).
542  __ cmp(eax, 0x00A0 - '\t');
543  BranchOrBacktrack(not_equal, on_no_match);
544  __ bind(&success);
545  return true;
546  }
547  return false;
548  case 'S':
549  // The emitted code for generic character classes is good enough.
550  return false;
551  case 'd':
552  // Match ASCII digits ('0'..'9')
553  __ lea(eax, Operand(current_character(), -'0'));
554  __ cmp(eax, '9' - '0');
555  BranchOrBacktrack(above, on_no_match);
556  return true;
557  case 'D':
558  // Match non ASCII-digits
559  __ lea(eax, Operand(current_character(), -'0'));
560  __ cmp(eax, '9' - '0');
561  BranchOrBacktrack(below_equal, on_no_match);
562  return true;
563  case '.': {
564  // Match non-newlines (not 0x0A('\n'), 0x0D('\r'), 0x2028 and 0x2029)
565  __ mov(eax, current_character());
566  __ xor_(eax, Immediate(0x01));
567  // See if current character is '\n'^1 or '\r'^1, i.e., 0x0B or 0x0C
568  __ sub(eax, Immediate(0x0B));
569  __ cmp(eax, 0x0C - 0x0B);
570  BranchOrBacktrack(below_equal, on_no_match);
571  if (mode_ == UC16) {
572  // Compare original value to 0x2028 and 0x2029, using the already
573  // computed (current_char ^ 0x01 - 0x0B). I.e., check for
574  // 0x201D (0x2028 - 0x0B) or 0x201E.
575  __ sub(eax, Immediate(0x2028 - 0x0B));
576  __ cmp(eax, 0x2029 - 0x2028);
577  BranchOrBacktrack(below_equal, on_no_match);
578  }
579  return true;
580  }
581  case 'w': {
582  if (mode_ != LATIN1) {
583  // Table is 256 entries, so all Latin1 characters can be tested.
584  __ cmp(current_character(), Immediate('z'));
585  BranchOrBacktrack(above, on_no_match);
586  }
587  DCHECK_EQ(0, word_character_map[0]); // Character '\0' is not a word char.
588  ExternalReference word_map =
589  ExternalReference::re_word_character_map(isolate());
590  __ test_b(current_character(),
591  Operand(current_character(), times_1, word_map.address(),
592  RelocInfo::EXTERNAL_REFERENCE));
593  BranchOrBacktrack(zero, on_no_match);
594  return true;
595  }
596  case 'W': {
597  Label done;
598  if (mode_ != LATIN1) {
599  // Table is 256 entries, so all Latin1 characters can be tested.
600  __ cmp(current_character(), Immediate('z'));
601  __ j(above, &done);
602  }
603  DCHECK_EQ(0, word_character_map[0]); // Character '\0' is not a word char.
604  ExternalReference word_map =
605  ExternalReference::re_word_character_map(isolate());
606  __ test_b(current_character(),
607  Operand(current_character(), times_1, word_map.address(),
608  RelocInfo::EXTERNAL_REFERENCE));
609  BranchOrBacktrack(not_zero, on_no_match);
610  if (mode_ != LATIN1) {
611  __ bind(&done);
612  }
613  return true;
614  }
615  // Non-standard classes (with no syntactic shorthand) used internally.
616  case '*':
617  // Match any character.
618  return true;
619  case 'n': {
620  // Match newlines (0x0A('\n'), 0x0D('\r'), 0x2028 or 0x2029).
621  // The opposite of '.'.
622  __ mov(eax, current_character());
623  __ xor_(eax, Immediate(0x01));
624  // See if current character is '\n'^1 or '\r'^1, i.e., 0x0B or 0x0C
625  __ sub(eax, Immediate(0x0B));
626  __ cmp(eax, 0x0C - 0x0B);
627  if (mode_ == LATIN1) {
628  BranchOrBacktrack(above, on_no_match);
629  } else {
630  Label done;
631  BranchOrBacktrack(below_equal, &done);
632  DCHECK_EQ(UC16, mode_);
633  // Compare original value to 0x2028 and 0x2029, using the already
634  // computed (current_char ^ 0x01 - 0x0B). I.e., check for
635  // 0x201D (0x2028 - 0x0B) or 0x201E.
636  __ sub(eax, Immediate(0x2028 - 0x0B));
637  __ cmp(eax, 1);
638  BranchOrBacktrack(above, on_no_match);
639  __ bind(&done);
640  }
641  return true;
642  }
643  // No custom implementation (yet): s(UC16), S(UC16).
644  default:
645  return false;
646  }
647 }
648 
649 
650 void RegExpMacroAssemblerIA32::Fail() {
651  STATIC_ASSERT(FAILURE == 0); // Return value for failure is zero.
652  if (!global()) {
653  __ Move(eax, Immediate(FAILURE));
654  }
655  __ jmp(&exit_label_);
656 }
657 
658 
// Finishes code generation and returns the resulting Code object.
//
// Everything that depends on the final register count is emitted here, after
// the regexp body: the entry code (bound at entry_label_, which the
// constructor's initial jump targets), the success and exit code, and the
// out-of-line handlers for backtracking, preemption checks and backtrack-stack
// overflow. Handlers are only emitted when their label has actually been used.
//
// `source` is only passed on to the profiler's RegExpCodeCreateEvent.
659 Handle<HeapObject> RegExpMacroAssemblerIA32::GetCode(Handle<String> source) {
660  Label return_eax;
661  // Finalize code - write the entry point code now we know how many
662  // registers we need.
663 
664  // Entry code:
665  __ bind(&entry_label_);
666 
667  // Tell the system that we have a stack frame. Because the type is MANUAL, no
668  // code is generated.
669  FrameScope scope(masm_, StackFrame::MANUAL);
670 
671  // Actually emit code to start a new stack frame.
672  __ push(ebp);
673  __ mov(ebp, esp);
674  // Save callee-save registers. Order here should correspond to order of
675  // kBackup_ebx etc.
676  __ push(esi);
677  __ push(edi);
678  __ push(ebx); // Callee-save on MacOS.
679  __ push(Immediate(0)); // Number of successful matches in a global regexp.
680  __ push(Immediate(0)); // Make room for "string start - 1" constant.
681 
682  // Check if we have space on the stack for registers.
683  Label stack_limit_hit;
684  Label stack_ok;
685 
686  ExternalReference stack_limit =
687  ExternalReference::address_of_stack_limit(isolate());
 // ecx = esp - stack limit, i.e. the room left above the limit.
688  __ mov(ecx, esp);
689  __ sub(ecx, StaticVariable(stack_limit));
690  // Handle it if the stack pointer is already below the stack limit.
691  __ j(below_equal, &stack_limit_hit);
692  // Check if there is room for the variable number of registers above
693  // the stack limit.
694  __ cmp(ecx, num_registers_ * kPointerSize);
695  __ j(above_equal, &stack_ok);
696  // Exit with OutOfMemory exception. There is not enough space on the stack
697  // for our working registers.
698  __ mov(eax, EXCEPTION);
699  __ jmp(&return_eax);
700 
701  __ bind(&stack_limit_hit);
702  CallCheckStackGuardState(ebx);
703  __ or_(eax, eax);
704  // If returned value is non-zero, we exit with the returned value as result.
705  __ j(not_zero, &return_eax);
706 
707  __ bind(&stack_ok);
708  // Load start index for later use.
709  __ mov(ebx, Operand(ebp, kStartIndex));
710 
711  // Allocate space on stack for registers.
712  __ sub(esp, Immediate(num_registers_ * kPointerSize));
713  // Load string length.
714  __ mov(esi, Operand(ebp, kInputEnd));
715  // Load input position.
716  __ mov(edi, Operand(ebp, kInputStart));
717  // Set up edi to be negative offset from string end.
718  __ sub(edi, esi);
719 
720  // Set eax to address of char before start of the string.
721  // (effectively string position -1).
 // ebx becomes -start_index, so the lea computes
 // edi - start_index * char_size() - char_size().
722  __ neg(ebx);
723  if (mode_ == UC16) {
724  __ lea(eax, Operand(edi, ebx, times_2, -char_size()));
725  } else {
726  __ lea(eax, Operand(edi, ebx, times_1, -char_size()));
727  }
728  // Store this value in a local variable, for use when clearing
729  // position registers.
730  __ mov(Operand(ebp, kStringStartMinusOne), eax);
731 
732 #if V8_OS_WIN
733  // Ensure that we write to each stack page, in order. Skipping a page
734  // on Windows can cause segmentation faults. Assuming page size is 4k.
735  const int kPageSize = 4096;
736  const int kRegistersPerPage = kPageSize / kPointerSize;
737  for (int i = num_saved_registers_ + kRegistersPerPage - 1;
738  i < num_registers_;
739  i += kRegistersPerPage) {
740  __ mov(register_location(i), eax); // One write every page.
741  }
742 #endif // V8_OS_WIN
743 
744  Label load_char_start_regexp, start_regexp;
745  // Load newline if index is at start, previous character otherwise.
746  __ cmp(Operand(ebp, kStartIndex), Immediate(0));
747  __ j(not_equal, &load_char_start_regexp, Label::kNear);
748  __ mov(current_character(), '\n');
749  __ jmp(&start_regexp, Label::kNear);
750 
751  // Global regexp restarts matching here.
752  __ bind(&load_char_start_regexp);
753  // Load previous char as initial value of current character register.
754  LoadCurrentCharacterUnchecked(-1, 1);
755  __ bind(&start_regexp);
756 
757  // Initialize on-stack registers.
758  if (num_saved_registers_ > 0) { // Always is, if generated from a regexp.
759  // Fill saved registers with initial value = start offset - 1
760  // Fill in stack push order, to avoid accessing across an unwritten
761  // page (a problem on Windows).
762  if (num_saved_registers_ > 8) {
 // Emitted loop: ecx walks down from kRegisterZero in kPointerSize steps
 // until all num_saved_registers_ slots hold eax.
763  __ mov(ecx, kRegisterZero);
764  Label init_loop;
765  __ bind(&init_loop);
766  __ mov(Operand(ebp, ecx, times_1, 0), eax);
767  __ sub(ecx, Immediate(kPointerSize));
768  __ cmp(ecx, kRegisterZero - num_saved_registers_ * kPointerSize);
769  __ j(greater, &init_loop);
770  } else { // Unroll the loop.
771  for (int i = 0; i < num_saved_registers_; i++) {
772  __ mov(register_location(i), eax);
773  }
774  }
775  }
776 
777  // Initialize backtrack stack pointer.
778  __ mov(backtrack_stackpointer(), Operand(ebp, kStackHighEnd));
779 
 // Entry code is complete; continue with the regexp body emitted earlier.
780  __ jmp(&start_label_);
781 
782  // Exit code:
783  if (success_label_.is_linked()) {
784  // Save captures when successful.
785  __ bind(&success_label_);
786  if (num_saved_registers_ > 0) {
787  // copy captures to output
788  __ mov(ebx, Operand(ebp, kRegisterOutput));
789  __ mov(ecx, Operand(ebp, kInputEnd));
790  __ mov(edx, Operand(ebp, kStartIndex));
791  __ sub(ecx, Operand(ebp, kInputStart));
 // ecx = (input end - input start) plus the start index scaled to bytes;
 // adding it to an end-relative register value gives a byte index from the
 // start of the string.
792  if (mode_ == UC16) {
793  __ lea(ecx, Operand(ecx, edx, times_2, 0));
794  } else {
795  __ add(ecx, edx);
796  }
797  for (int i = 0; i < num_saved_registers_; i++) {
798  __ mov(eax, register_location(i));
799  if (i == 0 && global_with_zero_length_check()) {
800  // Keep capture start in edx for the zero-length check later.
801  __ mov(edx, eax);
802  }
803  // Convert to index from start of string, not end.
804  __ add(eax, ecx);
805  if (mode_ == UC16) {
806  __ sar(eax, 1); // Convert byte index to character index.
807  }
808  __ mov(Operand(ebx, i * kPointerSize), eax);
809  }
810  }
811 
812  if (global()) {
813  // Restart matching if the regular expression is flagged as global.
814  // Increment success counter.
815  __ inc(Operand(ebp, kSuccessfulCaptures));
816  // Capture results have been stored, so the number of remaining global
817  // output registers is reduced by the number of stored captures.
818  __ mov(ecx, Operand(ebp, kNumOutputRegisters));
819  __ sub(ecx, Immediate(num_saved_registers_));
820  // Check whether we have enough room for another set of capture results.
821  __ cmp(ecx, Immediate(num_saved_registers_));
822  __ j(less, &exit_label_);
823 
824  __ mov(Operand(ebp, kNumOutputRegisters), ecx);
825  // Advance the location for output.
826  __ add(Operand(ebp, kRegisterOutput),
827  Immediate(num_saved_registers_ * kPointerSize));
828 
829  // Prepare eax to initialize registers with its value in the next run.
830  __ mov(eax, Operand(ebp, kStringStartMinusOne));
831 
832  if (global_with_zero_length_check()) {
833  // Special case for zero-length matches.
834  // edx: capture start index
835  __ cmp(edi, edx);
836  // Not a zero-length match, restart.
837  __ j(not_equal, &load_char_start_regexp);
838  // edi (offset from the end) is zero if we already reached the end.
839  __ test(edi, edi);
840  __ j(zero, &exit_label_, Label::kNear);
841  // Advance current position after a zero-length match.
842  Label advance;
843  __ bind(&advance);
844  if (mode_ == UC16) {
845  __ add(edi, Immediate(2));
846  } else {
847  __ inc(edi);
848  }
 // For global unicode regexps `advance` is passed to CheckNotInSurrogatePair
 // so that the position can be advanced again.
 // NOTE(review): the helper is defined outside this chunk - confirm when it
 // branches.
849  if (global_unicode()) CheckNotInSurrogatePair(0, &advance);
850  }
851  __ jmp(&load_char_start_regexp);
852  } else {
853  __ mov(eax, Immediate(SUCCESS));
854  }
855  }
856 
857  __ bind(&exit_label_);
858  if (global()) {
859  // Return the number of successful captures.
860  __ mov(eax, Operand(ebp, kSuccessfulCaptures));
861  }
862 
 // Common epilogue; eax already holds the result for every path that gets here.
863  __ bind(&return_eax);
864  // Skip esp past regexp registers.
865  __ lea(esp, Operand(ebp, kBackup_ebx));
866  // Restore callee-save registers.
 // Pops are in the reverse order of the pushes in the entry code.
867  __ pop(ebx);
868  __ pop(edi);
869  __ pop(esi);
870  // Exit function frame, restore previous one.
871  __ pop(ebp);
872  __ ret(0);
873 
874  // Backtrack code (branch target for conditional backtracks).
875  if (backtrack_label_.is_linked()) {
876  __ bind(&backtrack_label_);
877  Backtrack();
878  }
879 
880  Label exit_with_exception;
881 
882  // Preempt-code
883  if (check_preempt_label_.is_linked()) {
884  SafeCallTarget(&check_preempt_label_);
885 
886  __ push(backtrack_stackpointer());
887  __ push(edi);
888 
889  CallCheckStackGuardState(ebx);
890  __ or_(eax, eax);
891  // If returning non-zero, we should end execution with the given
892  // result as return value.
 // The two values pushed above are not popped on this path; return_eax
 // reloads esp from ebp, which discards them.
893  __ j(not_zero, &return_eax);
894 
895  __ pop(edi);
896  __ pop(backtrack_stackpointer());
897  // String might have moved: Reload esi from frame.
898  __ mov(esi, Operand(ebp, kInputEnd));
899  SafeReturn();
900  }
901 
902  // Backtrack stack overflow code.
903  if (stack_overflow_label_.is_linked()) {
904  SafeCallTarget(&stack_overflow_label_);
905  // Reached if the backtrack-stack limit has been hit.
906 
907  Label grow_failed;
908  // Save registers before calling C function
909  __ push(esi);
910  __ push(edi);
911 
912  // Call GrowStack(backtrack_stackpointer())
 // Arguments, lowest stack slot first: the backtrack stack pointer, the
 // address of the frame's kStackHighEnd slot, and the isolate.
913  static const int num_arguments = 3;
914  __ PrepareCallCFunction(num_arguments, ebx);
915  __ mov(Operand(esp, 2 * kPointerSize),
916  Immediate(ExternalReference::isolate_address(isolate())));
917  __ lea(eax, Operand(ebp, kStackHighEnd));
918  __ mov(Operand(esp, 1 * kPointerSize), eax);
919  __ mov(Operand(esp, 0 * kPointerSize), backtrack_stackpointer());
920  ExternalReference grow_stack =
921  ExternalReference::re_grow_stack(isolate());
922  __ CallCFunction(grow_stack, num_arguments);
923  // If return nullptr, we have failed to grow the stack, and
924  // must exit with a stack-overflow exception.
925  __ or_(eax, eax);
926  __ j(equal, &exit_with_exception);
927  // Otherwise use return value as new stack pointer.
928  __ mov(backtrack_stackpointer(), eax);
929  // Restore saved registers and continue.
930  __ pop(edi);
931  __ pop(esi);
932  SafeReturn();
933  }
934 
935  if (exit_with_exception.is_linked()) {
936  // If any of the code above needed to exit with an exception.
937  __ bind(&exit_with_exception);
938  // Exit with Result EXCEPTION(-1) to signal thrown exception.
939  __ mov(eax, EXCEPTION);
940  __ jmp(&return_eax);
941  }
942 
 // All code has been emitted: build the Code object, report it to the
 // profiler and hand it back.
943  CodeDesc code_desc;
944  masm_->GetCode(masm_->isolate(), &code_desc);
945  Handle<Code> code = isolate()->factory()->NewCode(code_desc, Code::REGEXP,
946  masm_->CodeObject());
947  PROFILE(masm_->isolate(),
948  RegExpCodeCreateEvent(AbstractCode::cast(*code), *source));
949  return Handle<HeapObject>::cast(code);
950 }
951 
952 
953 void RegExpMacroAssemblerIA32::GoTo(Label* to) {
954  BranchOrBacktrack(no_condition, to);
955 }
956 
957 
958 void RegExpMacroAssemblerIA32::IfRegisterGE(int reg,
959  int comparand,
960  Label* if_ge) {
961  __ cmp(register_location(reg), Immediate(comparand));
962  BranchOrBacktrack(greater_equal, if_ge);
963 }
964 
965 
966 void RegExpMacroAssemblerIA32::IfRegisterLT(int reg,
967  int comparand,
968  Label* if_lt) {
969  __ cmp(register_location(reg), Immediate(comparand));
970  BranchOrBacktrack(less, if_lt);
971 }
972 
973 
974 void RegExpMacroAssemblerIA32::IfRegisterEqPos(int reg,
975  Label* if_eq) {
976  __ cmp(edi, register_location(reg));
977  BranchOrBacktrack(equal, if_eq);
978 }
979 
980 
981 RegExpMacroAssembler::IrregexpImplementation
982  RegExpMacroAssemblerIA32::Implementation() {
983  return kIA32Implementation;
984 }
985 
986 
987 void RegExpMacroAssemblerIA32::LoadCurrentCharacter(int cp_offset,
988  Label* on_end_of_input,
989  bool check_bounds,
990  int characters) {
991  DCHECK(cp_offset < (1<<30)); // Be sane! (And ensure negation works)
992  if (check_bounds) {
993  if (cp_offset >= 0) {
994  CheckPosition(cp_offset + characters - 1, on_end_of_input);
995  } else {
996  CheckPosition(cp_offset, on_end_of_input);
997  }
998  }
999  LoadCurrentCharacterUnchecked(cp_offset, characters);
1000 }
1001 
1002 
1003 void RegExpMacroAssemblerIA32::PopCurrentPosition() {
1004  Pop(edi);
1005 }
1006 
1007 
1008 void RegExpMacroAssemblerIA32::PopRegister(int register_index) {
1009  Pop(eax);
1010  __ mov(register_location(register_index), eax);
1011 }
1012 
1013 
1014 void RegExpMacroAssemblerIA32::PushBacktrack(Label* label) {
1015  Push(Immediate::CodeRelativeOffset(label));
1016  CheckStackLimit();
1017 }
1018 
1019 
1020 void RegExpMacroAssemblerIA32::PushCurrentPosition() {
1021  Push(edi);
1022 }
1023 
1024 
1025 void RegExpMacroAssemblerIA32::PushRegister(int register_index,
1026  StackCheckFlag check_stack_limit) {
1027  __ mov(eax, register_location(register_index));
1028  Push(eax);
1029  if (check_stack_limit) CheckStackLimit();
1030 }
1031 
1032 
1033 void RegExpMacroAssemblerIA32::ReadCurrentPositionFromRegister(int reg) {
1034  __ mov(edi, register_location(reg));
1035 }
1036 
1037 
1038 void RegExpMacroAssemblerIA32::ReadStackPointerFromRegister(int reg) {
1039  __ mov(backtrack_stackpointer(), register_location(reg));
1040  __ add(backtrack_stackpointer(), Operand(ebp, kStackHighEnd));
1041 }
1042 
1043 void RegExpMacroAssemblerIA32::SetCurrentPositionFromEnd(int by) {
1044  Label after_position;
1045  __ cmp(edi, -by * char_size());
1046  __ j(greater_equal, &after_position, Label::kNear);
1047  __ mov(edi, -by * char_size());
1048  // On RegExp code entry (where this operation is used), the character before
1049  // the current position is expected to be already loaded.
1050  // We have advanced the position, so it's safe to read backwards.
1051  LoadCurrentCharacterUnchecked(-1, 1);
1052  __ bind(&after_position);
1053 }
1054 
1055 
1056 void RegExpMacroAssemblerIA32::SetRegister(int register_index, int to) {
1057  DCHECK(register_index >= num_saved_registers_); // Reserved for positions!
1058  __ mov(register_location(register_index), Immediate(to));
1059 }
1060 
1061 
1062 bool RegExpMacroAssemblerIA32::Succeed() {
1063  __ jmp(&success_label_);
1064  return global();
1065 }
1066 
1067 
1068 void RegExpMacroAssemblerIA32::WriteCurrentPositionToRegister(int reg,
1069  int cp_offset) {
1070  if (cp_offset == 0) {
1071  __ mov(register_location(reg), edi);
1072  } else {
1073  __ lea(eax, Operand(edi, cp_offset * char_size()));
1074  __ mov(register_location(reg), eax);
1075  }
1076 }
1077 
1078 
1079 void RegExpMacroAssemblerIA32::ClearRegisters(int reg_from, int reg_to) {
1080  DCHECK(reg_from <= reg_to);
1081  __ mov(eax, Operand(ebp, kStringStartMinusOne));
1082  for (int reg = reg_from; reg <= reg_to; reg++) {
1083  __ mov(register_location(reg), eax);
1084  }
1085 }
1086 
1087 
1088 void RegExpMacroAssemblerIA32::WriteStackPointerToRegister(int reg) {
1089  __ mov(eax, backtrack_stackpointer());
1090  __ sub(eax, Operand(ebp, kStackHighEnd));
1091  __ mov(register_location(reg), eax);
1092 }
1093 
1094 
1095 // Private methods:
1096 
1097 void RegExpMacroAssemblerIA32::CallCheckStackGuardState(Register scratch) {
1098  static const int num_arguments = 3;
1099  __ PrepareCallCFunction(num_arguments, scratch);
1100  // RegExp code frame pointer.
1101  __ mov(Operand(esp, 2 * kPointerSize), ebp);
1102  // Code of self.
1103  __ mov(Operand(esp, 1 * kPointerSize), Immediate(masm_->CodeObject()));
1104  // Next address on the stack (will be address of return address).
1105  __ lea(eax, Operand(esp, -kPointerSize));
1106  __ mov(Operand(esp, 0 * kPointerSize), eax);
1107  ExternalReference check_stack_guard =
1108  ExternalReference::re_check_stack_guard_state(isolate());
1109  __ CallCFunction(check_stack_guard, num_arguments);
1110 }
1111 
1112 Operand RegExpMacroAssemblerIA32::StaticVariable(const ExternalReference& ext) {
1113  return Operand(ext.address(), RelocInfo::EXTERNAL_REFERENCE);
1114 }
1115 
1116 // Helper function for reading a value out of a stack frame.
1117 template <typename T>
1118 static T& frame_entry(Address re_frame, int frame_offset) {
1119  return reinterpret_cast<T&>(Memory<int32_t>(re_frame + frame_offset));
1120 }
1121 
1122 
1123 template <typename T>
1124 static T* frame_entry_address(Address re_frame, int frame_offset) {
1125  return reinterpret_cast<T*>(re_frame + frame_offset);
1126 }
1127 
1128 int RegExpMacroAssemblerIA32::CheckStackGuardState(Address* return_address,
1129  Address raw_code,
1130  Address re_frame) {
1131  Code re_code = Code::cast(ObjectPtr(raw_code));
1132  return NativeRegExpMacroAssembler::CheckStackGuardState(
1133  frame_entry<Isolate*>(re_frame, kIsolate),
1134  frame_entry<int>(re_frame, kStartIndex),
1135  frame_entry<int>(re_frame, kDirectCall) == 1, return_address, re_code,
1136  frame_entry_address<Address>(re_frame, kInputString),
1137  frame_entry_address<const byte*>(re_frame, kInputStart),
1138  frame_entry_address<const byte*>(re_frame, kInputEnd));
1139 }
1140 
1141 
1142 Operand RegExpMacroAssemblerIA32::register_location(int register_index) {
1143  DCHECK(register_index < (1<<30));
1144  if (num_registers_ <= register_index) {
1145  num_registers_ = register_index + 1;
1146  }
1147  return Operand(ebp, kRegisterZero - register_index * kPointerSize);
1148 }
1149 
1150 
1151 void RegExpMacroAssemblerIA32::CheckPosition(int cp_offset,
1152  Label* on_outside_input) {
1153  if (cp_offset >= 0) {
1154  __ cmp(edi, -cp_offset * char_size());
1155  BranchOrBacktrack(greater_equal, on_outside_input);
1156  } else {
1157  __ lea(eax, Operand(edi, cp_offset * char_size()));
1158  __ cmp(eax, Operand(ebp, kStringStartMinusOne));
1159  BranchOrBacktrack(less_equal, on_outside_input);
1160  }
1161 }
1162 
1163 
1164 void RegExpMacroAssemblerIA32::BranchOrBacktrack(Condition condition,
1165  Label* to) {
1166  if (condition < 0) { // No condition
1167  if (to == nullptr) {
1168  Backtrack();
1169  return;
1170  }
1171  __ jmp(to);
1172  return;
1173  }
1174  if (to == nullptr) {
1175  __ j(condition, &backtrack_label_);
1176  return;
1177  }
1178  __ j(condition, to);
1179 }
1180 
1181 
1182 void RegExpMacroAssemblerIA32::SafeCall(Label* to) {
1183  Label return_to;
1184  __ push(Immediate::CodeRelativeOffset(&return_to));
1185  __ jmp(to);
1186  __ bind(&return_to);
1187 }
1188 
1189 
1190 void RegExpMacroAssemblerIA32::SafeReturn() {
1191  __ pop(ebx);
1192  __ add(ebx, Immediate(masm_->CodeObject()));
1193  __ jmp(ebx);
1194 }
1195 
1196 
1197 void RegExpMacroAssemblerIA32::SafeCallTarget(Label* name) {
1198  __ bind(name);
1199 }
1200 
1201 
1202 void RegExpMacroAssemblerIA32::Push(Register source) {
1203  DCHECK(source != backtrack_stackpointer());
1204  // Notice: This updates flags, unlike normal Push.
1205  __ sub(backtrack_stackpointer(), Immediate(kPointerSize));
1206  __ mov(Operand(backtrack_stackpointer(), 0), source);
1207 }
1208 
1209 
1210 void RegExpMacroAssemblerIA32::Push(Immediate value) {
1211  // Notice: This updates flags, unlike normal Push.
1212  __ sub(backtrack_stackpointer(), Immediate(kPointerSize));
1213  __ mov(Operand(backtrack_stackpointer(), 0), value);
1214 }
1215 
1216 
1217 void RegExpMacroAssemblerIA32::Pop(Register target) {
1218  DCHECK(target != backtrack_stackpointer());
1219  __ mov(target, Operand(backtrack_stackpointer(), 0));
1220  // Notice: This updates flags, unlike normal Pop.
1221  __ add(backtrack_stackpointer(), Immediate(kPointerSize));
1222 }
1223 
1224 
1225 void RegExpMacroAssemblerIA32::CheckPreemption() {
1226  // Check for preemption.
1227  Label no_preempt;
1228  ExternalReference stack_limit =
1229  ExternalReference::address_of_stack_limit(isolate());
1230  __ cmp(esp, StaticVariable(stack_limit));
1231  __ j(above, &no_preempt);
1232 
1233  SafeCall(&check_preempt_label_);
1234 
1235  __ bind(&no_preempt);
1236 }
1237 
1238 
1239 void RegExpMacroAssemblerIA32::CheckStackLimit() {
1240  Label no_stack_overflow;
1241  ExternalReference stack_limit =
1242  ExternalReference::address_of_regexp_stack_limit(isolate());
1243  __ cmp(backtrack_stackpointer(), StaticVariable(stack_limit));
1244  __ j(above, &no_stack_overflow);
1245 
1246  SafeCall(&stack_overflow_label_);
1247 
1248  __ bind(&no_stack_overflow);
1249 }
1250 
1251 
1252 void RegExpMacroAssemblerIA32::LoadCurrentCharacterUnchecked(int cp_offset,
1253  int characters) {
1254  if (mode_ == LATIN1) {
1255  if (characters == 4) {
1256  __ mov(current_character(), Operand(esi, edi, times_1, cp_offset));
1257  } else if (characters == 2) {
1258  __ movzx_w(current_character(), Operand(esi, edi, times_1, cp_offset));
1259  } else {
1260  DCHECK_EQ(1, characters);
1261  __ movzx_b(current_character(), Operand(esi, edi, times_1, cp_offset));
1262  }
1263  } else {
1264  DCHECK(mode_ == UC16);
1265  if (characters == 2) {
1266  __ mov(current_character(),
1267  Operand(esi, edi, times_1, cp_offset * sizeof(uc16)));
1268  } else {
1269  DCHECK_EQ(1, characters);
1270  __ movzx_w(current_character(),
1271  Operand(esi, edi, times_1, cp_offset * sizeof(uc16)));
1272  }
1273  }
1274 }
1275 
1276 
1277 #undef __
1278 
1279 #endif // V8_INTERPRETED_REGEXP
1280 
1281 } // namespace internal
1282 } // namespace v8
1283 
1284 #endif // V8_TARGET_ARCH_IA32
Definition: libplatform.h:13