5 #include "src/regexp/regexp-macro-assembler.h" 7 #include "src/assembler.h" 8 #include "src/isolate-inl.h" 9 #include "src/regexp/regexp-stack.h" 10 #include "src/simulator.h" 11 #include "src/unicode-inl.h" 13 #ifdef V8_INTL_SUPPORT 14 #include "unicode/uchar.h" 15 #endif // V8_INTL_SUPPORT 20 RegExpMacroAssembler::RegExpMacroAssembler(Isolate* isolate, Zone* zone)
// Base-class constructor taking an isolate and a zone. The visible
// initializers start with slow-safe compilation switched off and the global
// mode set to NOT_GLOBAL.
// NOTE(review): the remaining initializers and the constructor body are not
// visible in this excerpt - confirm against the complete definition.
21 : slow_safe_compiler_(false),
22 global_mode_(NOT_GLOBAL),
// Destructor: explicitly defaulted, so no hand-written teardown happens here.
26 RegExpMacroAssembler::~RegExpMacroAssembler() =
default;
// Case-insensitively compares two substrings of 16-bit code units.
// byte_offset1 and byte_offset2 are reinterpreted as uc16 pointers to the two
// substrings and byte_length is their common length in bytes. Returns 0 as
// soon as a pair of characters is found not to match.
// NOTE(review): the full parameter list and the value returned on a
// successful match are not visible in this excerpt - confirm against the
// complete definition.
28 int RegExpMacroAssembler::CaseInsensitiveCompareUC16(Address byte_offset1,
// Canonicalization mapping obtained from the isolate; it is used by the
// fallback loop further down.
33 isolate->regexp_macro_assembler_canonicalize();
// Each character is two bytes wide, so the byte length must be even.
37 DCHECK_EQ(0, byte_length % 2);
38 uc16* substring1 =
reinterpret_cast<uc16*
>(byte_offset1);
39 uc16* substring2 =
reinterpret_cast<uc16*
>(byte_offset2);
// Convert the byte length into a count of 16-bit code units.
40 size_t length = byte_length >> 1;
// ICU path: only compiled with V8_INTL_SUPPORT and only taken when no isolate
// is supplied.
// NOTE(review): presumably a null isolate is how callers request unicode
// case folding - verify against the call sites.
42 #ifdef V8_INTL_SUPPORT 43 if (isolate ==
nullptr) {
44 for (
size_t i = 0;
i < length;
i++) {
45 uc32 c1 = substring1[
i];
46 uc32 c2 = substring2[
i];
// A lead surrogate in the first string must be mirrored by a lead surrogate
// in the second string; otherwise the comparison fails immediately.
47 if (unibrow::Utf16::IsLeadSurrogate(c1)) {
50 if (!unibrow::Utf16::IsLeadSurrogate(c2))
return 0;
// Look at the code unit following each lead surrogate and, when both are
// trail surrogates, combine each pair into a single code point.
// NOTE(review): no guard that i + 1 is within length is visible in this
// excerpt - confirm one exists in the complete definition.
52 uc16 c1t = substring1[
i + 1];
53 uc16 c2t = substring2[
i + 1];
54 if (unibrow::Utf16::IsTrailSurrogate(c1t) &&
55 unibrow::Utf16::IsTrailSurrogate(c2t)) {
56 c1 = unibrow::Utf16::CombineSurrogatePair(c1, c1t);
57 c2 = unibrow::Utf16::CombineSurrogatePair(c2, c2t);
// Fold both code points with ICU's default case folding and compare them.
62 c1 = u_foldCase(c1, U_FOLD_CASE_DEFAULT);
63 c2 = u_foldCase(c2, U_FOLD_CASE_DEFAULT);
64 if (c1 != c2)
return 0;
68 #endif // V8_INTL_SUPPORT 69 DCHECK_NOT_NULL(isolate);
// Fallback path: walks both substrings one 16-bit code unit at a time and
// passes each unit through the canonicalize mapping.
70 for (
size_t i = 0;
i < length;
i++) {
71 unibrow::uchar c1 = substring1[
i];
72 unibrow::uchar c2 = substring2[
i];
// s1 / s2 start out holding the original character and are handed to
// canonicalize->get() as the output buffer.
// NOTE(review): presumably the buffer keeps the original character when no
// mapping exists - confirm against unibrow::Mapping::get.
74 unibrow::uchar s1[1] = {c1};
75 canonicalize->get(c1,
'\0', s1);
77 unibrow::uchar s2[1] = {c2};
78 canonicalize->get(c2,
'\0', s2);
// Emits checks around the position cp_offset: loads the character at
// cp_offset and tests it against the trail-surrogate range, then loads the
// character at cp_offset - 1 and tests it against the lead-surrogate range
// with on_failure as the label.
// NOTE(review): presumably on_failure is taken when cp_offset sits between
// the two halves of a surrogate pair; the declaration and binding of the
// local label `ok` are not visible in this excerpt - confirm.
89 void RegExpMacroAssembler::CheckNotInSurrogatePair(
int cp_offset,
93 LoadCurrentCharacter(cp_offset, &ok);
94 CheckCharacterNotInRange(kTrailSurrogateStart, kTrailSurrogateEnd, &ok);
96 LoadCurrentCharacter(cp_offset - 1, &ok);
97 CheckCharacterInRange(kLeadSurrogateStart, kLeadSurrogateEnd, on_failure);
// Delegates to LoadCurrentCharacter for cp_offset, passing on_outside_input
// as the label and `true` as the third argument.
// NOTE(review): presumably the third argument requests a bounds check so that
// on_outside_input is taken when cp_offset is outside the input - confirm
// against the LoadCurrentCharacter declaration.
101 void RegExpMacroAssembler::CheckPosition(
int cp_offset,
102 Label* on_outside_input) {
103 LoadCurrentCharacter(cp_offset, on_outside_input,
true);
// Takes a character-class type code and a label to use on a failed match, and
// returns a bool.
// NOTE(review): the body is not visible in this excerpt, so the meaning of
// the return value cannot be stated from here - confirm against the complete
// definition.
106 bool RegExpMacroAssembler::CheckSpecialCharacterClass(uc16 type,
107 Label* on_no_match) {
// The native macro assembler below is guarded by V8_INTERPRETED_REGEXP not
// being defined. Its constructor does no work of its own: it forwards isolate
// and zone to the RegExpMacroAssembler base constructor.
111 #ifndef V8_INTERPRETED_REGEXP // Avoid unused code, e.g., on ARM. 113 NativeRegExpMacroAssembler::NativeRegExpMacroAssembler(Isolate* isolate,
115 : RegExpMacroAssembler(isolate, zone) {}
// Destructor: explicitly defaulted, so no hand-written teardown happens here.
117 NativeRegExpMacroAssembler::~NativeRegExpMacroAssembler() =
default;
// Returns true only when FLAG_enable_regexp_unaligned_accesses is set and
// slow_safe() is false.
119 bool NativeRegExpMacroAssembler::CanReadUnaligned() {
120 return FLAG_enable_regexp_unaligned_accesses && !slow_safe();
// Returns a byte pointer to the character at start_index inside the
// character storage that backs subject.
// The string is first unwrapped: a cons string is replaced by its first part,
// a sliced string by its parent (with the slice offset added to start_index),
// and a thin string by its actual string. The result is then taken from the
// sequential or external, one-byte or two-byte, character data.
123 const byte* NativeRegExpMacroAssembler::StringCharacterPosition(
124 String subject,
int start_index) {
125 if (subject->IsConsString()) {
126 subject = ConsString::cast(subject)->first();
127 }
else if (subject->IsSlicedString()) {
128 start_index += SlicedString::cast(subject)->offset();
129 subject = SlicedString::cast(subject)->parent();
// The thin-string check is applied to whatever string the step above left.
131 if (subject->IsThinString()) {
132 subject = ThinString::cast(subject)->actual();
// The (possibly adjusted) index must lie within the unwrapped string.
134 DCHECK_LE(0, start_index);
135 DCHECK_LE(start_index, subject->length());
// Dispatch on the concrete representation. start_index is added to the
// GetChars() pointer before the cast to a byte pointer.
136 if (subject->IsSeqOneByteString()) {
137 return reinterpret_cast<const byte*
>(
138 SeqOneByteString::cast(subject)->GetChars() + start_index);
139 }
else if (subject->IsSeqTwoByteString()) {
140 return reinterpret_cast<const byte*
>(
141 SeqTwoByteString::cast(subject)->GetChars() + start_index);
142 }
else if (subject->IsExternalOneByteString()) {
143 return reinterpret_cast<const byte*
>(
144 ExternalOneByteString::cast(subject)->GetChars() + start_index);
// The only representation left at this point is an external two-byte string.
146 DCHECK(subject->IsExternalTwoByteString());
147 return reinterpret_cast<const byte*
>(
148 ExternalTwoByteString::cast(subject)->GetChars() + start_index);
// Checks for JS stack overflow and pending interrupts on behalf of running
// regexp code, then patches the caller's view of the code and the subject
// string in case either changed.
//
// return_address, subject, input_start and input_end are in/out pointers that
// may be rewritten. return_value starts at 0 and is set to EXCEPTION or RETRY
// by the branches below.
// NOTE(review): the final return statement and some closing braces / else
// keywords are not visible in this excerpt - confirm the exact branch
// structure against the complete definition.
152 int NativeRegExpMacroAssembler::CheckStackGuardState(
153 Isolate* isolate,
int start_index,
bool is_direct_call,
154 Address* return_address, Code re_code, Address* subject,
155 const byte** input_start,
const byte** input_end) {
156 AllowHeapAllocation allow_allocation;
// The return address must point inside the instructions of re_code.
157 DCHECK(re_code->raw_instruction_start() <= *return_address);
158 DCHECK(*return_address <= re_code->raw_instruction_end());
159 int return_value = 0;
// Hold the code object and the subject string in handles, and remember the
// subject's current one-byte / two-byte representation for comparison later.
161 HandleScope handles(isolate);
162 Handle<Code> code_handle(re_code, isolate);
163 Handle<String> subject_handle(String::cast(ObjectPtr(*subject)), isolate);
164 bool is_one_byte = subject_handle->IsOneByteRepresentationUnderneath();
166 StackLimitCheck check(isolate);
167 bool js_has_overflowed = check.JsHasOverflowed();
// Direct call: nothing is handled here; the result is EXCEPTION if the JS
// stack has overflowed and RETRY otherwise.
169 if (is_direct_call) {
175 return_value = js_has_overflowed ? EXCEPTION : RETRY;
176 }
// Not a direct call and the JS stack has overflowed: report it through the
// isolate and return EXCEPTION.
else if (js_has_overflowed) {
177 isolate->StackOverflow();
178 return_value = EXCEPTION;
// Let the stack guard process pending interrupts; an exception result
// becomes EXCEPTION.
// NOTE(review): presumably this is the final else branch - confirm.
180 Object* result = isolate->stack_guard()->HandleInterrupts();
181 if (result->IsException(isolate)) return_value = EXCEPTION;
184 DisallowHeapAllocation no_gc;
// If the handle now refers to a different code object than re_code, shift the
// return address by the distance between the two objects.
186 if (*code_handle != re_code) {
187 intptr_t delta = code_handle->address() - re_code->address();
189 *return_address += delta;
// Only when no exception or retry has been decided so far: if the subject's
// representation changed, ask for a RETRY; the subject pointer and the input
// range are refreshed from the handle, keeping the same byte length.
// NOTE(review): presumably the refresh below is the else branch of the
// representation check - confirm.
193 if (return_value == 0) {
195 if (subject_handle->IsOneByteRepresentationUnderneath() != is_one_byte) {
199 return_value = RETRY;
201 *subject = subject_handle->ptr();
202 intptr_t byte_length = *input_end - *input_start;
203 *input_start = StringCharacterPosition(*subject_handle, start_index);
204 *input_end = *input_start + byte_length;
// Matches compiled regexp code against a flat subject string, starting at
// previous_index. Unwraps the subject to its underlying sequential or
// external string, computes the input byte range, and calls Execute.
// NOTE(review): several parameters, the remaining Execute arguments and the
// return statement are not visible in this excerpt - confirm against the
// complete definition.
210 NativeRegExpMacroAssembler::Result NativeRegExpMacroAssembler::Match(
211 Handle<Code> regexp_code,
212 Handle<String> subject,
214 int offsets_vector_length,
// Preconditions: the subject is flat and previous_index lies within it.
218 DCHECK(subject->IsFlat());
219 DCHECK_LE(0, previous_index);
220 DCHECK_LE(previous_index, subject->length());
226 String subject_ptr = *subject;
228 int start_offset = previous_index;
// Number of characters from the start offset to the end of the subject.
229 int char_length = subject_ptr->length() - start_offset;
230 int slice_offset = 0;
// A cons string is expected to have an empty second part, so matching
// continues on its first part. For a sliced string, matching continues on the
// parent and the slice offset is remembered.
234 if (StringShape(subject_ptr).IsCons()) {
235 DCHECK_EQ(0, ConsString::cast(subject_ptr)->second()->length());
236 subject_ptr = ConsString::cast(subject_ptr)->first();
237 }
else if (StringShape(subject_ptr).IsSliced()) {
238 SlicedString slice = SlicedString::cast(subject_ptr);
239 subject_ptr = slice->parent();
240 slice_offset = slice->offset();
242 if (StringShape(subject_ptr).IsThin()) {
243 subject_ptr = ThinString::cast(subject_ptr)->actual();
// The character width is taken from the unwrapped string, which must be an
// external or sequential string by now.
246 bool is_one_byte = subject_ptr->IsOneByteRepresentation();
247 DCHECK(subject_ptr->IsExternalString() || subject_ptr->IsSeqString());
// Shift that turns a character count into a byte count: 0 for one-byte
// strings, 1 for two-byte strings.
249 int char_size_shift = is_one_byte ? 0 : 1;
// Heap allocation is disallowed from here while raw character pointers are in
// use.
// NOTE(review): presumably so the string cannot move under the pointers -
// confirm.
251 DisallowHeapAllocation no_gc;
252 const byte* input_start =
253 StringCharacterPosition(subject_ptr, start_offset + slice_offset);
254 int byte_length = char_length << char_size_shift;
255 const byte* input_end = input_start + byte_length;
256 Result res = Execute(*regexp_code,
262 offsets_vector_length,
// Calls the generated matcher code with the given input range and output
// buffer and returns its result converted to Result.
// NOTE(review): the leading parameters (the code object and the input string)
// and the initialization of direct_call are not visible in this excerpt -
// confirm against the complete definition.
267 NativeRegExpMacroAssembler::Result NativeRegExpMacroAssembler::Execute(
270 int start_offset,
const byte* input_start,
const byte* input_end,
271 int* output,
int output_size, Isolate* isolate) {
// Open a regexp stack scope and fetch the stack base to hand to the generated
// code.
273 RegExpStackScope stack_scope(isolate);
274 Address stack_base = stack_scope.stack()->stack_base();
// Signature of the generated matcher entry point.
278 using RegexpMatcherSig =
int(
279 Address input_string,
int start_offset,
280 const byte* input_start,
const byte* input_end,
int* output,
281 int output_size, Address stack_base,
int direct_call, Isolate* isolate);
283 auto fn = GeneratedCode<RegexpMatcherSig>::FromCode(code);
284 int result = fn.Call(input.ptr(), start_offset, input_start, input_end,
285 output, output_size, stack_base, direct_call, isolate);
// Results below RETRY are not expected from the generated code.
286 DCHECK(result >= RETRY);
// An EXCEPTION result without a pending exception on the isolate is turned
// into a stack overflow report.
// NOTE(review): presumably this is the case where the generated code ran out
// of regexp stack - confirm.
288 if (result == EXCEPTION && !isolate->has_pending_exception()) {
293 AllowHeapAllocation allow_allocation;
294 isolate->StackOverflow();
296 return static_cast<Result
>(result);
// Lookup table indexed by character code: 0xFF marks the ASCII digits,
// upper-case letters, lower-case letters and '_', and every other entry is
// 0x00. 256 entries are visible, eight per row.
// NOTE(review): the closing brace of the initializer is not visible in this
// excerpt.
300 const byte NativeRegExpMacroAssembler::word_character_map[] = {
// 0x00-0x1F: no entries set.
301 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
302 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
303 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
304 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
// 0x20-0x3F: only the digits '0'-'9' (0x30-0x39) are set.
306 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
307 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
308 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu,
309 0xFFu, 0xFFu, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
// 0x40-0x5F: 'A'-'Z' (0x41-0x5A) and '_' (0x5F) are set.
311 0x00u, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu,
312 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu,
313 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu,
314 0xFFu, 0xFFu, 0xFFu, 0x00u, 0x00u, 0x00u, 0x00u, 0xFFu,
// 0x60-0x7F: 'a'-'z' (0x61-0x7A) are set.
316 0x00u, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu,
317 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu,
318 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu, 0xFFu,
319 0xFFu, 0xFFu, 0xFFu, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
// 0x80-0xFF: no entries set.
321 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
322 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
323 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
324 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
326 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
327 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
328 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
329 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
331 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
332 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
333 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
334 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
336 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
337 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
338 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
339 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
// Grows the isolate's regexp stack to twice its current capacity and returns
// the stack pointer translated to the new stack: it sits the same number of
// bytes below the new base as stack_pointer sat below the old base.
// *stack_base is updated to the new base.
// NOTE(review): the remaining parameters and the body of the failure branch
// (EnsureCapacity returning kNullAddress) are not visible in this excerpt -
// confirm against the complete definition.
343 Address NativeRegExpMacroAssembler::GrowStack(Address stack_pointer,
346 RegExpStack* regexp_stack = isolate->regexp_stack();
347 size_t size = regexp_stack->stack_capacity();
348 Address old_stack_base = regexp_stack->stack_base();
// The caller's view of the stack must agree with the isolate's regexp stack,
// and the stack pointer must lie within the current capacity below the base.
349 DCHECK(old_stack_base == *stack_base);
350 DCHECK(stack_pointer <= old_stack_base);
351 DCHECK(static_cast<size_t>(old_stack_base - stack_pointer) <= size);
352 Address new_stack_base = regexp_stack->EnsureCapacity(size * 2);
353 if (new_stack_base == kNullAddress) {
356 *stack_base = new_stack_base;
// Distance between the old base and the stack pointer, kept unchanged
// relative to the new base.
357 intptr_t stack_content_size = old_stack_base - stack_pointer;
358 return new_stack_base - stack_content_size;
361 #endif // V8_INTERPRETED_REGEXP