5 #ifndef V8_STRING_SEARCH_H_ 6 #define V8_STRING_SEARCH_H_ 8 #include "src/isolate.h" 9 #include "src/vector.h" 27 static const int kBMMaxShift = Isolate::kBMMaxShift;
36 static const int kLatin1AlphabetSize = 256;
37 static const int kUC16AlphabetSize = Isolate::kUC16AlphabetSize;
42 static const int kBMMinPatternLength = 7;
49 return String::IsOneByte(
string.start(),
string.length());
56 template <
typename PatternChar,
typename SubjectChar>
62 start_(Max(0, pattern.length() - kBMMaxShift)) {
63 if (
sizeof(PatternChar) >
sizeof(SubjectChar)) {
64 if (!IsOneByteString(pattern_)) {
65 strategy_ = &FailSearch;
69 int pattern_length = pattern_.length();
70 if (pattern_length < kBMMinPatternLength) {
71 if (pattern_length == 1) {
72 strategy_ = &SingleCharSearch;
75 strategy_ = &LinearSearch;
78 strategy_ = &InitialSearch;
82 return strategy_(
this, subject, index);
85 static inline int AlphabetSize() {
86 if (
sizeof(PatternChar) == 1) {
88 return kLatin1AlphabetSize;
90 DCHECK_EQ(
sizeof(PatternChar), 2);
92 return kUC16AlphabetSize;
97 typedef int (*SearchFunction)(
120 static int BoyerMooreHorspoolSearch(
129 void PopulateBoyerMooreHorspoolTable();
131 void PopulateBoyerMooreTable();
133 static inline bool exceedsOneByte(uint8_t c) {
137 static inline bool exceedsOneByte(uint16_t c) {
138 return c > String::kMaxOneByteCharCodeU;
141 static inline int CharOccurrence(
int* bad_char_occurrence,
142 SubjectChar char_code) {
143 if (
sizeof(SubjectChar) == 1) {
144 return bad_char_occurrence[
static_cast<int>(char_code)];
146 if (
sizeof(PatternChar) == 1) {
147 if (exceedsOneByte(char_code)) {
150 return bad_char_occurrence[
static_cast<unsigned int>(char_code)];
153 int equiv_class = char_code % kUC16AlphabetSize;
154 return bad_char_occurrence[equiv_class];
164 int* bad_char_table() {
165 return isolate_->bad_char_shift_table();
169 int* good_suffix_shift_table() {
172 return isolate_->good_suffix_shift_table() - start_;
177 int* suffix_table() {
180 return isolate_->suffix_table() - start_;
187 SearchFunction strategy_;
193 template <
typename T,
typename U>
194 inline T AlignDown(
T value, U alignment) {
195 return reinterpret_cast<T>(
196 (
reinterpret_cast<uintptr_t>(value) & ~(alignment - 1)));
200 inline uint8_t GetHighestValueByte(uc16 character) {
201 return Max(static_cast<uint8_t>(character & 0xFF),
202 static_cast<uint8_t>(character >> 8));
206 inline uint8_t GetHighestValueByte(uint8_t character) {
return character; }
209 template <
typename PatternChar,
typename SubjectChar>
210 inline int FindFirstCharacter(Vector<const PatternChar> pattern,
211 Vector<const SubjectChar> subject,
int index) {
212 const PatternChar pattern_first_char = pattern[0];
213 const int max_n = (subject.length() - pattern.length() + 1);
215 const uint8_t search_byte = GetHighestValueByte(pattern_first_char);
216 const SubjectChar search_char =
static_cast<SubjectChar
>(pattern_first_char);
219 DCHECK_GE(max_n - pos, 0);
220 const SubjectChar* char_pos =
reinterpret_cast<const SubjectChar*
>(
221 memchr(subject.start() + pos, search_byte,
222 (max_n - pos) *
sizeof(SubjectChar)));
223 if (char_pos ==
nullptr)
return -1;
224 char_pos = AlignDown(char_pos,
sizeof(SubjectChar));
225 pos =
static_cast<int>(char_pos - subject.start());
226 if (subject[pos] == search_char)
return pos;
227 }
while (++pos < max_n);
237 template <
typename PatternChar,
typename SubjectChar>
238 int StringSearch<PatternChar, SubjectChar>::SingleCharSearch(
239 StringSearch<PatternChar, SubjectChar>* search,
240 Vector<const SubjectChar> subject,
242 DCHECK_EQ(1, search->pattern_.length());
243 PatternChar pattern_first_char = search->pattern_[0];
244 if (
sizeof(PatternChar) >
sizeof(SubjectChar)) {
245 if (exceedsOneByte(pattern_first_char)) {
249 return FindFirstCharacter(search->pattern_, subject, index);
257 template <
typename PatternChar,
typename SubjectChar>
258 inline bool CharCompare(
const PatternChar* pattern,
259 const SubjectChar* subject,
261 DCHECK_GT(length, 0);
264 if (pattern[pos] != subject[pos]) {
268 }
while (pos < length);
274 template <
typename PatternChar,
typename SubjectChar>
275 int StringSearch<PatternChar, SubjectChar>::LinearSearch(
276 StringSearch<PatternChar, SubjectChar>* search,
277 Vector<const SubjectChar> subject,
279 Vector<const PatternChar> pattern = search->pattern_;
280 DCHECK_GT(pattern.length(), 1);
281 int pattern_length = pattern.length();
283 int n = subject.length() - pattern_length;
285 i = FindFirstCharacter(pattern, subject,
i);
286 if (
i == -1)
return -1;
291 if (CharCompare(pattern.start() + 1,
293 pattern_length - 1)) {
304 template <
typename PatternChar,
typename SubjectChar>
305 int StringSearch<PatternChar, SubjectChar>::BoyerMooreSearch(
306 StringSearch<PatternChar, SubjectChar>* search,
307 Vector<const SubjectChar> subject,
309 Vector<const PatternChar> pattern = search->pattern_;
310 int subject_length = subject.length();
311 int pattern_length = pattern.length();
313 int start = search->start_;
315 int* bad_char_occurence = search->bad_char_table();
316 int* good_suffix_shift = search->good_suffix_shift_table();
318 PatternChar last_char = pattern[pattern_length - 1];
319 int index = start_index;
321 while (index <= subject_length - pattern_length) {
322 int j = pattern_length - 1;
324 while (last_char != (c = subject[index + j])) {
326 j - CharOccurrence(bad_char_occurence, c);
328 if (index > subject_length - pattern_length) {
332 while (j >= 0 && pattern[j] == (c = subject[index + j])) j--;
335 }
else if (j < start) {
338 index += pattern_length - 1
339 - CharOccurrence(bad_char_occurence,
340 static_cast<SubjectChar>(last_char));
342 int gs_shift = good_suffix_shift[j + 1];
344 CharOccurrence(bad_char_occurence, c);
345 int shift = j - bc_occ;
346 if (gs_shift > shift) {
357 template <
typename PatternChar,
typename SubjectChar>
358 void StringSearch<PatternChar, SubjectChar>::PopulateBoyerMooreTable() {
359 int pattern_length = pattern_.length();
360 const PatternChar* pattern = pattern_.start();
364 int length = pattern_length - start;
368 int* shift_table = good_suffix_shift_table();
369 int* suffix_table = this->suffix_table();
372 for (
int i = start;
i < pattern_length;
i++) {
373 shift_table[
i] = length;
375 shift_table[pattern_length] = 1;
376 suffix_table[pattern_length] = pattern_length + 1;
378 if (pattern_length <= start) {
383 PatternChar last_char = pattern[pattern_length - 1];
384 int suffix = pattern_length + 1;
386 int i = pattern_length;
388 PatternChar c = pattern[
i - 1];
389 while (suffix <= pattern_length && c != pattern[suffix - 1]) {
390 if (shift_table[suffix] == length) {
391 shift_table[suffix] = suffix -
i;
393 suffix = suffix_table[suffix];
395 suffix_table[--
i] = --suffix;
396 if (suffix == pattern_length) {
398 while ((
i > start) && (pattern[
i - 1] != last_char)) {
399 if (shift_table[pattern_length] == length) {
400 shift_table[pattern_length] = pattern_length -
i;
402 suffix_table[--
i] = pattern_length;
405 suffix_table[--
i] = --suffix;
411 if (suffix < pattern_length) {
412 for (
int i = start;
i <= pattern_length;
i++) {
413 if (shift_table[
i] == length) {
414 shift_table[
i] = suffix - start;
417 suffix = suffix_table[suffix];
427 template <
typename PatternChar,
typename SubjectChar>
428 int StringSearch<PatternChar, SubjectChar>::BoyerMooreHorspoolSearch(
429 StringSearch<PatternChar, SubjectChar>* search,
430 Vector<const SubjectChar> subject,
432 Vector<const PatternChar> pattern = search->pattern_;
433 int subject_length = subject.length();
434 int pattern_length = pattern.length();
435 int* char_occurrences = search->bad_char_table();
436 int badness = -pattern_length;
439 PatternChar last_char = pattern[pattern_length - 1];
440 int last_char_shift = pattern_length - 1 -
441 CharOccurrence(char_occurrences, static_cast<SubjectChar>(last_char));
443 int index = start_index;
444 while (index <= subject_length - pattern_length) {
445 int j = pattern_length - 1;
447 while (last_char != (subject_char = subject[index + j])) {
448 int bc_occ = CharOccurrence(char_occurrences, subject_char);
449 int shift = j - bc_occ;
451 badness += 1 - shift;
452 if (index > subject_length - pattern_length) {
457 while (j >= 0 && pattern[j] == (subject[index + j])) j--;
461 index += last_char_shift;
466 badness += (pattern_length - j) - last_char_shift;
468 search->PopulateBoyerMooreTable();
469 search->strategy_ = &BoyerMooreSearch;
470 return BoyerMooreSearch(search, subject, index);
478 template <
typename PatternChar,
typename SubjectChar>
479 void StringSearch<PatternChar, SubjectChar>::PopulateBoyerMooreHorspoolTable() {
480 int pattern_length = pattern_.length();
482 int* bad_char_occurrence = bad_char_table();
489 int table_size = AlphabetSize();
491 memset(bad_char_occurrence,
493 table_size *
sizeof(*bad_char_occurrence));
495 for (
int i = 0;
i < table_size;
i++) {
496 bad_char_occurrence[
i] = start - 1;
499 for (
int i = start;
i < pattern_length - 1;
i++) {
500 PatternChar c = pattern_[
i];
501 int bucket = (
sizeof(PatternChar) == 1) ? c : c % AlphabetSize();
502 bad_char_occurrence[bucket] =
i;
512 template <
typename PatternChar,
typename SubjectChar>
513 int StringSearch<PatternChar, SubjectChar>::InitialSearch(
514 StringSearch<PatternChar, SubjectChar>* search,
515 Vector<const SubjectChar> subject,
517 Vector<const PatternChar> pattern = search->pattern_;
518 int pattern_length = pattern.length();
522 int badness = -10 - (pattern_length << 2);
526 for (
int i = index, n = subject.length() - pattern_length;
i <= n;
i++) {
529 i = FindFirstCharacter(pattern, subject,
i);
530 if (
i == -1)
return -1;
534 if (pattern[j] != subject[
i + j]) {
538 }
while (j < pattern_length);
539 if (j == pattern_length) {
544 search->PopulateBoyerMooreHorspoolTable();
545 search->strategy_ = &BoyerMooreHorspoolSearch;
546 return BoyerMooreHorspoolSearch(search, subject,
i);
557 template <
typename SubjectChar,
typename PatternChar>
558 int SearchString(Isolate* isolate,
559 Vector<const SubjectChar> subject,
560 Vector<const PatternChar> pattern,
562 StringSearch<PatternChar, SubjectChar> search(isolate, pattern);
563 return search.Search(subject, start_index);
569 template <
typename SubjectChar,
typename PatternChar>
570 intptr_t SearchStringRaw(Isolate* isolate,
const SubjectChar* subject_ptr,
571 int subject_length,
const PatternChar* pattern_ptr,
572 int pattern_length,
int start_index) {
573 DisallowHeapAllocation no_gc;
574 Vector<const SubjectChar> subject(subject_ptr, subject_length);
575 Vector<const PatternChar> pattern(pattern_ptr, pattern_length);
576 return SearchString(isolate, subject, pattern, start_index);
582 #endif // V8_STRING_SEARCH_H_