V8 API Reference, 7.2.502.16 (for Deno 0.2.4)
builtins-string.cc
1 // Copyright 2016 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "src/builtins/builtins-utils-inl.h"
6 #include "src/builtins/builtins.h"
7 #include "src/conversions.h"
8 #include "src/counters.h"
9 #include "src/objects-inl.h"
10 #ifdef V8_INTL_SUPPORT
11 #include "src/objects/intl-objects.h"
12 #endif
13 #include "src/regexp/regexp-utils.h"
14 #include "src/string-builder-inl.h"
15 #include "src/string-case.h"
16 #include "src/unicode-inl.h"
17 #include "src/unicode.h"
18 
19 namespace v8 {
20 namespace internal {
21 
22 namespace { // for String.fromCodePoint
23 
24 bool IsValidCodePoint(Isolate* isolate, Handle<Object> value) {
25  if (!value->IsNumber() &&
26  !Object::ToNumber(isolate, value).ToHandle(&value)) {
27  return false;
28  }
29 
30  if (Object::ToInteger(isolate, value).ToHandleChecked()->Number() !=
31  value->Number()) {
32  return false;
33  }
34 
35  if (value->Number() < 0 || value->Number() > 0x10FFFF) {
36  return false;
37  }
38 
39  return true;
40 }
41 
42 uc32 NextCodePoint(Isolate* isolate, BuiltinArguments args, int index) {
43  Handle<Object> value = args.at(1 + index);
44  ASSIGN_RETURN_ON_EXCEPTION_VALUE(isolate, value,
45  Object::ToNumber(isolate, value), -1);
46  if (!IsValidCodePoint(isolate, value)) {
47  isolate->Throw(*isolate->factory()->NewRangeError(
48  MessageTemplate::kInvalidCodePoint, value));
49  return -1;
50  }
51  return DoubleToUint32(value->Number());
52 }
53 
54 } // namespace
55 
56 // ES6 section 21.1.2.2 String.fromCodePoint ( ...codePoints )
57 BUILTIN(StringFromCodePoint) {
58  HandleScope scope(isolate);
59  int const length = args.length() - 1;
60  if (length == 0) return ReadOnlyRoots(isolate).empty_string();
61  DCHECK_LT(0, length);
62 
63  // Optimistically assume that the resulting String contains only one byte
64  // characters.
65  std::vector<uint8_t> one_byte_buffer;
66  one_byte_buffer.reserve(length);
67  uc32 code = 0;
68  int index;
69  for (index = 0; index < length; index++) {
70  code = NextCodePoint(isolate, args, index);
71  if (code < 0) {
72  return ReadOnlyRoots(isolate).exception();
73  }
74  if (code > String::kMaxOneByteCharCode) {
75  break;
76  }
77  one_byte_buffer.push_back(code);
78  }
79 
80  if (index == length) {
81  RETURN_RESULT_OR_FAILURE(
82  isolate, isolate->factory()->NewStringFromOneByte(Vector<uint8_t>(
83  one_byte_buffer.data(), one_byte_buffer.size())));
84  }
85 
86  std::vector<uc16> two_byte_buffer;
87  two_byte_buffer.reserve(length - index);
88 
89  while (true) {
90  if (code <= static_cast<uc32>(unibrow::Utf16::kMaxNonSurrogateCharCode)) {
91  two_byte_buffer.push_back(code);
92  } else {
93  two_byte_buffer.push_back(unibrow::Utf16::LeadSurrogate(code));
94  two_byte_buffer.push_back(unibrow::Utf16::TrailSurrogate(code));
95  }
96 
97  if (++index == length) {
98  break;
99  }
100  code = NextCodePoint(isolate, args, index);
101  if (code < 0) {
102  return ReadOnlyRoots(isolate).exception();
103  }
104  }
105 
106  Handle<SeqTwoByteString> result;
107  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
108  isolate, result,
109  isolate->factory()->NewRawTwoByteString(
110  static_cast<int>(one_byte_buffer.size() + two_byte_buffer.size())));
111 
112  DisallowHeapAllocation no_gc;
113  CopyChars(result->GetChars(), one_byte_buffer.data(), one_byte_buffer.size());
114  CopyChars(result->GetChars() + one_byte_buffer.size(), two_byte_buffer.data(),
115  two_byte_buffer.size());
116 
117  return *result;
118 }
119 
120 // ES6 section 21.1.3.6
121 // String.prototype.endsWith ( searchString [ , endPosition ] )
122 BUILTIN(StringPrototypeEndsWith) {
123  HandleScope handle_scope(isolate);
124  TO_THIS_STRING(str, "String.prototype.endsWith");
125 
126  // Check if the search string is a regExp and fail if it is.
127  Handle<Object> search = args.atOrUndefined(isolate, 1);
128  Maybe<bool> is_reg_exp = RegExpUtils::IsRegExp(isolate, search);
129  if (is_reg_exp.IsNothing()) {
130  DCHECK(isolate->has_pending_exception());
131  return ReadOnlyRoots(isolate).exception();
132  }
133  if (is_reg_exp.FromJust()) {
134  THROW_NEW_ERROR_RETURN_FAILURE(
135  isolate, NewTypeError(MessageTemplate::kFirstArgumentNotRegExp,
136  isolate->factory()->NewStringFromStaticChars(
137  "String.prototype.endsWith")));
138  }
139  Handle<String> search_string;
140  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, search_string,
141  Object::ToString(isolate, search));
142 
143  Handle<Object> position = args.atOrUndefined(isolate, 2);
144  int end;
145 
146  if (position->IsUndefined(isolate)) {
147  end = str->length();
148  } else {
149  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, position,
150  Object::ToInteger(isolate, position));
151  end = str->ToValidIndex(*position);
152  }
153 
154  int start = end - search_string->length();
155  if (start < 0) return ReadOnlyRoots(isolate).false_value();
156 
157  str = String::Flatten(isolate, str);
158  search_string = String::Flatten(isolate, search_string);
159 
160  DisallowHeapAllocation no_gc; // ensure vectors stay valid
161  String::FlatContent str_content = str->GetFlatContent();
162  String::FlatContent search_content = search_string->GetFlatContent();
163 
164  if (str_content.IsOneByte() && search_content.IsOneByte()) {
165  Vector<const uint8_t> str_vector = str_content.ToOneByteVector();
166  Vector<const uint8_t> search_vector = search_content.ToOneByteVector();
167 
168  return isolate->heap()->ToBoolean(memcmp(str_vector.start() + start,
169  search_vector.start(),
170  search_string->length()) == 0);
171  }
172 
173  FlatStringReader str_reader(isolate, str);
174  FlatStringReader search_reader(isolate, search_string);
175 
176  for (int i = 0; i < search_string->length(); i++) {
177  if (str_reader.Get(start + i) != search_reader.Get(i)) {
178  return ReadOnlyRoots(isolate).false_value();
179  }
180  }
181  return ReadOnlyRoots(isolate).true_value();
182 }
183 
184 // ES6 section 21.1.3.9
185 // String.prototype.lastIndexOf ( searchString [ , position ] )
186 BUILTIN(StringPrototypeLastIndexOf) {
187  HandleScope handle_scope(isolate);
188  return String::LastIndexOf(isolate, args.receiver(),
189  args.atOrUndefined(isolate, 1),
190  args.atOrUndefined(isolate, 2));
191 }
192 
193 // ES6 section 21.1.3.10 String.prototype.localeCompare ( that )
194 //
195 // This function is implementation specific. For now, we do not
196 // do anything locale specific.
197 BUILTIN(StringPrototypeLocaleCompare) {
198  HandleScope handle_scope(isolate);
199 
200  isolate->CountUsage(v8::Isolate::UseCounterFeature::kStringLocaleCompare);
201 
202 #ifdef V8_INTL_SUPPORT
203  TO_THIS_STRING(str1, "String.prototype.localeCompare");
204  Handle<String> str2;
205  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
206  isolate, str2, Object::ToString(isolate, args.atOrUndefined(isolate, 1)));
207  RETURN_RESULT_OR_FAILURE(
208  isolate, Intl::StringLocaleCompare(isolate, str1, str2,
209  args.atOrUndefined(isolate, 2),
210  args.atOrUndefined(isolate, 3)));
211 #else
212  DCHECK_EQ(2, args.length());
213 
214  TO_THIS_STRING(str1, "String.prototype.localeCompare");
215  Handle<String> str2;
216  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, str2,
217  Object::ToString(isolate, args.at(1)));
218 
219  if (str1.is_identical_to(str2)) return Smi::kZero; // Equal.
220  int str1_length = str1->length();
221  int str2_length = str2->length();
222 
223  // Decide trivial cases without flattening.
224  if (str1_length == 0) {
225  if (str2_length == 0) return Smi::kZero; // Equal.
226  return Smi::FromInt(-str2_length);
227  } else {
228  if (str2_length == 0) return Smi::FromInt(str1_length);
229  }
230 
231  int end = str1_length < str2_length ? str1_length : str2_length;
232 
233  // No need to flatten if we are going to find the answer on the first
234  // character. At this point we know there is at least one character
235  // in each string, due to the trivial case handling above.
236  int d = str1->Get(0) - str2->Get(0);
237  if (d != 0) return Smi::FromInt(d);
238 
239  str1 = String::Flatten(isolate, str1);
240  str2 = String::Flatten(isolate, str2);
241 
242  DisallowHeapAllocation no_gc;
243  String::FlatContent flat1 = str1->GetFlatContent();
244  String::FlatContent flat2 = str2->GetFlatContent();
245 
246  for (int i = 0; i < end; i++) {
247  if (flat1.Get(i) != flat2.Get(i)) {
248  return Smi::FromInt(flat1.Get(i) - flat2.Get(i));
249  }
250  }
251 
252  return Smi::FromInt(str1_length - str2_length);
253 #endif // !V8_INTL_SUPPORT
254 }
255 
256 #ifndef V8_INTL_SUPPORT
257 // ES6 section 21.1.3.12 String.prototype.normalize ( [form] )
258 //
259 // Simply checks the argument is valid and returns the string itself.
260 // If internationalization is enabled, then intl.js will override this function
261 // and provide the proper functionality, so this is just a fallback.
262 BUILTIN(StringPrototypeNormalize) {
263  HandleScope handle_scope(isolate);
264  TO_THIS_STRING(string, "String.prototype.normalize");
265 
266  Handle<Object> form_input = args.atOrUndefined(isolate, 1);
267  if (form_input->IsUndefined(isolate)) return *string;
268 
269  Handle<String> form;
270  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, form,
271  Object::ToString(isolate, form_input));
272 
273  if (!(String::Equals(isolate, form,
274  isolate->factory()->NewStringFromStaticChars("NFC")) ||
275  String::Equals(isolate, form,
276  isolate->factory()->NewStringFromStaticChars("NFD")) ||
277  String::Equals(isolate, form,
278  isolate->factory()->NewStringFromStaticChars("NFKC")) ||
279  String::Equals(isolate, form,
280  isolate->factory()->NewStringFromStaticChars("NFKD")))) {
281  Handle<String> valid_forms =
282  isolate->factory()->NewStringFromStaticChars("NFC, NFD, NFKC, NFKD");
283  THROW_NEW_ERROR_RETURN_FAILURE(
284  isolate,
285  NewRangeError(MessageTemplate::kNormalizationForm, valid_forms));
286  }
287 
288  return *string;
289 }
290 #endif // !V8_INTL_SUPPORT
291 
292 BUILTIN(StringPrototypeStartsWith) {
293  HandleScope handle_scope(isolate);
294  TO_THIS_STRING(str, "String.prototype.startsWith");
295 
296  // Check if the search string is a regExp and fail if it is.
297  Handle<Object> search = args.atOrUndefined(isolate, 1);
298  Maybe<bool> is_reg_exp = RegExpUtils::IsRegExp(isolate, search);
299  if (is_reg_exp.IsNothing()) {
300  DCHECK(isolate->has_pending_exception());
301  return ReadOnlyRoots(isolate).exception();
302  }
303  if (is_reg_exp.FromJust()) {
304  THROW_NEW_ERROR_RETURN_FAILURE(
305  isolate, NewTypeError(MessageTemplate::kFirstArgumentNotRegExp,
306  isolate->factory()->NewStringFromStaticChars(
307  "String.prototype.startsWith")));
308  }
309  Handle<String> search_string;
310  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, search_string,
311  Object::ToString(isolate, search));
312 
313  Handle<Object> position = args.atOrUndefined(isolate, 2);
314  int start;
315 
316  if (position->IsUndefined(isolate)) {
317  start = 0;
318  } else {
319  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, position,
320  Object::ToInteger(isolate, position));
321  start = str->ToValidIndex(*position);
322  }
323 
324  if (start + search_string->length() > str->length()) {
325  return ReadOnlyRoots(isolate).false_value();
326  }
327 
328  FlatStringReader str_reader(isolate, String::Flatten(isolate, str));
329  FlatStringReader search_reader(isolate,
330  String::Flatten(isolate, search_string));
331 
332  for (int i = 0; i < search_string->length(); i++) {
333  if (str_reader.Get(start + i) != search_reader.Get(i)) {
334  return ReadOnlyRoots(isolate).false_value();
335  }
336  }
337  return ReadOnlyRoots(isolate).true_value();
338 }
339 
340 #ifndef V8_INTL_SUPPORT
341 namespace {
342 
343 inline bool ToUpperOverflows(uc32 character) {
344  // y with umlauts and the micro sign are the only characters that stop
345  // fitting into one-byte when converting to uppercase.
346  static const uc32 yuml_code = 0xFF;
347  static const uc32 micro_code = 0xB5;
348  return (character == yuml_code || character == micro_code);
349 }
350 
351 template <class Converter>
352 V8_WARN_UNUSED_RESULT static Object* ConvertCaseHelper(
353  Isolate* isolate, String string, SeqString result, int result_length,
355  DisallowHeapAllocation no_gc;
356  // We try this twice, once with the assumption that the result is no longer
357  // than the input and, if that assumption breaks, again with the exact
358  // length. This may not be pretty, but it is nicer than what was here before
359  // and I hereby claim my vaffel-is.
360  //
361  // NOTE: This assumes that the upper/lower case of an ASCII
362  // character is also ASCII. This is currently the case, but it
363  // might break in the future if we implement more context and locale
364  // dependent upper/lower conversions.
365  bool has_changed_character = false;
366 
367  // Convert all characters to upper case, assuming that they will fit
368  // in the buffer
369  StringCharacterStream stream(string);
370  unibrow::uchar chars[Converter::kMaxWidth];
371  // We can assume that the string is not empty
372  uc32 current = stream.GetNext();
373  bool ignore_overflow = Converter::kIsToLower || result->IsSeqTwoByteString();
374  for (int i = 0; i < result_length;) {
375  bool has_next = stream.HasMore();
376  uc32 next = has_next ? stream.GetNext() : 0;
377  int char_length = mapping->get(current, next, chars);
378  if (char_length == 0) {
379  // The case conversion of this character is the character itself.
380  result->Set(i, current);
381  i++;
382  } else if (char_length == 1 &&
383  (ignore_overflow || !ToUpperOverflows(current))) {
384  // Common case: converting the letter resulted in one character.
385  DCHECK(static_cast<uc32>(chars[0]) != current);
386  result->Set(i, chars[0]);
387  has_changed_character = true;
388  i++;
389  } else if (result_length == string->length()) {
390  bool overflows = ToUpperOverflows(current);
391  // We've assumed that the result would be as long as the
392  // input but here is a character that converts to several
393  // characters. No matter, we calculate the exact length
394  // of the result and try the whole thing again.
395  //
396  // Note that this leaves room for optimization. We could just
397  // memcpy what we already have to the result string. Also,
398  // the result string is the last object allocated we could
399  // "realloc" it and probably, in the vast majority of cases,
400  // extend the existing string to be able to hold the full
401  // result.
402  int next_length = 0;
403  if (has_next) {
404  next_length = mapping->get(next, 0, chars);
405  if (next_length == 0) next_length = 1;
406  }
407  int current_length = i + char_length + next_length;
408  while (stream.HasMore()) {
409  current = stream.GetNext();
410  overflows |= ToUpperOverflows(current);
411  // NOTE: we use 0 as the next character here because, while
412  // the next character may affect what a character converts to,
413  // it does not in any case affect the length of what it convert
414  // to.
415  int char_length = mapping->get(current, 0, chars);
416  if (char_length == 0) char_length = 1;
417  current_length += char_length;
418  if (current_length > String::kMaxLength) {
419  AllowHeapAllocation allocate_error_and_return;
420  THROW_NEW_ERROR_RETURN_FAILURE(isolate,
421  NewInvalidStringLengthError());
422  }
423  }
424  // Try again with the real length. Return signed if we need
425  // to allocate a two-byte string for to uppercase.
426  return (overflows && !ignore_overflow) ? Smi::FromInt(-current_length)
427  : Smi::FromInt(current_length);
428  } else {
429  for (int j = 0; j < char_length; j++) {
430  result->Set(i, chars[j]);
431  i++;
432  }
433  has_changed_character = true;
434  }
435  current = next;
436  }
437  if (has_changed_character) {
438  return result;
439  } else {
440  // If we didn't actually change anything in doing the conversion
441  // we simple return the result and let the converted string
442  // become garbage; there is no reason to keep two identical strings
443  // alive.
444  return string;
445  }
446 }
447 
448 template <class Converter>
449 V8_WARN_UNUSED_RESULT static Object* ConvertCase(
450  Handle<String> s, Isolate* isolate,
452  s = String::Flatten(isolate, s);
453  int length = s->length();
454  // Assume that the string is not empty; we need this assumption later
455  if (length == 0) return *s;
456 
457  // Simpler handling of ASCII strings.
458  //
459  // NOTE: This assumes that the upper/lower case of an ASCII
460  // character is also ASCII. This is currently the case, but it
461  // might break in the future if we implement more context and locale
462  // dependent upper/lower conversions.
463  if (s->IsOneByteRepresentationUnderneath()) {
464  // Same length as input.
465  Handle<SeqOneByteString> result =
466  isolate->factory()->NewRawOneByteString(length).ToHandleChecked();
467  DisallowHeapAllocation no_gc;
468  String::FlatContent flat_content = s->GetFlatContent();
469  DCHECK(flat_content.IsFlat());
470  bool has_changed_character = false;
471  int index_to_first_unprocessed = FastAsciiConvert<Converter::kIsToLower>(
472  reinterpret_cast<char*>(result->GetChars()),
473  reinterpret_cast<const char*>(flat_content.ToOneByteVector().start()),
474  length, &has_changed_character);
475  // If not ASCII, we discard the result and take the 2 byte path.
476  if (index_to_first_unprocessed == length)
477  return has_changed_character ? *result : *s;
478  }
479 
480  Handle<SeqString> result; // Same length as input.
481  if (s->IsOneByteRepresentation()) {
482  result = isolate->factory()->NewRawOneByteString(length).ToHandleChecked();
483  } else {
484  result = isolate->factory()->NewRawTwoByteString(length).ToHandleChecked();
485  }
486 
487  Object* answer = ConvertCaseHelper(isolate, *s, *result, length, mapping);
488  if (answer->IsException(isolate) || answer->IsString()) return answer;
489 
490  DCHECK(answer->IsSmi());
491  length = Smi::ToInt(answer);
492  if (s->IsOneByteRepresentation() && length > 0) {
493  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
494  isolate, result, isolate->factory()->NewRawOneByteString(length));
495  } else {
496  if (length < 0) length = -length;
497  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
498  isolate, result, isolate->factory()->NewRawTwoByteString(length));
499  }
500  return ConvertCaseHelper(isolate, *s, *result, length, mapping);
501 }
502 
503 } // namespace
504 
505 BUILTIN(StringPrototypeToLocaleLowerCase) {
506  HandleScope scope(isolate);
507  TO_THIS_STRING(string, "String.prototype.toLocaleLowerCase");
508  return ConvertCase(string, isolate,
509  isolate->runtime_state()->to_lower_mapping());
510 }
511 
512 BUILTIN(StringPrototypeToLocaleUpperCase) {
513  HandleScope scope(isolate);
514  TO_THIS_STRING(string, "String.prototype.toLocaleUpperCase");
515  return ConvertCase(string, isolate,
516  isolate->runtime_state()->to_upper_mapping());
517 }
518 
519 BUILTIN(StringPrototypeToLowerCase) {
520  HandleScope scope(isolate);
521  TO_THIS_STRING(string, "String.prototype.toLowerCase");
522  return ConvertCase(string, isolate,
523  isolate->runtime_state()->to_lower_mapping());
524 }
525 
526 BUILTIN(StringPrototypeToUpperCase) {
527  HandleScope scope(isolate);
528  TO_THIS_STRING(string, "String.prototype.toUpperCase");
529  return ConvertCase(string, isolate,
530  isolate->runtime_state()->to_upper_mapping());
531 }
532 #endif // !V8_INTL_SUPPORT
533 
534 // ES6 #sec-string.prototype.raw
535 BUILTIN(StringRaw) {
536  HandleScope scope(isolate);
537  Handle<Object> templ = args.atOrUndefined(isolate, 1);
538  const uint32_t argc = args.length();
539  Handle<String> raw_string =
540  isolate->factory()->NewStringFromAsciiChecked("raw");
541 
542  Handle<Object> cooked;
543  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, cooked,
544  Object::ToObject(isolate, templ));
545 
546  Handle<Object> raw;
547  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
548  isolate, raw, Object::GetProperty(isolate, cooked, raw_string));
549  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, raw,
550  Object::ToObject(isolate, raw));
551  Handle<Object> raw_len;
552  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
553  isolate, raw_len,
554  Object::GetProperty(isolate, raw, isolate->factory()->length_string()));
555 
556  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, raw_len,
557  Object::ToLength(isolate, raw_len));
558 
559  IncrementalStringBuilder result_builder(isolate);
560  const uint32_t length = static_cast<uint32_t>(raw_len->Number());
561  if (length > 0) {
562  Handle<Object> first_element;
563  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, first_element,
564  Object::GetElement(isolate, raw, 0));
565 
566  Handle<String> first_string;
567  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
568  isolate, first_string, Object::ToString(isolate, first_element));
569  result_builder.AppendString(first_string);
570 
571  for (uint32_t i = 1, arg_i = 2; i < length; i++, arg_i++) {
572  if (arg_i < argc) {
573  Handle<String> argument_string;
574  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
575  isolate, argument_string,
576  Object::ToString(isolate, args.at(arg_i)));
577  result_builder.AppendString(argument_string);
578  }
579 
580  Handle<Object> element;
581  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, element,
582  Object::GetElement(isolate, raw, i));
583 
584  Handle<String> element_string;
585  ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, element_string,
586  Object::ToString(isolate, element));
587  result_builder.AppendString(element_string);
588  }
589  }
590 
591  RETURN_RESULT_OR_FAILURE(isolate, result_builder.Finish());
592 }
593 
594 } // namespace internal
595 } // namespace v8
Definition: libplatform.h:13