V8 API Reference, 7.2.502.16 (for Deno 0.2.4)
string.h
1 // Copyright 2017 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #ifndef V8_OBJECTS_STRING_H_
6 #define V8_OBJECTS_STRING_H_
7 
8 #include "src/base/bits.h"
9 #include "src/objects/instance-type.h"
10 #include "src/objects/name.h"
11 #include "src/objects/smi.h"
12 #include "src/unicode-decoder.h"
13 
14 // Has to be the last include (doesn't have include guards):
15 #include "src/objects/object-macros.h"
16 
17 namespace v8 {
18 namespace internal {
19 
20 enum InstanceType : uint16_t;
21 
// Option flags taken by String::ToCString(): AllowNullsFlag is its
// allow_nulls argument and RobustnessFlag its robustness_flag argument.
22 enum AllowNullsFlag { ALLOW_NULLS, DISALLOW_NULLS };
23 enum RobustnessFlag { ROBUST_STRING_TRAVERSAL, FAST_STRING_TRAVERSAL };
24 
25 // The characteristics of a string are stored in its map. Retrieving these
26 // few bits of information is moderately expensive, involving two memory
27 // loads where the second is dependent on the first. To improve efficiency
28 // the shape of the string is given its own class so that it can be retrieved
29 // once and used for several string operations. A StringShape is small enough
30 // to be passed by value and is immutable, but be aware that flattening a
31 // string can potentially alter its shape. Also be aware that a GC caused by
32 // something else can alter the shape of a string due to ConsString
33 // shortcutting. Keeping these restrictions in mind has proven to be error-
34 // prone and so we no longer put StringShapes in variables unless there is a
35 // concrete performance benefit at that particular point in the code.
36 class StringShape {
37  public:
    // A shape can be constructed from a String, from a Map, or directly from
    // an InstanceType. All constructors are explicit.
38  inline explicit StringShape(const String s);
39  inline explicit StringShape(Map s);
40  inline explicit StringShape(InstanceType t);
    // Predicates and tag accessors. Their definitions are not in this header.
41  inline bool IsSequential();
42  inline bool IsExternal();
43  inline bool IsCons();
44  inline bool IsSliced();
45  inline bool IsThin();
46  inline bool IsIndirect();
47  inline bool IsExternalOneByte();
48  inline bool IsExternalTwoByte();
49  inline bool IsSequentialOneByte();
50  inline bool IsSequentialTwoByte();
51  inline bool IsInternalized();
52  inline StringRepresentationTag representation_tag();
53  inline uint32_t encoding_tag();
54  inline uint32_t full_representation_tag();
55  inline bool HasOnlyOneByteChars();
56 #ifdef DEBUG
    // DEBUG builds expose the raw type_ value and track a validity flag that
    // invalidate() clears.
57  inline uint32_t type() { return type_; }
58  inline void invalidate() { valid_ = false; }
59  inline bool valid() { return valid_; }
60 #else
    // Outside DEBUG there is no validity flag, so invalidate() is a no-op.
61  inline void invalidate() {}
62 #endif
63 
64  private:
    // NOTE(review): presumably the instance-type bits that the predicates
    // above test; the code that fills it in is not visible here.
65  uint32_t type_;
66 #ifdef DEBUG
    // set_valid() sets valid_ in DEBUG builds and is a no-op otherwise.
67  inline void set_valid() { valid_ = true; }
68  bool valid_;
69 #else
70  inline void set_valid() {}
71 #endif
72 };
73 
74 // The String abstract class captures JavaScript string values:
75 //
76 // Ecma-262:
77 // 4.3.16 String Value
78 // A string value is a member of the type String and is a finite
79 // ordered sequence of zero or more 16-bit unsigned integer values.
80 //
81 // All string values have a length field.
82 class String : public Name {
83  public:
84  enum Encoding { ONE_BYTE_ENCODING, TWO_BYTE_ENCODING };
85 
86  // Representation of the flat content of a String.
87  // A non-flat string doesn't have flat content.
88  // A flat string has content that's encoded as a sequence of either
89  // one-byte chars or two-byte UC16.
90  // Returned by String::GetFlatContent().
91  class FlatContent {
92  public:
93  // Returns true if the string is flat and this structure contains content.
94  bool IsFlat() const { return state_ != NON_FLAT; }
95  // Returns true if the structure contains one-byte content.
96  bool IsOneByte() const { return state_ == ONE_BYTE; }
97  // Returns true if the structure contains two-byte content.
98  bool IsTwoByte() const { return state_ == TWO_BYTE; }
99 
100  // Return the one-byte content of the string. Only use if IsOneByte()
101  // returns true (debug-checked).
102  Vector<const uint8_t> ToOneByteVector() const {
103  DCHECK_EQ(ONE_BYTE, state_);
104  return Vector<const uint8_t>(onebyte_start, length_);
105  }
106  // Return the two-byte content of the string. Only use if IsTwoByte()
107  // returns true (debug-checked).
108  Vector<const uc16> ToUC16Vector() const {
109  DCHECK_EQ(TWO_BYTE, state_);
110  return Vector<const uc16>(twobyte_start, length_);
111  }
112 
     // Returns the code unit at index i, read from whichever buffer state_
     // selects. Debug-checks that i < length_ and that the content is flat;
     // there is no check here that i is non-negative.
113  uc16 Get(int i) const {
114  DCHECK(i < length_);
115  DCHECK(state_ != NON_FLAT);
116  if (state_ == ONE_BYTE) return onebyte_start[i];
117  return twobyte_start[i];
118  }
119 
     // Compares start pointers only. onebyte_start shares a union with
     // twobyte_start, so this compares the stored start address whichever
     // encoding is active; length_ and state_ are not compared.
120  bool UsesSameString(const FlatContent& other) const {
121  return onebyte_start == other.onebyte_start;
122  }
123 
124  private:
125  enum State { NON_FLAT, ONE_BYTE, TWO_BYTE };
126 
127  // Constructors only used by String::GetFlatContent(). The default
     // constructor builds the NON_FLAT value: null start pointer, length 0.
128  explicit FlatContent(const uint8_t* start, int length)
129  : onebyte_start(start), length_(length), state_(ONE_BYTE) {}
130  explicit FlatContent(const uc16* start, int length)
131  : twobyte_start(start), length_(length), state_(TWO_BYTE) {}
132  FlatContent() : onebyte_start(nullptr), length_(0), state_(NON_FLAT) {}
133 
     // Start of the character data; state_ records which member is in use.
134  union {
135  const uint8_t* onebyte_start;
136  const uc16* twobyte_start;
137  };
138  int length_;
139  State state_;
140 
141  friend class String;
142  friend class IterableSubString;
143  };
144 
145  template <typename Char>
146  V8_INLINE Vector<const Char> GetCharVector();
147 
148  // Get and set the length of the string.
149  inline int length() const;
150  inline void set_length(int value);
151 
152  // Get and set the length of the string using acquire loads and release
153  // stores.
154  inline int synchronized_length() const;
155  inline void synchronized_set_length(int value);
156 
157  // Returns whether this string has only one-byte chars, i.e. all of them can
158  // be one-byte encoded. This might be the case even if the string is
159  // two-byte. Such strings may appear when the embedder prefers
160  // two-byte external representations even for one-byte data.
161  inline bool IsOneByteRepresentation() const;
162  inline bool IsTwoByteRepresentation() const;
163 
164  // Cons and slices have an encoding flag that may not represent the actual
165  // encoding of the underlying string. This is taken into account here.
166  // Requires: this->IsFlat()
167  inline bool IsOneByteRepresentationUnderneath();
168  inline bool IsTwoByteRepresentationUnderneath();
169 
170  // NOTE: this should be considered only a hint. False negatives are
171  // possible.
172  inline bool HasOnlyOneByteChars();
173 
174  // Get and set individual two byte chars in the string.
175  inline void Set(int index, uint16_t value);
176  // Get individual two byte char in the string. Repeated calls
177  // to this method are not efficient unless the string is flat.
178  V8_INLINE uint16_t Get(int index);
179 
180  // ES6 section 7.1.3.1 ToNumber Applied to the String Type
181  static Handle<Object> ToNumber(Isolate* isolate, Handle<String> subject);
182 
183  // Flattens the string. Checks first inline to see if it is
184  // necessary. Does nothing if the string is not a cons string.
185  // Flattening allocates a sequential string with the same data as
186  // the given string and mutates the cons string to a degenerate
187  // form, where the first component is the new sequential string and
188  // the second component is the empty string. If allocation fails,
189  // this function returns a failure. If flattening succeeds, this
190  // function returns the sequential string that is now the first
191  // component of the cons string.
192  //
193  // Degenerate cons strings are handled specially by the garbage
194  // collector (see IsShortcutCandidate).
195 
196  static inline Handle<String> Flatten(Isolate* isolate, Handle<String> string,
197  PretenureFlag pretenure = NOT_TENURED);
198 
199  // Tries to return the content of a flat string as a structure holding either
200  // a flat vector of char or of uc16.
201  // If the string isn't flat, and therefore doesn't have flat content, the
202  // returned structure will report so, and can't provide a vector of either
203  // kind.
204  FlatContent GetFlatContent();
205 
206  // Returns the parent of a sliced string or first part of a flat cons string.
207  // Requires: StringShape(this).IsIndirect() && this->IsFlat()
208  inline String GetUnderlying();
209 
210  // String relational comparison, implemented according to ES6 section 7.2.11
211  // Abstract Relational Comparison (step 5): The comparison of Strings uses a
212  // simple lexicographic ordering on sequences of code unit values. There is no
213  // attempt to use the more complex, semantically oriented definitions of
214  // character or string equality and collating order defined in the Unicode
215  // specification. Therefore String values that are canonically equal according
216  // to the Unicode standard could test as unequal. In effect this algorithm
217  // assumes that both Strings are already in normalized form. Also, note that
218  // for strings containing supplementary characters, lexicographic ordering on
219  // sequences of UTF-16 code unit values differs from that on sequences of code
220  // point values.
221  V8_WARN_UNUSED_RESULT static ComparisonResult Compare(Isolate* isolate,
222  Handle<String> x,
223  Handle<String> y);
224 
225  // Perform ES6 21.1.3.8, including checking arguments.
226  static Object* IndexOf(Isolate* isolate, Handle<Object> receiver,
227  Handle<Object> search, Handle<Object> position);
228  // Perform string match of pattern on subject, starting at start index.
229  // Caller must ensure that 0 <= start_index <= sub->length(), as this does not
230  // check any arguments.
231  static int IndexOf(Isolate* isolate, Handle<String> receiver,
232  Handle<String> search, int start_index);
233 
234  static Object* LastIndexOf(Isolate* isolate, Handle<Object> receiver,
235  Handle<Object> search, Handle<Object> position);
236 
237  // Encapsulates logic related to a match and its capture groups as required
238  // by GetSubstitution.
239  class Match {
240  public:
     // Pure-virtual interface; String::GetSubstitution() takes a Match*.
     // NOTE(review): presumably the matched text and the subject text before
     // and after it, as GetSubstitution's $-expressions need; confirm against
     // an implementation.
241  virtual Handle<String> GetMatch() = 0;
242  virtual Handle<String> GetPrefix() = 0;
243  virtual Handle<String> GetSuffix() = 0;
244 
245  // A named capture can be invalid (if it is not specified in the pattern),
246  // unmatched (specified but not matched in the current string), or matched.
247  enum CaptureState { INVALID, UNMATCHED, MATCHED };
248 
249  virtual int CaptureCount() = 0;
250  virtual bool HasNamedCaptures() = 0;
     // Both capture getters report extra status through an out-parameter
     // (capture_exists / state) in addition to the returned MaybeHandle.
251  virtual MaybeHandle<String> GetCapture(int i, bool* capture_exists) = 0;
252  virtual MaybeHandle<String> GetNamedCapture(Handle<String> name,
253  CaptureState* state) = 0;
254 
     // Virtual so that implementations can be destroyed through a Match*.
255  virtual ~Match() = default;
256  };
257 
258  // ES#sec-getsubstitution
259  // GetSubstitution(matched, str, position, captures, replacement)
260  // Expand the $-expressions in the string and return a new string with
261  // the result.
262  // A {start_index} can be passed to specify where to start scanning the
263  // replacement string.
264  V8_WARN_UNUSED_RESULT static MaybeHandle<String> GetSubstitution(
265  Isolate* isolate, Match* match, Handle<String> replacement,
266  int start_index = 0);
267 
268  // String equality operations.
269  inline bool Equals(String other);
270  inline static bool Equals(Isolate* isolate, Handle<String> one,
271  Handle<String> two);
272  bool IsUtf8EqualTo(Vector<const char> str, bool allow_prefix_match = false);
273 
274  // Dispatches to Is{One,Two}ByteEqualTo.
275  template <typename Char>
276  bool IsEqualTo(Vector<const Char> str);
277 
278  bool IsOneByteEqualTo(Vector<const uint8_t> str);
279  bool IsTwoByteEqualTo(Vector<const uc16> str);
280 
281  // Return a UTF8 representation of the string. The string is null
282  // terminated but may optionally contain nulls. Length is returned
283  // in length_output if length_output is not a null pointer. The string
284  // should be nearly flat, otherwise the performance of this method may
285  // be very slow (quadratic in the length). Setting robustness_flag to
286  // ROBUST_STRING_TRAVERSAL invokes behaviour that is robust. This means it
287  // handles unexpected data without causing assert failures and it does not
288  // do any heap allocations. This is useful when printing stack traces.
289  std::unique_ptr<char[]> ToCString(AllowNullsFlag allow_nulls,
290  RobustnessFlag robustness_flag, int offset,
291  int length, int* length_output = nullptr);
292  std::unique_ptr<char[]> ToCString(
293  AllowNullsFlag allow_nulls = DISALLOW_NULLS,
294  RobustnessFlag robustness_flag = FAST_STRING_TRAVERSAL,
295  int* length_output = nullptr);
296 
297  bool ComputeArrayIndex(uint32_t* index);
298 
299  // Externalization.
300  bool MakeExternal(v8::String::ExternalStringResource* resource);
301  bool MakeExternal(v8::String::ExternalOneByteStringResource* resource);
302  bool SupportsExternalization();
303 
304  // Conversion.
305  inline bool AsArrayIndex(uint32_t* index);
306  uint32_t inline ToValidIndex(Object* number);
307 
308  // Trimming.
309  enum TrimMode { kTrim, kTrimStart, kTrimEnd };
310  static Handle<String> Trim(Isolate* isolate, Handle<String> string,
311  TrimMode mode);
312 
313  DECL_CAST2(String)
314 
315  void PrintOn(FILE* out);
316 
317  // For use during stack traces. Performs rudimentary sanity check.
318  bool LooksValid();
319 
320  // Dispatched behavior.
321  void StringShortPrint(StringStream* accumulator, bool show_details = true);
322  void PrintUC16(std::ostream& os, int start = 0, int end = -1); // NOLINT
323 #if defined(DEBUG) || defined(OBJECT_PRINT)
324  char* ToAsciiArray();
325 #endif
326  DECL_PRINTER(String)
327  DECL_VERIFIER(String)
328 
329  inline bool IsFlat();
330 
331  // Layout description.
332  static const int kLengthOffset = Name::kHeaderSize;
333  static const int kHeaderSize = kLengthOffset + kInt32Size;
334 
335  // Max char codes.
336  static const int32_t kMaxOneByteCharCode = unibrow::Latin1::kMaxChar;
337  static const uint32_t kMaxOneByteCharCodeU = unibrow::Latin1::kMaxChar;
338  static const int kMaxUtf16CodeUnit = 0xffff;
339  static const uint32_t kMaxUtf16CodeUnitU = kMaxUtf16CodeUnit;
340  static const uc32 kMaxCodePoint = 0x10ffff;
341 
342  // Maximal string length.
343  // The max length is different on 32 and 64 bit platforms. Max length for a
344  // 32-bit platform is ~268.4M chars. On 64-bit platforms, max length is
345  // ~1.073B chars. The limit on 64-bit is so that SeqTwoByteString::kMaxSize
346  // can fit in a 32bit int: 2^31 - 1 is the max positive int, minus one bit as
347  // each char needs two bytes, subtract 24 bytes for the string header size.
348 
349  // See include/v8.h for the definition.
350  static const int kMaxLength = v8::String::kMaxLength;
351  static_assert(kMaxLength <= (Smi::kMaxValue / 2 - kHeaderSize),
352  "Unexpected max String length");
353 
354  // Max length for computing hash. For strings longer than this limit the
355  // string length is used as the hash value.
356  static const int kMaxHashCalcLength = 16383;
357 
358  // Limit for truncation in short printing.
359  static const int kMaxShortPrintLength = 1024;
360 
361  // Helper function for flattening strings.
362  template <typename sinkchar>
363  static void WriteToFlat(String source, sinkchar* sink, int from, int to);
364 
365  // The return value may point to the first aligned word containing the first
366  // non-one-byte character, rather than directly to the non-one-byte character.
367  // If the return value is >= the passed length, the entire string was
368  // one-byte.
369  static inline int NonAsciiStart(const char* chars, int length) {
370  const char* start = chars;
371  const char* limit = chars + length;
372 
     // The word-at-a-time path is only taken for inputs of at least
     // kIntptrSize bytes; shorter inputs go straight to the byte loop below.
373  if (length >= kIntptrSize) {
374  // Check leading bytes one at a time until chars is uintptr_t-aligned.
375  while (!IsAligned(reinterpret_cast<intptr_t>(chars), sizeof(uintptr_t))) {
376  if (static_cast<uint8_t>(*chars) > unibrow::Utf8::kMaxOneByteChar) {
377  return static_cast<int>(chars - start);
378  }
379  ++chars;
380  }
381  // Check aligned words. The mask has 0x80 in every byte (assuming
     // kUintptrAllBitsSet is all ones), so a non-zero AND means some byte in
     // the word exceeds 0x7F. The offset returned is that of the word, not of
     // the exact byte.
382  DCHECK_EQ(unibrow::Utf8::kMaxOneByteChar, 0x7F);
383  const uintptr_t non_one_byte_mask = kUintptrAllBitsSet / 0xFF * 0x80;
384  while (chars + sizeof(uintptr_t) <= limit) {
385  if (*reinterpret_cast<const uintptr_t*>(chars) & non_one_byte_mask) {
386  return static_cast<int>(chars - start);
387  }
388  chars += sizeof(uintptr_t);
389  }
390  }
391  // Check the remaining tail bytes (or the whole input when it was shorter
     // than kIntptrSize) one at a time.
392  while (chars < limit) {
393  if (static_cast<uint8_t>(*chars) > unibrow::Utf8::kMaxOneByteChar) {
394  return static_cast<int>(chars - start);
395  }
396  ++chars;
397  }
398 
     // No offending byte found: chars == limit here, so this returns length.
399  return static_cast<int>(chars - start);
400  }
401 
402  static inline bool IsAscii(const char* chars, int length) {
     // True when NonAsciiStart() reports no byte above
     // unibrow::Utf8::kMaxOneByteChar in the first `length` bytes.
403  return NonAsciiStart(chars, length) >= length;
404  }
405 
406  static inline bool IsAscii(const uint8_t* chars, int length) {
     // uint8_t overload: reinterprets the buffer as const char* and applies
     // the same NonAsciiStart() test as the const char* overload.
407  return NonAsciiStart(reinterpret_cast<const char*>(chars), length) >=
408  length;
409  }
410 
411  static inline int NonOneByteStart(const uc16* chars, int length) {
     // Returns the index of the first code unit greater than
     // kMaxOneByteCharCodeU, or `length` when there is none. Unlike
     // NonAsciiStart(), this scans one unit at a time, so the index is exact.
412  const uc16* limit = chars + length;
413  const uc16* start = chars;
414  while (chars < limit) {
415  if (*chars > kMaxOneByteCharCodeU) return static_cast<int>(chars - start);
416  ++chars;
417  }
418  return static_cast<int>(chars - start);
419  }
420 
421  static inline bool IsOneByte(const uc16* chars, int length) {
     // True when none of the first `length` code units exceeds
     // kMaxOneByteCharCodeU.
422  return NonOneByteStart(chars, length) >= length;
423  }
424 
425  template <class Visitor>
426  static inline ConsString VisitFlat(Visitor* visitor, String string,
427  int offset = 0);
428 
429  static Handle<FixedArray> CalculateLineEnds(Isolate* isolate,
430  Handle<String> string,
431  bool include_ending_line);
432 
433  private:
434  friend class Name;
435  friend class StringTableInsertionKey;
436  friend class InternalizedStringKey;
437 
438  static Handle<String> SlowFlatten(Isolate* isolate, Handle<ConsString> cons,
439  PretenureFlag tenure);
440 
441  // Slow case of String::Equals. This implementation works on any strings
442  // but it is most efficient on strings that are almost flat.
443  bool SlowEquals(String other);
444 
445  static bool SlowEquals(Isolate* isolate, Handle<String> one,
446  Handle<String> two);
447 
448  // Slow case of AsArrayIndex.
449  V8_EXPORT_PRIVATE bool SlowAsArrayIndex(uint32_t* index);
450 
451  // Compute and set the hash code.
452  uint32_t ComputeAndSetHash(Isolate* isolate);
453 
454  OBJECT_CONSTRUCTORS(String, Name);
455 };
456 
458  public:
459  explicit inline SubStringRange(String string, int first = 0, int length = -1);
460  class iterator;
461  inline iterator begin();
462  inline iterator end();
463 
464  private:
465  String string_;
466  int first_;
467  int length_;
468 };
469 
470 // The SeqString abstract class captures sequential string values.
// It is the direct base class of SeqOneByteString and SeqTwoByteString.
471 class SeqString : public String {
472  public:
473  DECL_CAST2(SeqString)
474 
475  // Truncate the string in-place if possible and return the result.
476  // In case of new_length == 0, the empty string is returned without
477  // truncating the original string.
478  V8_WARN_UNUSED_RESULT static Handle<String> Truncate(Handle<SeqString> string,
479  int new_length);
480 
481  OBJECT_CONSTRUCTORS(SeqString, String);
482 };
483 
484 class InternalizedString : public String {
     // Declares only the cast and constructor macros; it adds no members of
     // its own to String in this header.
485  public:
486  DECL_CAST2(InternalizedString)
487  // TODO(neis): Possibly move some stuff from String here.
488 
489  OBJECT_CONSTRUCTORS(InternalizedString, String);
490 };
491 
492 // The SeqOneByteString class captures sequential one-byte string objects.
493 // Each character in a SeqOneByteString is a one-byte character.
494 class SeqOneByteString : public SeqString {
495  public:
496  static const bool kHasOneByteEncoding = true;
497 
498  // Dispatched behavior.
499  inline uint16_t SeqOneByteStringGet(int index);
500  inline void SeqOneByteStringSet(int index, uint16_t value);
501 
502  // Get the address of the characters in this string.
503  inline Address GetCharsAddress();
504 
505  inline uint8_t* GetChars();
506 
507  // Clear uninitialized padding space. This ensures that the snapshot content
508  // is deterministic.
509  void clear_padding();
510 
511  DECL_CAST2(SeqOneByteString)
512 
513  // Garbage collection support. This method is called by the
514  // garbage collector to compute the actual size of a SeqOneByteString
515  // instance.
516  inline int SeqOneByteStringSize(InstanceType instance_type);
517 
518  // Computes the size for a SeqOneByteString instance of a given length:
     // the header plus length * kCharSize, passed through OBJECT_POINTER_ALIGN.
519  static int SizeFor(int length) {
520  return OBJECT_POINTER_ALIGN(kHeaderSize + length * kCharSize);
521  }
522 
523  // Maximal memory usage for a single sequential one-byte string.
524  static const int kMaxCharsSize = kMaxLength;
525  static const int kMaxSize = OBJECT_POINTER_ALIGN(kMaxCharsSize + kHeaderSize);
526  STATIC_ASSERT((kMaxSize - kHeaderSize) >= String::kMaxLength);
527 
528  class BodyDescriptor;
529 
530  OBJECT_CONSTRUCTORS(SeqOneByteString, SeqString);
531 };
532 
533 // The SeqTwoByteString class captures sequential unicode string objects.
534 // Each character in a SeqTwoByteString is a two-byte uint16_t.
535 class SeqTwoByteString : public SeqString {
536  public:
537  static const bool kHasOneByteEncoding = false;
538 
539  // Dispatched behavior.
540  inline uint16_t SeqTwoByteStringGet(int index);
541  inline void SeqTwoByteStringSet(int index, uint16_t value);
542 
543  // Get the address of the characters in this string.
544  inline Address GetCharsAddress();
545 
546  inline uc16* GetChars();
547 
548  // Clear uninitialized padding space. This ensures that the snapshot content
549  // is deterministic.
550  void clear_padding();
551 
552  DECL_CAST2(SeqTwoByteString)
553 
554  // Garbage collection support. This method is called by the
555  // garbage collector to compute the actual size of a SeqTwoByteString
556  // instance.
557  inline int SeqTwoByteStringSize(InstanceType instance_type);
558 
559  // Computes the size for a SeqTwoByteString instance of a given length:
     // the header plus length * kShortSize, passed through OBJECT_POINTER_ALIGN.
560  static int SizeFor(int length) {
561  return OBJECT_POINTER_ALIGN(kHeaderSize + length * kShortSize);
562  }
563 
564  // Maximal memory usage for a single sequential two-byte string. The
     // character payload is twice kMaxLength because each unit takes two bytes.
565  static const int kMaxCharsSize = kMaxLength * 2;
566  static const int kMaxSize = OBJECT_POINTER_ALIGN(kMaxCharsSize + kHeaderSize);
567  STATIC_ASSERT(static_cast<int>((kMaxSize - kHeaderSize) / sizeof(uint16_t)) >=
568  String::kMaxLength);
569 
570  class BodyDescriptor;
571 
572  OBJECT_CONSTRUCTORS(SeqTwoByteString, SeqString);
573 };
574 
575 // The ConsString class describes string values built by using the
576 // addition operator on strings. A ConsString is a pair where the
577 // first and second components are pointers to other string values.
578 // One or both components of a ConsString can be pointers to other
579 // ConsStrings, creating a binary tree of ConsStrings where the leaves
580 // are non-ConsString string values. The string value represented by
581 // a ConsString can be obtained by concatenating the leaf string
582 // values in a left-to-right depth-first traversal of the tree.
583 class ConsString : public String {
584  public:
585  // First string of the cons cell.
586  inline String first();
587  // Doesn't check that the result is a string, even in debug mode. This is
588  // useful during GC where the mark bits confuse the checks.
589  inline Object* unchecked_first();
     // The setter takes a WriteBarrierMode, defaulting to UPDATE_WRITE_BARRIER.
590  inline void set_first(Isolate* isolate, String first,
591  WriteBarrierMode mode = UPDATE_WRITE_BARRIER);
592 
593  // Second string of the cons cell.
594  inline String second();
595  // Doesn't check that the result is a string, even in debug mode. This is
596  // useful during GC where the mark bits confuse the checks.
597  inline Object* unchecked_second();
     // The setter takes a WriteBarrierMode, defaulting to UPDATE_WRITE_BARRIER.
598  inline void set_second(Isolate* isolate, String second,
599  WriteBarrierMode mode = UPDATE_WRITE_BARRIER);
600 
601  // Dispatched behavior.
602  V8_EXPORT_PRIVATE uint16_t ConsStringGet(int index);
603 
604  DECL_CAST2(ConsString)
605 
606  // Layout description: two kTaggedSize fields (first, second) starting at
     // String::kHeaderSize, followed by the zero-width kSize end marker.
607 #define CONS_STRING_FIELDS(V) \
608  V(kFirstOffset, kTaggedSize) \
609  V(kSecondOffset, kTaggedSize) \
610  /* Total size. */ \
611  V(kSize, 0)
612 
613  DEFINE_FIELD_OFFSET_CONSTANTS(String::kHeaderSize, CONS_STRING_FIELDS)
614 #undef CONS_STRING_FIELDS
615 
616  // Minimum length for a cons string.
617  static const int kMinLength = 13;
618 
620 
621  DECL_VERIFIER(ConsString)
622 
623  OBJECT_CONSTRUCTORS(ConsString, String);
624 };
625 
626 // The ThinString class describes string objects that are just references
627 // to another string object. They are used for in-place internalization when
628 // the original string cannot actually be internalized in-place: in these
629 // cases, the original string is converted to a ThinString pointing at its
630 // internalized version (which is allocated as a new object).
631 // In terms of memory layout and most algorithms operating on strings,
632 // ThinStrings can be thought of as "one-part cons strings".
633 class ThinString : public String {
634  public:
635  // Actual string that this ThinString refers to.
636  inline String actual() const;
     // Variant of actual() that returns a HeapObject* rather than a String.
     // NOTE(review): presumably skips the string type check, like
     // ConsString::unchecked_first(); confirm in the -inl.h definition.
637  inline HeapObject* unchecked_actual() const;
638  inline void set_actual(String s,
639  WriteBarrierMode mode = UPDATE_WRITE_BARRIER);
640 
641  V8_EXPORT_PRIVATE uint16_t ThinStringGet(int index);
642 
643  DECL_CAST2(ThinString)
644  DECL_VERIFIER(ThinString)
645 
646  // Layout description: a single kTaggedSize field (actual) starting at
     // String::kHeaderSize, followed by the zero-width kSize end marker.
647 #define THIN_STRING_FIELDS(V) \
648  V(kActualOffset, kTaggedSize) \
649  /* Total size. */ \
650  V(kSize, 0)
651 
652  DEFINE_FIELD_OFFSET_CONSTANTS(String::kHeaderSize, THIN_STRING_FIELDS)
653 #undef THIN_STRING_FIELDS
654 
656 
657  OBJECT_CONSTRUCTORS(ThinString, String);
658 };
659 
660 // The Sliced String class describes strings that are substrings of another
661 // sequential string. The motivation is to save time and memory when creating
662 // a substring. A Sliced String is described as a pointer to the parent,
663 // the offset from the start of the parent string and the length. Using
664 // a Sliced String therefore requires unpacking of the parent string and
665 // adding the offset to the start address. Substrings of a Sliced String
666 // are not nested, since the double indirection is simplified when creating
667 // such a substring.
668 // Currently missing features are:
669 // - handling externalized parent strings
670 // - external strings as parent
671 // - truncating sliced string to enable otherwise unneeded parent to be GC'ed.
672 class SlicedString : public String {
673  public:
     // The parent string and this slice's offset into it.
674  inline String parent();
675  inline void set_parent(Isolate* isolate, String parent,
676  WriteBarrierMode mode = UPDATE_WRITE_BARRIER);
677  inline int offset() const;
678  inline void set_offset(int offset);
679 
680  // Dispatched behavior.
681  V8_EXPORT_PRIVATE uint16_t SlicedStringGet(int index);
682 
683  DECL_CAST2(SlicedString)
684 
685  // Layout description: two kTaggedSize fields (parent, offset) starting at
     // String::kHeaderSize, followed by the zero-width kSize end marker.
686 #define SLICED_STRING_FIELDS(V) \
687  V(kParentOffset, kTaggedSize) \
688  V(kOffsetOffset, kTaggedSize) \
689  /* Total size. */ \
690  V(kSize, 0)
691 
692  DEFINE_FIELD_OFFSET_CONSTANTS(String::kHeaderSize, SLICED_STRING_FIELDS)
693 #undef SLICED_STRING_FIELDS
694 
695  // Minimum length for a sliced string.
696  static const int kMinLength = 13;
697 
699 
700  DECL_VERIFIER(SlicedString)
701 
702  OBJECT_CONSTRUCTORS(SlicedString, String);
703 };
704 
705 // The ExternalString class describes string values that are backed by
706 // a string resource that lies outside the V8 heap. ExternalStrings
707 // consist of the length field common to all strings and a pointer to the
708 // external resource. It is important to ensure (externally) that the
709 // resource is not deallocated while the ExternalString is live in the
710 // V8 heap.
711 //
712 // The API expects that all ExternalStrings are created through the
713 // API. Therefore, ExternalStrings should not be used internally.
714 class ExternalString : public String {
715  public:
716  DECL_CAST2(ExternalString)
717 
718  // Layout description: the resource pointer comes first. kUncachedSize marks
     // the end of an uncached external string; a second pointer-sized slot at
     // kResourceDataOffset follows it, and kSize is the full size.
719 #define EXTERNAL_STRING_FIELDS(V) \
720  V(kResourceOffset, kSystemPointerSize) \
721  /* Size of uncached external strings. */ \
722  V(kUncachedSize, 0) \
723  V(kResourceDataOffset, kSystemPointerSize) \
724  /* Total size. */ \
725  V(kSize, 0)
726 
727  DEFINE_FIELD_OFFSET_CONSTANTS(String::kHeaderSize, EXTERNAL_STRING_FIELDS)
728 #undef EXTERNAL_STRING_FIELDS
729 
730  // Return whether the external string data pointer is not cached.
731  inline bool is_uncached() const;
732  // Size in bytes of the external payload.
733  int ExternalPayloadSize() const;
734 
735  // Used in the serializer/deserializer.
736  inline Address resource_as_address();
737  inline void set_address_as_resource(Address address);
738  inline uint32_t resource_as_uint32();
739  inline void set_uint32_as_resource(uint32_t value);
740 
     // Compile-time check that the resource offset matches
     // Internals::kStringResourceOffset.
741  STATIC_ASSERT(kResourceOffset == Internals::kStringResourceOffset);
742 
743  OBJECT_CONSTRUCTORS(ExternalString, String);
744 };
745 
746 // The ExternalOneByteString class is an external string backed by a
747 // one-byte string.
749  public:
750  static const bool kHasOneByteEncoding = true;
751 
753 
754  // The underlying resource.
755  inline const Resource* resource();
756 
757  // It is assumed that the previous resource is null. If it is not null, then
758  // it is the responsibility of the caller to handle the previous resource.
759  inline void SetResource(Isolate* isolate, const Resource* buffer);
760  // Used only during serialization.
761  inline void set_resource(const Resource* buffer);
762 
763  // Update the pointer cache to the external character array.
764  // The cached pointer is always valid, as the external character array does
765  // not move during lifetime. Deserialization is the only exception, after
766  // which the pointer cache has to be refreshed.
767  inline void update_data_cache();
768 
769  inline const uint8_t* GetChars();
770 
771  // Dispatched behavior.
772  inline uint16_t ExternalOneByteStringGet(int index);
773 
774  DECL_CAST2(ExternalOneByteString)
775 
776  class BodyDescriptor;
777 
778  OBJECT_CONSTRUCTORS(ExternalOneByteString, ExternalString);
779 };
780 
781 // The ExternalTwoByteString class is an external string backed by a UTF-16
782 // encoded string.
784  public:
785  static const bool kHasOneByteEncoding = false;
786 
788 
789  // The underlying string resource.
790  inline const Resource* resource();
791 
792  // It is assumed that the previous resource is null. If it is not null, then
793  // it is the responsibility of the caller to handle the previous resource.
794  inline void SetResource(Isolate* isolate, const Resource* buffer);
795  // Used only during serialization.
796  inline void set_resource(const Resource* buffer);
797 
798  // Update the pointer cache to the external character array.
799  // The cached pointer is always valid, as the external character array does
800  // not move during lifetime. Deserialization is the only exception, after
801  // which the pointer cache has to be refreshed.
802  inline void update_data_cache();
803 
804  inline const uint16_t* GetChars();
805 
806  // Dispatched behavior.
807  inline uint16_t ExternalTwoByteStringGet(int index);
808 
809  // For regexp code.
810  inline const uint16_t* ExternalTwoByteStringGetData(unsigned start);
811 
812  DECL_CAST2(ExternalTwoByteString)
813 
814  class BodyDescriptor;
815 
816  OBJECT_CONSTRUCTORS(ExternalTwoByteString, ExternalString);
817 };
818 
819 // A flat string reader provides random access to the contents of a
820 // string independent of the character width of the string. The handle
821 // must be valid as long as the reader is being used.
823  public:
824  FlatStringReader(Isolate* isolate, Handle<String> str);
826  void PostGarbageCollection() override;
827  inline uc32 Get(int index);
828  template <typename Char>
829  inline Char Get(int index);
830  int length() { return length_; }  // Returns the stored length_ member.
831 
832  private:
833  Address* str_;
834  bool is_one_byte_;
835  int length_;
836  const void* start_;
837 };
838 
839 // This maintains an off-stack representation of the stack frames required
840 // to traverse a ConsString, allowing an entirely iterative and restartable
841 // traversal of the entire string.
843  public:
844  inline ConsStringIterator() = default;
845  inline explicit ConsStringIterator(ConsString cons_string, int offset = 0) {
     // All setup is delegated to Reset(), including the null cons_string case.
846  Reset(cons_string, offset);
847  }
848  inline void Reset(ConsString cons_string, int offset = 0) {
849  depth_ = 0;
850  // For a null cons_string, depth_ stays 0, so Next() takes its early-return
     // path and yields a default-constructed String().
851  if (cons_string.is_null()) return;
852  Initialize(cons_string, offset);
853  }
854  // Returns a default-constructed String() (not a raw nullptr) when complete,
     // i.e. when depth_ is 0. *offset_out is zeroed before anything else.
855  inline String Next(int* offset_out) {
856  *offset_out = 0;
857  if (depth_ == 0) return String();
858  return Continue(offset_out);
859  }
860 
861  private:
862  static const int kStackSize = 32;
863  // Use a mask instead of doing modulo operations for stack wrapping.
864  static const int kDepthMask = kStackSize - 1;
865  static_assert(base::bits::IsPowerOfTwo(kStackSize),
866  "kStackSize must be power of two");
867  static inline int OffsetForDepth(int depth);
868 
869  inline void PushLeft(ConsString string);
870  inline void PushRight(ConsString string);
871  inline void AdjustMaximumDepth();
872  inline void Pop();
873  inline bool StackBlown() { return maximum_depth_ - depth_ == kStackSize; }  // True when maximum_depth_ exceeds depth_ by exactly kStackSize. NOTE(review): presumably means the frames_ array has wrapped; confirm in the .cc file.
874  void Initialize(ConsString cons_string, int offset);
875  String Continue(int* offset_out);
876  String NextLeaf(bool* blew_stack);
877  String Search(int* offset_out);
878 
879  // Stack must always contain only frames for which right traversal
880  // has not yet been performed.
881  ConsString frames_[kStackSize];
882  ConsString root_;
883  int depth_;
884  int maximum_depth_;
885  int consumed_;
886  DISALLOW_COPY_AND_ASSIGN(ConsStringIterator);
887 };
888 
890  public:
891  inline explicit StringCharacterStream(String string, int offset = 0);
892  inline uint16_t GetNext();
893  inline bool HasMore();
894  inline void Reset(String string, int offset = 0);
895  inline void VisitOneByteString(const uint8_t* chars, int length);
896  inline void VisitTwoByteString(const uint16_t* chars, int length);
897 
898  private:
899  ConsStringIterator iter_;
900  bool is_one_byte_;
901  union {
902  const uint8_t* buffer8_;
903  const uint16_t* buffer16_;
904  };
905  const uint8_t* end_;
906  DISALLOW_COPY_AND_ASSIGN(StringCharacterStream);
907 };
908 
909 } // namespace internal
910 } // namespace v8
911 
912 #include "src/objects/object-macros-undef.h"
913 
914 #endif // V8_OBJECTS_STRING_H_
Definition: libplatform.h:13
Definition: v8.h:3740