V8 API Reference, 7.2.502.16 (for Deno 0.2.4)
scanner-character-streams.cc
1 // Copyright 2011 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "src/parsing/scanner-character-streams.h"
6 
7 #include <memory>
8 #include <vector>
9 
10 #include "include/v8.h"
11 #include "src/counters.h"
12 #include "src/globals.h"
13 #include "src/handles.h"
14 #include "src/objects-inl.h"
15 #include "src/parsing/scanner.h"
16 #include "src/unicode-inl.h"
17 
18 namespace v8 {
19 namespace internal {
20 
22  public:
23  explicit ScopedExternalStringLock(ExternalString string) {
24  DCHECK(!string.is_null());
25  if (string->IsExternalOneByteString()) {
26  resource_ = ExternalOneByteString::cast(string)->resource();
27  } else {
28  DCHECK(string->IsExternalTwoByteString());
29  resource_ = ExternalTwoByteString::cast(string)->resource();
30  }
31  DCHECK(resource_);
32  resource_->Lock();
33  }
34 
35  // Copying a lock increases the locking depth.
37  : resource_(other.resource_) {
38  resource_->Lock();
39  }
40 
41  ~ScopedExternalStringLock() { resource_->Unlock(); }
42 
43  private:
44  // Not nullptr.
46 };
47 
48 namespace {
49 const unibrow::uchar kUtf8Bom = 0xFEFF;
50 } // namespace
51 
52 template <typename Char>
53 struct CharTraits;
54 
55 template <>
56 struct CharTraits<uint8_t> {
57  typedef SeqOneByteString String;
59 };
60 
61 template <>
62 struct CharTraits<uint16_t> {
63  typedef SeqTwoByteString String;
65 };
66 
// A half-open range [start, end) of characters of type Char.
template <typename Char>
struct Range {
  const Char* start;
  const Char* end;

  // Number of Chars between |start| and |end|. Callable on const ranges.
  size_t length() const { return static_cast<size_t>(end - start); }

  // True if |start| is not aligned to a multiple of sizeof(Char). Any
  // non-zero remainder counts as unaligned, so this is also correct for
  // character types wider than two bytes.
  bool unaligned_start() const {
    return reinterpret_cast<uintptr_t>(start) % sizeof(Char) != 0;
  }
};
77 
78 // A Char stream backed by an on-heap SeqOneByteString or SeqTwoByteString.
79 template <typename Char>
80 class OnHeapStream {
81  public:
82  typedef typename CharTraits<Char>::String String;
83 
84  OnHeapStream(Handle<String> string, size_t start_offset, size_t end)
85  : string_(string), start_offset_(start_offset), length_(end) {}
86 
87  OnHeapStream(const OnHeapStream& other) : start_offset_(0), length_(0) {
88  UNREACHABLE();
89  }
90 
91  Range<Char> GetDataAt(size_t pos, RuntimeCallStats* stats) {
92  return {&string_->GetChars()[start_offset_ + Min(length_, pos)],
93  &string_->GetChars()[start_offset_ + length_]};
94  }
95 
96  static const bool kCanBeCloned = false;
97  static const bool kCanAccessHeap = true;
98 
99  private:
100  Handle<String> string_;
101  const size_t start_offset_;
102  const size_t length_;
103 };
104 
105 // A Char stream backed by an off-heap ExternalOneByteString or
106 // ExternalTwoByteString.
107 template <typename Char>
109  typedef typename CharTraits<Char>::ExternalString ExternalString;
110 
111  public:
112  ExternalStringStream(ExternalString string, size_t start_offset,
113  size_t length)
114  : lock_(string),
115  data_(string->GetChars() + start_offset),
116  length_(length) {}
117 
119  : lock_(other.lock_), data_(other.data_), length_(other.length_) {}
120 
121  Range<Char> GetDataAt(size_t pos, RuntimeCallStats* stats) {
122  return {&data_[Min(length_, pos)], &data_[length_]};
123  }
124 
125  static const bool kCanBeCloned = true;
126  static const bool kCanAccessHeap = false;
127 
128  private:
130  const Char* const data_;
131  const size_t length_;
132 };
133 
134 // A Char stream backed by a C array. Testing only.
135 template <typename Char>
137  public:
138  TestingStream(const Char* data, size_t length)
139  : data_(data), length_(length) {}
140  Range<Char> GetDataAt(size_t pos, RuntimeCallStats* stats) {
141  return {&data_[Min(length_, pos)], &data_[length_]};
142  }
143 
144  static const bool kCanBeCloned = true;
145  static const bool kCanAccessHeap = false;
146 
147  private:
148  const Char* const data_;
149  const size_t length_;
150 };
151 
152 // A Char stream backed by multiple source-stream provided off-heap chunks.
153 template <typename Char>
155  public:
157  : source_(source) {}
158 
159  ChunkedStream(const ChunkedStream& other) {
160  // TODO(rmcilroy): Implement cloning for chunked streams.
161  UNREACHABLE();
162  }
163 
164  Range<Char> GetDataAt(size_t pos, RuntimeCallStats* stats) {
165  Chunk chunk = FindChunk(pos, stats);
166  size_t buffer_end = chunk.length;
167  size_t buffer_pos = Min(buffer_end, pos - chunk.position);
168  return {&chunk.data[buffer_pos], &chunk.data[buffer_end]};
169  }
170 
171  ~ChunkedStream() {
172  for (Chunk& chunk : chunks_) delete[] chunk.data;
173  }
174 
175  static const bool kCanBeCloned = false;
176  static const bool kCanAccessHeap = false;
177 
178  private:
179  struct Chunk {
180  Chunk(const Char* const data, size_t position, size_t length)
181  : data(data), position(position), length(length) {}
182  const Char* const data;
183  // The logical position of data.
184  const size_t position;
185  const size_t length;
186  size_t end_position() const { return position + length; }
187  };
188 
189  Chunk FindChunk(size_t position, RuntimeCallStats* stats) {
190  while (V8_UNLIKELY(chunks_.empty())) FetchChunk(size_t{0}, stats);
191 
192  // Walk forwards while the position is in front of the current chunk.
193  while (position >= chunks_.back().end_position() &&
194  chunks_.back().length > 0) {
195  FetchChunk(chunks_.back().end_position(), stats);
196  }
197 
198  // Walk backwards.
199  for (auto reverse_it = chunks_.rbegin(); reverse_it != chunks_.rend();
200  ++reverse_it) {
201  if (reverse_it->position <= position) return *reverse_it;
202  }
203 
204  UNREACHABLE();
205  }
206 
207  virtual void ProcessChunk(const uint8_t* data, size_t position,
208  size_t length) {
209  // Incoming data has to be aligned to Char size.
210  DCHECK_EQ(0, length % sizeof(Char));
211  chunks_.emplace_back(reinterpret_cast<const Char*>(data), position,
212  length / sizeof(Char));
213  }
214 
215  void FetchChunk(size_t position, RuntimeCallStats* stats) {
216  const uint8_t* data = nullptr;
217  size_t length;
218  {
219  RuntimeCallTimerScope scope(stats,
220  RuntimeCallCounterId::kGetMoreDataCallback);
221  length = source_->GetMoreData(&data);
222  }
223  ProcessChunk(data, position, length);
224  }
225 
227 
228  protected:
229  std::vector<struct Chunk> chunks_;
230 };
231 
232 // Provides a buffered utf-16 view on the bytes from the underlying ByteStream.
233 // Chars are buffered if either the underlying stream isn't utf-16 or the
234 // underlying utf-16 stream might move (is on-heap).
235 template <template <typename T> class ByteStream>
237  public:
238  template <class... TArgs>
239  BufferedCharacterStream(size_t pos, TArgs... args) : byte_stream_(args...) {
240  buffer_pos_ = pos;
241  }
242 
243  bool can_be_cloned() const final {
244  return ByteStream<uint16_t>::kCanBeCloned;
245  }
246 
247  std::unique_ptr<Utf16CharacterStream> Clone() const override {
248  CHECK(can_be_cloned());
249  return std::unique_ptr<Utf16CharacterStream>(
251  }
252 
253  protected:
254  bool ReadBlock() final {
255  size_t position = pos();
256  buffer_pos_ = position;
257  buffer_start_ = &buffer_[0];
258  buffer_cursor_ = buffer_start_;
259 
261  Range<uint8_t> range =
262  byte_stream_.GetDataAt(position, runtime_call_stats());
263  if (range.length() == 0) {
264  buffer_end_ = buffer_start_;
265  return false;
266  }
267 
268  size_t length = Min(kBufferSize, range.length());
269  i::CopyCharsUnsigned(buffer_, range.start, length);
270  buffer_end_ = &buffer_[length];
271  return true;
272  }
273 
274  bool can_access_heap() const final {
275  return ByteStream<uint8_t>::kCanAccessHeap;
276  }
277 
278  private:
280  : byte_stream_(other.byte_stream_) {}
281 
282  static const size_t kBufferSize = 512;
283  uc16 buffer_[kBufferSize];
284  ByteStream<uint8_t> byte_stream_;
285 };
286 
287 // Provides an unbuffered utf-16 view on the bytes from the underlying
288 // ByteStream.
289 template <template <typename T> class ByteStream>
291  public:
292  template <class... TArgs>
293  UnbufferedCharacterStream(size_t pos, TArgs... args) : byte_stream_(args...) {
294  buffer_pos_ = pos;
295  }
296 
297  bool can_access_heap() const final {
298  return ByteStream<uint16_t>::kCanAccessHeap;
299  }
300 
301  bool can_be_cloned() const final {
302  return ByteStream<uint16_t>::kCanBeCloned;
303  }
304 
305  std::unique_ptr<Utf16CharacterStream> Clone() const override {
306  return std::unique_ptr<Utf16CharacterStream>(
308  }
309 
310  protected:
311  bool ReadBlock() final {
312  size_t position = pos();
313  buffer_pos_ = position;
315  Range<uint16_t> range =
316  byte_stream_.GetDataAt(position, runtime_call_stats());
317  buffer_start_ = range.start;
318  buffer_end_ = range.end;
319  buffer_cursor_ = buffer_start_;
320  if (range.length() == 0) return false;
321 
322  DCHECK(!range.unaligned_start());
323  DCHECK_LE(buffer_start_, buffer_end_);
324  return true;
325  }
326 
328  : byte_stream_(other.byte_stream_) {}
329 
330  ByteStream<uint16_t> byte_stream_;
331 };
332 
333 // Provides an unbuffered utf-16 view on an on-heap two-byte string. The
334 // buffer pointers are refreshed from a GC epilogue callback.
336  : public UnbufferedCharacterStream<OnHeapStream> {
337  public:
338  template <class... TArgs>
339  RelocatingCharacterStream(Isolate* isolate, size_t pos, TArgs... args)
341  isolate_(isolate) {
342  isolate->heap()->AddGCEpilogueCallback(UpdateBufferPointersCallback,
343  v8::kGCTypeAll, this);
344  }
345 
346  private:
347  ~RelocatingCharacterStream() final {
348  isolate_->heap()->RemoveGCEpilogueCallback(UpdateBufferPointersCallback,
349  this);
350  }
351 
352  static void UpdateBufferPointersCallback(v8::Isolate* v8_isolate,
353  v8::GCType type,
354  v8::GCCallbackFlags flags,
355  void* stream) {
356  reinterpret_cast<RelocatingCharacterStream*>(stream)
357  ->UpdateBufferPointers();
358  }
359 
360  void UpdateBufferPointers() {
362  Range<uint16_t> range = byte_stream_.GetDataAt(0, runtime_call_stats());
363  if (range.start != buffer_start_) {
364  buffer_cursor_ = (buffer_cursor_ - buffer_start_) + range.start;
365  buffer_start_ = range.start;
366  buffer_end_ = range.end;
367  }
368  }
369 
370  Isolate* isolate_;
371 };
372 
373 // ----------------------------------------------------------------------------
374 // BufferedUtf16CharacterStreams
375 //
376 // A buffered character stream based on a random access character
377 // source (ReadBlock can be called with pos() pointing to any position,
378 // even positions before the current).
379 //
380 // TODO(verwaest): Remove together with Utf8 external streaming streams.
382  public:
384 
385  protected:
386  static const size_t kBufferSize = 512;
387 
388  bool ReadBlock() final;
389 
390  // FillBuffer should read up to kBufferSize characters at position and store
391  // them into buffer_[0..]. It returns the number of characters stored.
392  virtual size_t FillBuffer(size_t position) = 0;
393 
394  // Fixed sized buffer that this class reads from.
395  // The base class' buffer_start_ should always point to buffer_.
396  uc16 buffer_[kBufferSize];
397 };
398 
399 BufferedUtf16CharacterStream::BufferedUtf16CharacterStream()
400  : Utf16CharacterStream(buffer_, buffer_, buffer_, 0) {}
401 
402 bool BufferedUtf16CharacterStream::ReadBlock() {
403  DCHECK_EQ(buffer_start_, buffer_);
404 
405  size_t position = pos();
406  buffer_pos_ = position;
407  buffer_cursor_ = buffer_;
408  buffer_end_ = buffer_ + FillBuffer(position);
409  DCHECK_EQ(pos(), position);
410  DCHECK_LE(buffer_end_, buffer_start_ + kBufferSize);
411  return buffer_cursor_ < buffer_end_;
412 }
413 
414 // ----------------------------------------------------------------------------
415 // Utf8ExternalStreamingStream - chunked streaming of Utf-8 data.
416 //
417 // This implementation is fairly complex, since data arrives in chunks which
418 // may 'cut' arbitrarily into utf-8 characters. Also, seeking to a given
419 // character position is tricky because the byte position cannot be derived
420 // from the character position.
421 //
422 // TODO(verwaest): Decode utf8 chunks into utf16 chunks on the blink side
423 // instead so we don't need to buffer.
424 
426  public:
429  : current_({0, {0, 0, 0, unibrow::Utf8::State::kAccept}}),
430  source_stream_(source_stream) {}
431  ~Utf8ExternalStreamingStream() final {
432  for (size_t i = 0; i < chunks_.size(); i++) delete[] chunks_[i].data;
433  }
434 
435  bool can_access_heap() const final { return false; }
436 
437  bool can_be_cloned() const final { return false; }
438 
439  std::unique_ptr<Utf16CharacterStream> Clone() const override {
440  UNREACHABLE();
441  }
442 
443  protected:
444  size_t FillBuffer(size_t position) final;
445 
446  private:
447  // A position within the data stream. It stores:
448  // - The 'physical' position (# of bytes in the stream),
449  // - the 'logical' position (# of ucs-2 characters, also within the stream),
450  // - a possibly incomplete utf-8 char at the current 'physical' position.
451  struct StreamPosition {
452  size_t bytes;
453  size_t chars;
454  uint32_t incomplete_char;
455  unibrow::Utf8::State state;
456  };
457 
458  // Position contains a StreamPosition and the index of the chunk the position
459  // points into. (The chunk_no could be derived from pos, but that'd be
460  // an expensive search through all chunks.)
461  struct Position {
462  size_t chunk_no;
463  StreamPosition pos;
464  };
465 
466  // A chunk in the list of chunks, containing:
467  // - The chunk data (data pointer and length), and
468  // - the position at the first byte of the chunk.
469  struct Chunk {
470  const uint8_t* data;
471  size_t length;
472  StreamPosition start;
473  };
474 
475  // Within the current chunk, skip forward from current_ towards position.
476  bool SkipToPosition(size_t position);
477  // Within the current chunk, fill the buffer_ (while it has capacity).
478  void FillBufferFromCurrentChunk();
479  // Fetch a new chunk (assuming current_ is at the end of the current data).
480  bool FetchChunk();
481  // Search through the chunks and set current_ to point to the given position.
482  // (This call is potentially expensive.)
483  void SearchPosition(size_t position);
484 
485  std::vector<Chunk> chunks_;
486  Position current_;
487  ScriptCompiler::ExternalSourceStream* source_stream_;
488 };
489 
490 bool Utf8ExternalStreamingStream::SkipToPosition(size_t position) {
491  DCHECK_LE(current_.pos.chars, position); // We can only skip forward.
492 
493  // Already there? Then return immediately.
494  if (current_.pos.chars == position) return true;
495 
496  const Chunk& chunk = chunks_[current_.chunk_no];
497  DCHECK(current_.pos.bytes >= chunk.start.bytes);
498 
499  unibrow::Utf8::State state = chunk.start.state;
500  uint32_t incomplete_char = chunk.start.incomplete_char;
501  size_t it = current_.pos.bytes - chunk.start.bytes;
502  size_t chars = chunk.start.chars;
503  while (it < chunk.length && chars < position) {
504  unibrow::uchar t = unibrow::Utf8::ValueOfIncremental(
505  chunk.data[it], &it, &state, &incomplete_char);
506  if (t == kUtf8Bom && current_.pos.chars == 0) {
507  // BOM detected at beginning of the stream. Don't copy it.
508  } else if (t != unibrow::Utf8::kIncomplete) {
509  chars++;
510  if (t > unibrow::Utf16::kMaxNonSurrogateCharCode) chars++;
511  }
512  }
513 
514  current_.pos.bytes += it;
515  current_.pos.chars = chars;
516  current_.pos.incomplete_char = incomplete_char;
517  current_.pos.state = state;
518  current_.chunk_no += (it == chunk.length);
519 
520  return current_.pos.chars == position;
521 }
522 
523 void Utf8ExternalStreamingStream::FillBufferFromCurrentChunk() {
524  DCHECK_LT(current_.chunk_no, chunks_.size());
525  DCHECK_EQ(buffer_start_, buffer_cursor_);
526  DCHECK_LT(buffer_end_ + 1, buffer_start_ + kBufferSize);
527 
528  const Chunk& chunk = chunks_[current_.chunk_no];
529 
530  // The buffer_ is writable, but buffer_*_ members are const. So we get a
531  // non-const pointer into buffer that points to the same char as buffer_end_.
532  uint16_t* cursor = buffer_ + (buffer_end_ - buffer_start_);
533  DCHECK_EQ(cursor, buffer_end_);
534 
535  unibrow::Utf8::State state = current_.pos.state;
536  uint32_t incomplete_char = current_.pos.incomplete_char;
537 
538  // If the current chunk is the last (empty) chunk we'll have to process
539  // any left-over, partial characters.
540  if (chunk.length == 0) {
541  unibrow::uchar t = unibrow::Utf8::ValueOfIncrementalFinish(&state);
542  if (t != unibrow::Utf8::kBufferEmpty) {
543  DCHECK_EQ(t, unibrow::Utf8::kBadChar);
544  *cursor = static_cast<uc16>(t);
545  buffer_end_++;
546  current_.pos.chars++;
547  current_.pos.incomplete_char = 0;
548  current_.pos.state = state;
549  }
550  return;
551  }
552 
553  size_t it = current_.pos.bytes - chunk.start.bytes;
554  while (it < chunk.length && cursor + 1 < buffer_start_ + kBufferSize) {
555  unibrow::uchar t = unibrow::Utf8::ValueOfIncremental(
556  chunk.data[it], &it, &state, &incomplete_char);
557  if (V8_LIKELY(t < kUtf8Bom)) {
558  *(cursor++) = static_cast<uc16>(t); // The by most frequent case.
559  } else if (t == unibrow::Utf8::kIncomplete) {
560  continue;
561  } else if (t == kUtf8Bom && current_.pos.bytes + it == 3) {
562  // BOM detected at beginning of the stream. Don't copy it.
563  } else if (t <= unibrow::Utf16::kMaxNonSurrogateCharCode) {
564  *(cursor++) = static_cast<uc16>(t);
565  } else {
566  *(cursor++) = unibrow::Utf16::LeadSurrogate(t);
567  *(cursor++) = unibrow::Utf16::TrailSurrogate(t);
568  }
569  }
570 
571  current_.pos.bytes = chunk.start.bytes + it;
572  current_.pos.chars += (cursor - buffer_end_);
573  current_.pos.incomplete_char = incomplete_char;
574  current_.pos.state = state;
575  current_.chunk_no += (it == chunk.length);
576 
577  buffer_end_ = cursor;
578 }
579 
580 bool Utf8ExternalStreamingStream::FetchChunk() {
581  RuntimeCallTimerScope scope(runtime_call_stats(),
582  RuntimeCallCounterId::kGetMoreDataCallback);
583  DCHECK_EQ(current_.chunk_no, chunks_.size());
584  DCHECK(chunks_.empty() || chunks_.back().length != 0);
585 
586  const uint8_t* chunk = nullptr;
587  size_t length = source_stream_->GetMoreData(&chunk);
588  chunks_.push_back({chunk, length, current_.pos});
589  return length > 0;
590 }
591 
592 void Utf8ExternalStreamingStream::SearchPosition(size_t position) {
593  // If current_ already points to the right position, we're done.
594  //
595  // This is expected to be the common case, since we typically call
596  // FillBuffer right after the current buffer.
597  if (current_.pos.chars == position) return;
598 
599  // No chunks. Fetch at least one, so we can assume !chunks_.empty() below.
600  if (chunks_.empty()) {
601  DCHECK_EQ(current_.chunk_no, 0u);
602  DCHECK_EQ(current_.pos.bytes, 0u);
603  DCHECK_EQ(current_.pos.chars, 0u);
604  FetchChunk();
605  }
606 
607  // Search for the last chunk whose start position is less or equal to
608  // position.
609  size_t chunk_no = chunks_.size() - 1;
610  while (chunk_no > 0 && chunks_[chunk_no].start.chars > position) {
611  chunk_no--;
612  }
613 
614  // Did we find the terminating (zero-length) chunk? Then we're seeking
615  // behind the end of the data, and position does not exist.
616  // Set current_ to point to the terminating chunk.
617  if (chunks_[chunk_no].length == 0) {
618  current_ = {chunk_no, chunks_[chunk_no].start};
619  return;
620  }
621 
622  // Did we find the non-last chunk? Then our position must be within chunk_no.
623  if (chunk_no + 1 < chunks_.size()) {
624  // Fancy-pants optimization for ASCII chunks within a utf-8 stream.
625  // (Many web sites declare utf-8 encoding, but use only (or almost only) the
626  // ASCII subset for their JavaScript sources. We can exploit this, by
627  // checking whether the # bytes in a chunk are equal to the # chars, and if
628  // so avoid the expensive SkipToPosition.)
629  bool ascii_only_chunk =
630  chunks_[chunk_no].start.incomplete_char == 0 &&
631  (chunks_[chunk_no + 1].start.bytes - chunks_[chunk_no].start.bytes) ==
632  (chunks_[chunk_no + 1].start.chars - chunks_[chunk_no].start.chars);
633  if (ascii_only_chunk) {
634  size_t skip = position - chunks_[chunk_no].start.chars;
635  current_ = {chunk_no,
636  {chunks_[chunk_no].start.bytes + skip,
637  chunks_[chunk_no].start.chars + skip, 0,
638  unibrow::Utf8::State::kAccept}};
639  } else {
640  current_ = {chunk_no, chunks_[chunk_no].start};
641  SkipToPosition(position);
642  }
643 
644  // Since position was within the chunk, SkipToPosition should have found
645  // something.
646  DCHECK_EQ(position, current_.pos.chars);
647  return;
648  }
649 
650  // What's left: We're in the last, non-terminating chunk. Our position
651  // may be in the chunk, but it may also be in 'future' chunks, which we'll
652  // have to obtain.
653  DCHECK_EQ(chunk_no, chunks_.size() - 1);
654  current_ = {chunk_no, chunks_[chunk_no].start};
655  bool have_more_data = true;
656  bool found = SkipToPosition(position);
657  while (have_more_data && !found) {
658  DCHECK_EQ(current_.chunk_no, chunks_.size());
659  have_more_data = FetchChunk();
660  found = have_more_data && SkipToPosition(position);
661  }
662 
663  // We'll return with a postion != the desired position only if we're out
664  // of data. In that case, we'll point to the terminating chunk.
665  DCHECK_EQ(found, current_.pos.chars == position);
666  DCHECK_EQ(have_more_data, chunks_.back().length != 0);
667  DCHECK_IMPLIES(!found, !have_more_data);
668  DCHECK_IMPLIES(!found, current_.chunk_no == chunks_.size() - 1);
669 }
670 
671 size_t Utf8ExternalStreamingStream::FillBuffer(size_t position) {
672  buffer_cursor_ = buffer_;
673  buffer_end_ = buffer_;
674 
675  SearchPosition(position);
676  bool out_of_data = current_.chunk_no != chunks_.size() &&
677  chunks_[current_.chunk_no].length == 0 &&
678  current_.pos.incomplete_char == 0;
679 
680  if (out_of_data) return 0;
681 
682  // Fill the buffer, until we have at least one char (or are out of data).
683  // (The embedder might give us 1-byte blocks within a utf-8 char, so we
684  // can't guarantee progress with one chunk. Thus we iterate.)
685  while (!out_of_data && buffer_cursor_ == buffer_end_) {
686  // At end of current data, but there might be more? Then fetch it.
687  if (current_.chunk_no == chunks_.size()) {
688  out_of_data = !FetchChunk();
689  }
690  FillBufferFromCurrentChunk();
691  }
692 
693  DCHECK_EQ(current_.pos.chars - position,
694  static_cast<size_t>(buffer_end_ - buffer_cursor_));
695  return buffer_end_ - buffer_cursor_;
696 }
697 
698 // ----------------------------------------------------------------------------
699 // ScannerStream: Create stream instances.
700 
701 Utf16CharacterStream* ScannerStream::For(Isolate* isolate,
702  Handle<String> data) {
703  return ScannerStream::For(isolate, data, 0, data->length());
704 }
705 
706 Utf16CharacterStream* ScannerStream::For(Isolate* isolate, Handle<String> data,
707  int start_pos, int end_pos) {
708  DCHECK_GE(start_pos, 0);
709  DCHECK_LE(start_pos, end_pos);
710  DCHECK_LE(end_pos, data->length());
711  size_t start_offset = 0;
712  if (data->IsSlicedString()) {
713  SlicedString string = SlicedString::cast(*data);
714  start_offset = string->offset();
715  String parent = string->parent();
716  if (parent->IsThinString()) parent = ThinString::cast(parent)->actual();
717  data = handle(parent, isolate);
718  } else {
719  data = String::Flatten(isolate, data);
720  }
721  if (data->IsExternalOneByteString()) {
722  return new BufferedCharacterStream<ExternalStringStream>(
723  static_cast<size_t>(start_pos), ExternalOneByteString::cast(*data),
724  start_offset, static_cast<size_t>(end_pos));
725  } else if (data->IsExternalTwoByteString()) {
726  return new UnbufferedCharacterStream<ExternalStringStream>(
727  static_cast<size_t>(start_pos), ExternalTwoByteString::cast(*data),
728  start_offset, static_cast<size_t>(end_pos));
729  } else if (data->IsSeqOneByteString()) {
730  return new BufferedCharacterStream<OnHeapStream>(
731  static_cast<size_t>(start_pos), Handle<SeqOneByteString>::cast(data),
732  start_offset, static_cast<size_t>(end_pos));
733  } else if (data->IsSeqTwoByteString()) {
734  return new RelocatingCharacterStream(
735  isolate, static_cast<size_t>(start_pos),
736  Handle<SeqTwoByteString>::cast(data), start_offset,
737  static_cast<size_t>(end_pos));
738  } else {
739  UNREACHABLE();
740  }
741 }
742 
743 std::unique_ptr<Utf16CharacterStream> ScannerStream::ForTesting(
744  const char* data) {
745  return ScannerStream::ForTesting(data, strlen(data));
746 }
747 
748 std::unique_ptr<Utf16CharacterStream> ScannerStream::ForTesting(
749  const char* data, size_t length) {
750  return std::unique_ptr<Utf16CharacterStream>(
751  new BufferedCharacterStream<TestingStream>(
752  static_cast<size_t>(0), reinterpret_cast<const uint8_t*>(data),
753  static_cast<size_t>(length)));
754 }
755 
756 Utf16CharacterStream* ScannerStream::For(
757  ScriptCompiler::ExternalSourceStream* source_stream,
758  v8::ScriptCompiler::StreamedSource::Encoding encoding) {
759  switch (encoding) {
760  case v8::ScriptCompiler::StreamedSource::TWO_BYTE:
761  return new UnbufferedCharacterStream<ChunkedStream>(
762  static_cast<size_t>(0), source_stream);
763  case v8::ScriptCompiler::StreamedSource::ONE_BYTE:
764  return new BufferedCharacterStream<ChunkedStream>(static_cast<size_t>(0),
765  source_stream);
766  case v8::ScriptCompiler::StreamedSource::UTF8:
767  return new Utf8ExternalStreamingStream(source_stream);
768  }
769  UNREACHABLE();
770 }
771 
772 } // namespace internal
773 } // namespace v8
virtual void Unlock() const
Definition: v8.h:2677
virtual size_t GetMoreData(const uint8_t **src)=0
Definition: libplatform.h:13
virtual void Lock() const
Definition: v8.h:2672