5 #include "src/parsing/scanner-character-streams.h" 10 #include "include/v8.h" 11 #include "src/counters.h" 12 #include "src/globals.h" 13 #include "src/handles.h" 14 #include "src/objects-inl.h" 15 #include "src/parsing/scanner.h" 16 #include "src/unicode-inl.h" 24 DCHECK(!
string.is_null());
// Select the external resource according to the string's representation:
// external one-byte strings take the first branch.
25 if (string->IsExternalOneByteString()) {
26 resource_ = ExternalOneByteString::cast(
string)->resource();
// Any other string is asserted to be an external two-byte string before its
// resource is taken.
28 DCHECK(string->IsExternalTwoByteString());
29 resource_ = ExternalTwoByteString::cast(
string)->resource();
// Constructor taking `other`: starts out with the same resource pointer.
// NOTE(review): the constructor body is not visible in this listing; confirm
// what it does with resource_ after the copy.
37 : resource_(other.resource_) {
// Code point U+FEFF. The UTF-8 decoding loops in this file compare decoded
// characters against it to recognise a byte order mark.
49 const unibrow::uchar kUtf8Bom = 0xFEFF;
// NOTE(review): the declaration introduced by this template header is not
// visible in this listing.
52 template <
typename Char>
// Template whose visible members operate on a pair of pointers, `start` and
// `end` (presumably the Range<Char> type used by the streams; confirm).
67 template <
typename Char>
// Number of Char elements between `start` and `end`.
72 size_t length() {
return static_cast<size_t>(end - start); }
// True when the address held in `start` leaves remainder 1 modulo
// sizeof(Char). For a one-byte Char this is never true (any value % 1 == 0).
73 bool unaligned_start()
const {
74 return reinterpret_cast<intptr_t
>(start) %
sizeof(Char) == 1;
// Byte-stream template that reads characters through string_->GetChars()
// (presumably OnHeapStream; the class header is not visible in this listing).
79 template <
typename Char>
// Stores the string, the start offset, and `end`, which is kept as length_.
85 : string_(
string), start_offset_(start_offset), length_(end) {}
// Returns {start, end} pointers into the string's characters. `pos` is clamped
// to length_, so the start pointer never passes the end pointer. GetChars() is
// evaluated for each pointer rather than cached.
92 return {&string_->GetChars()[start_offset_ + Min(length_, pos)],
93 &string_->GetChars()[start_offset_ + length_]};
// Capability flags; the character-stream wrappers expose these through
// can_be_cloned() / can_access_heap().
96 static const bool kCanBeCloned =
false;
97 static const bool kCanAccessHeap =
true;
// Offset added to every index into the string's characters.
101 const size_t start_offset_;
// Upper bound used both for clamping `pos` and for the end pointer.
102 const size_t length_;
// Byte-stream template that holds a lock object and a raw character pointer
// (presumably ExternalStringStream; the class header is not visible here).
107 template <
typename Char>
// The data pointer is computed once, at construction, as the string's
// characters advanced by start_offset.
115 data_(string->GetChars() + start_offset),
// Copy constructor: duplicates the lock, the data pointer and the length.
119 : lock_(other.lock_), data_(other.data_), length_(other.length_) {}
// Returns {start, end} pointers into data_; `pos` is clamped to length_.
122 return {&data_[Min(length_, pos)], &data_[length_]};
// Capability flags; the character-stream wrappers expose these through
// can_be_cloned() / can_access_heap().
125 static const bool kCanBeCloned =
true;
126 static const bool kCanAccessHeap =
false;
// Start of the character data; neither the pointer nor the pointee is
// modified through this member.
130 const Char*
const data_;
// Number of characters reachable from data_.
131 const size_t length_;
// Byte-stream template over a caller-supplied pointer and length (presumably
// TestingStream; the class header is not visible in this listing).
135 template <
typename Char>
// Stores the pointer and length as given; no copy of the data is made here.
139 : data_(data), length_(length) {}
// Returns {start, end} pointers into data_; `pos` is clamped to length_.
141 return {&data_[Min(length_, pos)], &data_[length_]};
// Capability flags; the character-stream wrappers expose these through
// can_be_cloned() / can_access_heap().
144 static const bool kCanBeCloned =
true;
145 static const bool kCanAccessHeap =
false;
// Start of the character data.
148 const Char*
const data_;
// Number of characters reachable from data_.
149 const size_t length_;
// Byte-stream template that keeps its data in a vector of chunks (chunks_) and
// fetches more chunks when a position beyond the stored data is requested
// (presumably ChunkedStream; the class header is not visible in this listing).
153 template <
typename Char>
// Look up the chunk for `pos` and return the range from `pos` to the end of
// that chunk. The chunk-relative offset is clamped to the chunk length, so a
// position past the data yields an empty range.
165 Chunk chunk = FindChunk(pos, stats);
166 size_t buffer_end = chunk.length;
167 size_t buffer_pos = Min(buffer_end, pos - chunk.position);
168 return {&chunk.data[buffer_pos], &chunk.data[buffer_end]};
// Free the data array of every stored chunk.
172 for (Chunk& chunk : chunks_)
delete[] chunk.data;
// Capability flags; the character-stream wrappers expose these through
// can_be_cloned() / can_access_heap().
175 static const bool kCanBeCloned =
false;
176 static const bool kCanAccessHeap =
false;
// One stored chunk: its data pointer, the position at which it starts, and its
// length.
180 Chunk(
const Char*
const data,
size_t position,
size_t length)
181 : data(data), position(position), length(length) {}
182 const Char*
const data;
184 const size_t position;
// Position one past the last element of this chunk.
186 size_t end_position()
const {
return position + length; }
// Make sure at least one chunk has been fetched.
190 while (V8_UNLIKELY(chunks_.empty())) FetchChunk(
size_t{0}, stats);
// Keep fetching while `position` lies at or beyond the end of the newest chunk
// and that chunk is non-empty; a zero-length newest chunk stops the loop.
193 while (position >= chunks_.back().end_position() &&
194 chunks_.back().length > 0) {
195 FetchChunk(chunks_.back().end_position(), stats);
// Search from the newest chunk backwards for the first one that starts at or
// before `position`.
199 for (
auto reverse_it = chunks_.rbegin(); reverse_it != chunks_.rend();
201 if (reverse_it->position <= position)
return *reverse_it;
// Appends a chunk built from raw bytes. The byte length is asserted to be a
// multiple of sizeof(Char) and is converted to a count of Char elements.
207 virtual void ProcessChunk(
const uint8_t* data,
size_t position,
210 DCHECK_EQ(0, length %
sizeof(Char));
211 chunks_.emplace_back(reinterpret_cast<const Char*>(data), position,
212 length /
sizeof(Char));
// Fetch path: `data` starts out null and is later handed to ProcessChunk
// together with `position` and `length`.
// NOTE(review): the lines that obtain the data and `length` are not visible in
// this listing.
216 const uint8_t* data =
nullptr;
220 RuntimeCallCounterId::kGetMoreDataCallback);
223 ProcessChunk(data, position, length);
// All chunks received so far, in the order they were appended.
229 std::vector<struct Chunk> chunks_;
// Character stream parameterised on a ByteStream template. ReadBlock copies
// characters from a ByteStream<uint8_t> into the local uc16 buffer_.
235 template <
template <
typename T>
class ByteStream>
238 template <
class... TArgs>
// NOTE(review): this reads the flag from ByteStream<uint16_t>, while the member
// is a ByteStream<uint8_t> and can_access_heap() below uses uint8_t. Confirm
// whether the mismatch is intentional.
243 bool can_be_cloned()
const final {
244 return ByteStream<uint16_t>::kCanBeCloned;
// Cloning aborts via CHECK when the byte stream is not clonable.
247 std::unique_ptr<Utf16CharacterStream> Clone()
const override {
248 CHECK(can_be_cloned());
249 return std::unique_ptr<Utf16CharacterStream>(
// Refill buffer_ starting at the current stream position.
254 bool ReadBlock()
final {
255 size_t position = pos();
256 buffer_pos_ = position;
257 buffer_start_ = &buffer_[0];
258 buffer_cursor_ = buffer_start_;
// Ask the byte stream for the data available at `position`.
262 byte_stream_.GetDataAt(position, runtime_call_stats());
// No data at this position: leave the buffer empty.
263 if (range.length() == 0) {
264 buffer_end_ = buffer_start_;
// Copy at most kBufferSize characters into buffer_ and mark the end of the
// valid data.
268 size_t length = Min(kBufferSize, range.length());
269 i::CopyCharsUnsigned(buffer_, range.start, length);
270 buffer_end_ = &buffer_[length];
274 bool can_access_heap()
const final {
275 return ByteStream<uint8_t>::kCanAccessHeap;
// Constructor taking `other`: copies its byte stream.
280 : byte_stream_(other.byte_stream_) {}
// Capacity of buffer_, in uc16 units.
282 static const size_t kBufferSize = 512;
283 uc16 buffer_[kBufferSize];
284 ByteStream<uint8_t> byte_stream_;
// Character stream parameterised on a ByteStream template. ReadBlock points the
// buffer pointers straight at the ByteStream<uint16_t> data instead of copying.
289 template <
template <
typename T>
class ByteStream>
292 template <
class... TArgs>
297 bool can_access_heap()
const final {
298 return ByteStream<uint16_t>::kCanAccessHeap;
301 bool can_be_cloned()
const final {
302 return ByteStream<uint16_t>::kCanBeCloned;
305 std::unique_ptr<Utf16CharacterStream> Clone()
const override {
306 return std::unique_ptr<Utf16CharacterStream>(
// Re-point the buffer at the data available from the current position.
311 bool ReadBlock()
final {
312 size_t position = pos();
313 buffer_pos_ = position;
// Fetch the range at `position` and use it directly as the buffer.
316 byte_stream_.GetDataAt(position, runtime_call_stats());
317 buffer_start_ = range.start;
318 buffer_end_ = range.end;
319 buffer_cursor_ = buffer_start_;
// An empty range means nothing could be read.
320 if (range.length() == 0)
return false;
// Sanity checks on the range that now backs the buffer.
322 DCHECK(!range.unaligned_start());
323 DCHECK_LE(buffer_start_, buffer_end_);
// Constructor taking `other`: copies its byte stream.
328 : byte_stream_(other.byte_stream_) {}
330 ByteStream<uint16_t> byte_stream_;
// Fragments of a stream that re-derives its buffer pointers after garbage
// collection (presumably RelocatingCharacterStream; the class header is not
// visible in this listing).
338 template <
class... TArgs>
// Register this object for a callback after every type of GC.
342 isolate->heap()->AddGCEpilogueCallback(UpdateBufferPointersCallback,
343 v8::kGCTypeAll,
this);
// Unregister the same callback.
348 isolate_->heap()->RemoveGCEpilogueCallback(UpdateBufferPointersCallback,
// Static GC callback that ends by invoking UpdateBufferPointers() on an object.
// NOTE(review): how that object is obtained is not visible here; presumably
// from the pointer registered above.
352 static void UpdateBufferPointersCallback(v8::Isolate* v8_isolate,
354 v8::GCCallbackFlags flags,
357 ->UpdateBufferPointers();
// Re-read the data range from position 0. If its start differs from the cached
// buffer start, rebase the cursor so it keeps the same offset from the start,
// then adopt the new start and end.
360 void UpdateBufferPointers() {
362 Range<uint16_t> range = byte_stream_.GetDataAt(0, runtime_call_stats());
363 if (range.start != buffer_start_) {
364 buffer_cursor_ = (buffer_cursor_ - buffer_start_) + range.start;
365 buffer_start_ = range.start;
366 buffer_end_ = range.end;
// Capacity of buffer_, in uc16 units.
386 static const size_t kBufferSize = 512;
// Declared final here; defined out of line.
388 bool ReadBlock()
final;
// Implemented by subclasses. ReadBlock treats the return value as the number
// of uc16 units now valid at the start of buffer_.
392 virtual size_t FillBuffer(
size_t position) = 0;
// Storage that ReadBlock exposes through the buffer pointers.
396 uc16 buffer_[kBufferSize];
// NOTE(review): the constructor's initializer list and body are not visible in
// this listing.
399 BufferedUtf16CharacterStream::BufferedUtf16CharacterStream()
// Refills buffer_ from the current stream position via FillBuffer(). Returns
// true when at least one character was produced.
402 bool BufferedUtf16CharacterStream::ReadBlock() {
// The buffer pointers are expected to refer to the member array already.
403 DCHECK_EQ(buffer_start_, buffer_);
405 size_t position = pos();
406 buffer_pos_ = position;
407 buffer_cursor_ = buffer_;
// FillBuffer reports how many units it produced; that count fixes the end.
408 buffer_end_ = buffer_ + FillBuffer(position);
// The stream position must be unchanged, and the fill must fit in the buffer.
409 DCHECK_EQ(pos(), position);
410 DCHECK_LE(buffer_end_, buffer_start_ + kBufferSize);
411 return buffer_cursor_ < buffer_end_;
// Constructor initializers: begin at chunk 0 with every position field zero
// and the UTF-8 decoder state set to kAccept; remember the source stream.
429 : current_({0, {0, 0, 0, unibrow::Utf8::State::kAccept}}),
430 source_stream_(source_stream) {}
// Free the data array of every stored chunk.
432 for (
size_t i = 0;
i < chunks_.size();
i++)
delete[] chunks_[
i].data;
// This stream reports that it neither accesses the heap nor can be cloned.
435 bool can_access_heap()
const final {
return false; }
437 bool can_be_cloned()
const final {
return false; }
// NOTE(review): the body of Clone() is not visible in this listing.
439 std::unique_ptr<Utf16CharacterStream> Clone()
const override {
444 size_t FillBuffer(
size_t position)
final;
// Stream position record; `state` holds the incremental UTF-8 decoder state.
// NOTE(review): the other fields are not visible here; the member functions
// use bytes, chars and incomplete_char.
451 struct StreamPosition {
455 unibrow::Utf8::State state;
// Stream position at which a chunk begins (member of the Chunk record).
472 StreamPosition start;
// Helpers defined out of line.
476 bool SkipToPosition(
size_t position);
478 void FillBufferFromCurrentChunk();
483 void SearchPosition(
size_t position);
// All chunks received so far, in arrival order.
485 std::vector<Chunk> chunks_;
// Decodes forward inside the current chunk until `position` UTF-16 code units
// have been counted or the chunk is exhausted, then stores the result in
// current_. Returns true when current_ ends up exactly at `position`.
490 bool Utf8ExternalStreamingStream::SkipToPosition(
size_t position) {
// Only forward skips are supported.
491 DCHECK_LE(current_.pos.chars, position);
// Nothing to do when already at the target.
494 if (current_.pos.chars == position)
return true;
496 const Chunk& chunk = chunks_[current_.chunk_no];
497 DCHECK(current_.pos.bytes >= chunk.start.bytes);
// Decoder state and character count are seeded from the chunk start, while the
// byte offset `it` is seeded from current_.
499 unibrow::Utf8::State state = chunk.start.state;
500 uint32_t incomplete_char = chunk.start.incomplete_char;
501 size_t it = current_.pos.bytes - chunk.start.bytes;
502 size_t chars = chunk.start.chars;
503 while (it < chunk.length && chars < position) {
504 unibrow::uchar t = unibrow::Utf8::ValueOfIncremental(
505 chunk.data[it], &it, &state, &incomplete_char);
// A BOM seen while the stream is still at character 0 takes this branch.
// NOTE(review): the branch body is not visible; presumably it counts nothing.
506 if (t == kUtf8Bom && current_.pos.chars == 0) {
508 }
else if (t != unibrow::Utf8::kIncomplete) {
// Characters above the non-surrogate range count one extra unit.
// NOTE(review): the base increment for a complete character is not visible in
// this listing.
510 if (t > unibrow::Utf16::kMaxNonSurrogateCharCode) chars++;
// NOTE(review): `it` is a chunk-relative offset that was itself derived from
// current_.pos.bytes, so `+=` is only right when current_.pos.bytes equals
// chunk.start.bytes on entry. The visible callers in SearchPosition establish
// that; FillBufferFromCurrentChunk assigns chunk.start.bytes + it instead.
514 current_.pos.bytes += it;
515 current_.pos.chars = chars;
516 current_.pos.incomplete_char = incomplete_char;
517 current_.pos.state = state;
// Move on to the next chunk once this one is fully consumed.
518 current_.chunk_no += (it == chunk.length);
520 return current_.pos.chars == position;
// Decodes UTF-8 bytes from the current chunk into UTF-16 code units appended
// at buffer_end_, then records the new stream position in current_.
523 void Utf8ExternalStreamingStream::FillBufferFromCurrentChunk() {
// Preconditions: a current chunk exists, the cursor sits at the buffer start,
// and at least two more units fit in the buffer.
524 DCHECK_LT(current_.chunk_no, chunks_.size());
525 DCHECK_EQ(buffer_start_, buffer_cursor_);
526 DCHECK_LT(buffer_end_ + 1, buffer_start_ + kBufferSize);
528 const Chunk& chunk = chunks_[current_.chunk_no];
// Writable pointer to the same slot as buffer_end_.
532 uint16_t* cursor = buffer_ + (buffer_end_ - buffer_start_);
533 DCHECK_EQ(cursor, buffer_end_);
535 unibrow::Utf8::State state = current_.pos.state;
536 uint32_t incomplete_char = current_.pos.incomplete_char;
// A zero-length chunk: flush the decoder. Any pending character is asserted to
// be kBadChar and written out as one unit.
540 if (chunk.length == 0) {
541 unibrow::uchar t = unibrow::Utf8::ValueOfIncrementalFinish(&state);
542 if (t != unibrow::Utf8::kBufferEmpty) {
543 DCHECK_EQ(t, unibrow::Utf8::kBadChar);
544 *cursor =
static_cast<uc16
>(t);
546 current_.pos.chars++;
547 current_.pos.incomplete_char = 0;
548 current_.pos.state = state;
// Main decode loop. The `cursor + 1` bound keeps room for two units, because a
// single decoded character may be written as a surrogate pair below.
553 size_t it = current_.pos.bytes - chunk.start.bytes;
554 while (it < chunk.length && cursor + 1 < buffer_start_ + kBufferSize) {
555 unibrow::uchar t = unibrow::Utf8::ValueOfIncremental(
556 chunk.data[it], &it, &state, &incomplete_char);
// Likely case: a character below U+FEFF is stored as a single unit.
557 if (V8_LIKELY(t < kUtf8Bom)) {
558 *(cursor++) = static_cast<uc16>(t);
559 }
// NOTE(review): the bodies of the next two branches are not visible in this
// listing; presumably neither writes to the buffer.
else if (t == unibrow::Utf8::kIncomplete) {
561 }
// A BOM is singled out only when exactly three bytes have been consumed, which
// presumably means it is the first character of the stream (U+FEFF takes three
// bytes in UTF-8).
else if (t == kUtf8Bom && current_.pos.bytes + it == 3) {
563 }
else if (t <= unibrow::Utf16::kMaxNonSurrogateCharCode) {
564 *(cursor++) = static_cast<uc16>(t);
// Characters above the non-surrogate range are written as a lead/trail pair.
566 *(cursor++) = unibrow::Utf16::LeadSurrogate(t);
567 *(cursor++) = unibrow::Utf16::TrailSurrogate(t);
// Record how far decoding got; the character count grows by the number of
// units appended since the old buffer_end_.
571 current_.pos.bytes = chunk.start.bytes + it;
572 current_.pos.chars += (cursor - buffer_end_);
573 current_.pos.incomplete_char = incomplete_char;
574 current_.pos.state = state;
// Move on to the next chunk once this one is fully consumed.
575 current_.chunk_no += (it == chunk.length);
577 buffer_end_ = cursor;
// Requests the next chunk from the source stream and appends it to chunks_,
// tagged with the current stream position as its start.
// NOTE(review): the return statement is not visible in this listing; a check
// in SearchPosition equates the result with the new chunk being non-empty.
580 bool Utf8ExternalStreamingStream::FetchChunk() {
// Time the call under the kGetMoreDataCallback runtime-call counter.
581 RuntimeCallTimerScope scope(runtime_call_stats(),
582 RuntimeCallCounterId::kGetMoreDataCallback);
// Only called once every stored chunk has been consumed, and never after a
// zero-length chunk has been stored.
583 DCHECK_EQ(current_.chunk_no, chunks_.size());
584 DCHECK(chunks_.empty() || chunks_.back().length != 0);
// GetMoreData fills in the data pointer and returns the chunk's length.
586 const uint8_t* chunk =
nullptr;
587 size_t length = source_stream_->
GetMoreData(&chunk);
588 chunks_.push_back({chunk, length, current_.pos});
// Moves current_ to the character position `position`, reusing stored chunks
// where possible and fetching new ones when the position lies beyond them.
592 void Utf8ExternalStreamingStream::SearchPosition(
size_t position) {
// Already at the requested position.
597 if (current_.pos.chars == position)
return;
// Before any chunk has arrived the stream must still be at its origin.
// NOTE(review): the remainder of this branch is not visible in this listing.
600 if (chunks_.empty()) {
601 DCHECK_EQ(current_.chunk_no, 0u);
602 DCHECK_EQ(current_.pos.bytes, 0u);
603 DCHECK_EQ(current_.pos.chars, 0u);
// Start from the newest chunk and loop while its start lies after `position`.
// NOTE(review): the loop body is not visible; presumably it steps chunk_no
// backwards.
609 size_t chunk_no = chunks_.size() - 1;
610 while (chunk_no > 0 && chunks_[chunk_no].start.chars > position) {
// A zero-length chunk: just park current_ at its start.
617 if (chunks_[chunk_no].length == 0) {
618 current_ = {chunk_no, chunks_[chunk_no].start};
// The target chunk is followed by another stored chunk.
623 if (chunk_no + 1 < chunks_.size()) {
// If the chunk begins with no pending partial character and spans as many
// bytes as characters, character offsets equal byte offsets and the position
// can be computed directly, without decoding.
629 bool ascii_only_chunk =
630 chunks_[chunk_no].start.incomplete_char == 0 &&
631 (chunks_[chunk_no + 1].start.bytes - chunks_[chunk_no].start.bytes) ==
632 (chunks_[chunk_no + 1].start.chars - chunks_[chunk_no].start.chars);
633 if (ascii_only_chunk) {
634 size_t skip = position - chunks_[chunk_no].start.chars;
635 current_ = {chunk_no,
636 {chunks_[chunk_no].start.bytes + skip,
637 chunks_[chunk_no].start.chars + skip, 0,
638 unibrow::Utf8::State::kAccept}};
// Otherwise restart at the chunk start and decode forward.
640 current_ = {chunk_no, chunks_[chunk_no].start};
641 SkipToPosition(position);
646 DCHECK_EQ(position, current_.pos.chars);
// Remaining case: the target chunk is the newest one. Decode forward from its
// start, fetching further chunks until the position is found or the source
// has no more data.
653 DCHECK_EQ(chunk_no, chunks_.size() - 1);
654 current_ = {chunk_no, chunks_[chunk_no].start};
655 bool have_more_data =
true;
656 bool found = SkipToPosition(position);
657 while (have_more_data && !found) {
658 DCHECK_EQ(current_.chunk_no, chunks_.size());
659 have_more_data = FetchChunk();
660 found = have_more_data && SkipToPosition(position);
// Post-conditions: `found` agrees with current_, `have_more_data` agrees with
// the newest chunk being non-empty, and when the position was not found the
// data ran out with current_ left on the newest chunk.
665 DCHECK_EQ(found, current_.pos.chars == position);
666 DCHECK_EQ(have_more_data, chunks_.back().length != 0);
667 DCHECK_IMPLIES(!found, !have_more_data);
668 DCHECK_IMPLIES(!found, current_.chunk_no == chunks_.size() - 1);
// Fills buffer_ with decoded characters starting at `position` and returns how
// many UTF-16 code units were produced (0 when the stream is exhausted).
671 size_t Utf8ExternalStreamingStream::FillBuffer(
size_t position) {
// Start with an empty buffer.
672 buffer_cursor_ = buffer_;
673 buffer_end_ = buffer_;
675 SearchPosition(position);
// Out of data when the current chunk exists, has zero length, and no partial
// character is pending.
676 bool out_of_data = current_.chunk_no != chunks_.size() &&
677 chunks_[current_.chunk_no].length == 0 &&
678 current_.pos.incomplete_char == 0;
680 if (out_of_data)
return 0;
// Repeat until something has been decoded or the source runs dry: fetch a new
// chunk when every stored chunk has been consumed, then decode from the
// current chunk.
685 while (!out_of_data && buffer_cursor_ == buffer_end_) {
687 if (current_.chunk_no == chunks_.size()) {
688 out_of_data = !FetchChunk();
690 FillBufferFromCurrentChunk();
// The stream position must have advanced by exactly the number of units
// placed in the buffer.
693 DCHECK_EQ(current_.pos.chars - position,
694 static_cast<size_t>(buffer_end_ - buffer_cursor_));
695 return buffer_end_ - buffer_cursor_;
// Convenience overload: creates a stream over the whole string by forwarding
// to the ranged overload with the range [0, data->length()).
701 Utf16CharacterStream* ScannerStream::For(Isolate* isolate,
702 Handle<String> data) {
703 return ScannerStream::For(isolate, data, 0, data->length());
// Creates a character stream over `data` for the range [start_pos, end_pos),
// choosing the stream implementation from the string's representation. Each
// visible branch returns a raw pointer to a stream allocated with `new`.
706 Utf16CharacterStream* ScannerStream::For(Isolate* isolate, Handle<String> data,
707 int start_pos,
int end_pos) {
// Range sanity checks: 0 <= start_pos <= end_pos <= data->length().
708 DCHECK_GE(start_pos, 0);
709 DCHECK_LE(start_pos, end_pos);
710 DCHECK_LE(end_pos, data->length());
711 size_t start_offset = 0;
// A sliced string is replaced by its parent (looking through a thin-string
// parent), and the slice offset is kept so positions still refer to the slice.
712 if (data->IsSlicedString()) {
713 SlicedString
string = SlicedString::cast(*data);
714 start_offset =
string->offset();
715 String parent =
string->parent();
716 if (parent->IsThinString()) parent = ThinString::cast(parent)->actual();
717 data = handle(parent, isolate);
// NOTE(review): the line between the sliced branch and this flatten is not
// visible in this listing; presumably this is the `else` arm.
719 data = String::Flatten(isolate, data);
// One-byte representations get a buffered stream; external two-byte strings
// are read in place; on-heap two-byte strings use the relocating stream.
721 if (data->IsExternalOneByteString()) {
722 return new BufferedCharacterStream<ExternalStringStream>(
723 static_cast<size_t>(start_pos), ExternalOneByteString::cast(*data),
724 start_offset,
static_cast<size_t>(end_pos));
725 }
else if (data->IsExternalTwoByteString()) {
726 return new UnbufferedCharacterStream<ExternalStringStream>(
727 static_cast<size_t>(start_pos), ExternalTwoByteString::cast(*data),
728 start_offset,
static_cast<size_t>(end_pos));
729 }
else if (data->IsSeqOneByteString()) {
730 return new BufferedCharacterStream<OnHeapStream>(
731 static_cast<size_t>(start_pos), Handle<SeqOneByteString>::cast(data),
732 start_offset,
static_cast<size_t>(end_pos));
733 }
else if (data->IsSeqTwoByteString()) {
734 return new RelocatingCharacterStream(
735 isolate, static_cast<size_t>(start_pos),
736 Handle<SeqTwoByteString>::cast(data), start_offset,
737 static_cast<size_t>(end_pos));
// Test helper: the length is taken with strlen, so `data` must be
// NUL-terminated. Forwards to the (data, length) overload.
743 std::unique_ptr<Utf16CharacterStream> ScannerStream::ForTesting(
745 return ScannerStream::ForTesting(data, strlen(data));
// Test helper: wraps `length` bytes at `data` in a buffered stream over
// TestingStream, starting at position 0. The bytes are reinterpreted as
// uint8_t; the returned unique_ptr owns the new stream object.
748 std::unique_ptr<Utf16CharacterStream> ScannerStream::ForTesting(
749 const char* data,
size_t length) {
750 return std::unique_ptr<Utf16CharacterStream>(
751 new BufferedCharacterStream<TestingStream>(
752 static_cast<size_t>(0), reinterpret_cast<const uint8_t*>(data),
753 static_cast<size_t>(length)));
// Creates a character stream over an embedder-provided source stream, choosing
// the implementation from the declared encoding. Each case returns a raw
// pointer to a stream allocated with `new`.
756 Utf16CharacterStream* ScannerStream::For(
757 ScriptCompiler::ExternalSourceStream* source_stream,
758 v8::ScriptCompiler::StreamedSource::Encoding encoding) {
// Two-byte input is read in place from the chunked stream.
760 case v8::ScriptCompiler::StreamedSource::TWO_BYTE:
761 return new UnbufferedCharacterStream<ChunkedStream>(
762 static_cast<size_t>(0), source_stream);
// One-byte input goes through the buffered stream.
// NOTE(review): the rest of this constructor call is not visible in this
// listing.
763 case v8::ScriptCompiler::StreamedSource::ONE_BYTE:
764 return new BufferedCharacterStream<ChunkedStream>(
static_cast<size_t>(0),
// UTF-8 input uses the dedicated decoding stream.
766 case v8::ScriptCompiler::StreamedSource::UTF8:
767 return new Utf8ExternalStreamingStream(source_stream);
virtual void Unlock() const
virtual size_t GetMoreData(const uint8_t **src)=0
virtual void Lock() const