7 #ifndef V8_PARSING_SCANNER_H_ 8 #define V8_PARSING_SCANNER_H_ 12 #include "src/allocation.h" 13 #include "src/base/logging.h" 14 #include "src/char-predicates.h" 15 #include "src/globals.h" 16 #include "src/message-template.h" 17 #include "src/parsing/token.h" 18 #include "src/pointer-with-payload.h" 19 #include "src/unicode-decoder.h" 26 class AstValueFactory;
27 class ExternalOneByteString;
28 class ExternalTwoByteString;
30 class RuntimeCallStats;
// Sentinel value (-1) for a uc32 character. NOTE(review): by its name this is
// what the stream reports once no more input is available - confirm.
39 static const uc32 kEndOfInput = -1;
// Flags the stream as being in a parser-error state: the cursor is moved to
// the end of the current buffer and has_parser_error_ is raised.
// ReadBlockChecked() refuses to call ReadBlock() while the flag is set.
43 V8_INLINE
void set_parser_error() {
44 buffer_cursor_ = buffer_end_;
45 has_parser_error_ =
true;
// Clears has_parser_error_ only; the cursor position is left untouched.
47 V8_INLINE
void reset_parser_error_flag() { has_parser_error_ =
false; }
// Returns whether the parser-error flag is currently set on this stream.
48 V8_INLINE
bool has_parser_error()
const {
return has_parser_error_; }
// NOTE(review): the signature of the enclosing function is outside this view
// (presumably Peek()).
// Expected-common case: the cursor is still inside the current buffer, so the
// code unit under it is returned (widened to uc32) without moving the cursor.
// Otherwise ReadBlockChecked() is asked for more data and, if it succeeds, the
// code unit now under the cursor is returned.
51 if (V8_LIKELY(buffer_cursor_ < buffer_end_)) {
52 return static_cast<uc32
>(*buffer_cursor_);
53 }
else if (ReadBlockChecked()) {
54 return static_cast<uc32
>(*buffer_cursor_);
// Returns a uc32; the body is not visible in this view.
// NOTE(review): presumably returns the current character and steps the cursor
// forward - confirm against the full file.
62 inline uc32 Advance() {
// Searches forward from the cursor using a caller-supplied callable |check|.
// std::find_if walks [buffer_cursor_, buffer_end_) and hands each 16-bit code
// unit, widened to uc32, to a lambda that captures |check| by reference (the
// rest of the lambda body is not visible in this view).
71 template <
typename FunctionType>
72 V8_INLINE uc32 AdvanceUntil(FunctionType check) {
74 auto next_cursor_pos =
75 std::find_if(buffer_cursor_, buffer_end_, [&check](uint16_t raw_c0_) {
76 uc32 c0_ =
static_cast<uc32
>(raw_c0_);
// No element of the current buffer satisfied the search: park the cursor at
// the buffer end and try to load another block.
80 if (next_cursor_pos == buffer_end_) {
81 buffer_cursor_ = buffer_end_;
82 if (!ReadBlockChecked()) {
// The cursor is left one past next_cursor_pos and the code unit at
// next_cursor_pos is returned.
87 buffer_cursor_ = next_cursor_pos + 1;
88 return static_cast<uc32
>(*next_cursor_pos);
// NOTE(review): signature and parts of the body are outside this view
// (presumably Back(), stepping the cursor one position backwards).
// The expected-common case is a cursor that is past the buffer start.
// ReadBlockAt(pos() - 1) re-anchors the buffer one position before the
// current stream position.
99 if (V8_LIKELY(buffer_cursor_ > buffer_start_)) {
102 ReadBlockAt(pos() - 1);
// Current stream position: the position of the buffer start (buffer_pos_)
// plus the cursor's offset from buffer_start_.
106 inline size_t pos()
const {
107 return buffer_pos_ + (buffer_cursor_ - buffer_start_);
// Moves the cursor to stream position |pos|.
// When |pos| lies inside the range covered by the current buffer
// [buffer_pos_, buffer_pos_ + buffer length) the cursor is simply repositioned
// within that buffer. The handling of other positions is not visible here.
110 inline void Seek(
size_t pos) {
111 if (V8_LIKELY(pos >= buffer_pos_ &&
112 pos < (buffer_pos_ + (buffer_end_ - buffer_start_)))) {
113 buffer_cursor_ = buffer_start_ + (pos - buffer_pos_);
// True when the stream reports that it can be cloned and that it does not
// access the heap.
120 bool can_be_cloned_for_parallel_access()
const {
121 return can_be_cloned() && !can_access_heap();
// Pure virtual: whether this stream supports Clone().
126 virtual bool can_be_cloned()
const = 0;
// Pure virtual: produces a new stream owned by the returned unique_ptr.
130 virtual std::unique_ptr<Utf16CharacterStream> Clone()
const = 0;
// Pure virtual: whether this stream can access the heap.
133 virtual bool can_access_heap()
const = 0;
// Returns the RuntimeCallStats pointer stored on this stream.
135 RuntimeCallStats* runtime_call_stats()
const {
return runtime_call_stats_; }
// Body of the matching setter (its signature is not visible in this view):
// stores the supplied pointer.
137 runtime_call_stats_ = runtime_call_stats;
// Tail of a constructor whose first line is outside this view. It copies the
// buffer start, cursor and end pointers and the stream position of the buffer
// start into the corresponding members; the body is empty.
142 const uint16_t* buffer_cursor,
143 const uint16_t* buffer_end,
size_t buffer_pos)
144 : buffer_start_(buffer_start),
145 buffer_cursor_(buffer_cursor),
146 buffer_end_(buffer_end),
147 buffer_pos_(buffer_pos) {}
// Wrapper around the virtual ReadBlock() that verifies its contract in debug
// builds. The stream position is remembered before the read.
150 bool ReadBlockChecked() {
151 size_t position = pos();
// ReadBlock() is not called at all once a parser error has been flagged.
153 bool success = !has_parser_error() && ReadBlock();
// Post-conditions: the stream position did not change, the cursor lies within
// [buffer_start_, buffer_end_], and success is reported exactly when there is
// at least one unread code unit in the buffer.
158 DCHECK_EQ(pos(), position);
159 DCHECK_LE(buffer_cursor_, buffer_end_);
160 DCHECK_LE(buffer_start_, buffer_cursor_);
161 DCHECK_EQ(success, buffer_cursor_ < buffer_end_);
// Re-anchors the buffer at stream position |new_pos|.
165 void ReadBlockAt(
size_t new_pos) {
// Debug-only precondition: |new_pos| must lie outside the range covered by the
// current buffer.
170 DCHECK(new_pos < buffer_pos_ ||
171 new_pos >= buffer_pos_ + (buffer_end_ - buffer_start_));
// With the cursor rewound to buffer_start_, pos() evaluates to |new_pos|.
174 buffer_pos_ = new_pos;
175 buffer_cursor_ = buffer_start_;
176 DCHECK_EQ(pos(), new_pos);
// Pure virtual hook implemented by concrete streams. ReadBlockChecked()
// asserts that a call leaves pos() unchanged and returns true exactly when
// buffer_cursor_ < buffer_end_ afterwards.
194 virtual bool ReadBlock() = 0;
// Bounds of the current buffer of 16-bit code units and the read cursor
// inside it.
196 const uint16_t* buffer_start_;
197 const uint16_t* buffer_cursor_;
198 const uint16_t* buffer_end_;
// Set by set_parser_error(), cleared by reset_parser_error_flag().
201 bool has_parser_error_ =
false;
// NOTE(review): fragment of a helper class whose header is outside this view
// (presumably Scanner::BookmarkScope).
// Constructor tail: the bookmark starts out as kNoBookmark and the scanner's
// parser-error state at construction time is recorded. The scanner pointer
// must not be null.
214 bookmark_(kNoBookmark),
215 had_parser_error_(scanner->has_parser_error()) {
216 DCHECK_NOT_NULL(scanner_);
// State queries; defined elsewhere.
222 bool HasBeenSet()
const;
223 bool HasBeenApplied()
const;
// Special bookmark values; their definitions are not visible in this view.
226 static const size_t kNoBookmark;
227 static const size_t kBookmarkWasApplied;
228 static const size_t kBookmarkAtFirstPos;
// Parser-error state of the scanner captured by the constructor.
232 bool had_parser_error_;
// Puts the scanner into the parser-error state. Only acts if no error has
// been flagged yet: the underlying stream is flagged and the token of every
// entry in token_storage_ is overwritten with Token::ILLEGAL.
239 V8_INLINE
void set_parser_error() {
240 if (!has_parser_error()) {
242 source_->set_parser_error();
243 for (TokenDesc& desc : token_storage_) desc.token = Token::ILLEGAL;
// Forwards to the stream, clearing its parser-error flag.
246 V8_INLINE
void reset_parser_error_flag() {
247 source_->reset_parser_error_flag();
// The error flag lives on the stream; the scanner only forwards the query.
249 V8_INLINE
bool has_parser_error()
const {
250 return source_->has_parser_error();
// Members of the Location type (its header is outside this view).
// Builds a location from a begin and an end position.
255 Location(
int b,
int e) : beg_pos(b), end_pos(e) { }
// Default location: both positions are 0.
256 Location() : beg_pos(0), end_pos(0) { }
// Distance between the end and begin positions.
258 int length()
const {
return end_pos - beg_pos; }
// Valid means a non-negative begin position that does not exceed the end
// position.
259 bool IsValid()
const {
return beg_pos >= 0 && end_pos >= beg_pos; }
// Sentinel (-1). NOTE(review): by its name it denotes "no octal location".
268 static const int kNoOctalLocation = -1;
// Re-exports the stream's sentinel so scanner code can refer to it directly.
269 static const uc32 kEndOfInput = Utf16CharacterStream::kEndOfInput;
// Declared here, defined elsewhere. HasLineTerminatorAfterNext() calls it
// before reading next_next().
278 Token::Value PeekAhead();
// Token value stored in the current token slot.
280 Token::Value current_token()
const {
return current().token; }
// Source location stored in the current token slot.
284 const Location& location()
const {
return current().location; }
// A scanner error is pending once scanner_error_ holds anything other than
// MessageTemplate::kNone.
287 bool has_error()
const {
return scanner_error_ != MessageTemplate::kNone; }
// The recorded scanner error (kNone when there is none).
288 MessageTemplate error()
const {
return scanner_error_; }
// Location recorded together with the scanner error.
289 const Location& error_location()
const {
return scanner_error_location_; }
// Whether the current token carries an invalid-template-escape message.
291 bool has_invalid_template_escape()
const {
292 return current().invalid_template_escape_message != MessageTemplate::kNone;
// Returns that message; may only be called when one is present (DCHECK).
294 MessageTemplate invalid_template_escape_message()
const {
295 DCHECK(has_invalid_template_escape());
296 return current().invalid_template_escape_message;
// Resets the message on the current token to kNone; may only be called when
// one is present (DCHECK).
299 void clear_invalid_template_escape_message() {
300 DCHECK(has_invalid_template_escape());
301 current_->invalid_template_escape_message = MessageTemplate::kNone;
// Returns a copy of the location stored with the message; may only be called
// when a message is present (DCHECK).
304 Location invalid_template_escape_location()
const {
305 DCHECK(has_invalid_template_escape());
306 return current().invalid_template_escape_location;
// Token value stored in the "next" token slot; no scanning happens here.
312 Token::Value peek()
const {
return next().token; }
// Source location stored in the "next" token slot.
314 const Location& peek_location()
const {
return next().location; }
// Applies the static LiteralContainsEscapes() check to the current token.
316 bool literal_contains_escapes()
const {
317 return LiteralContainsEscapes(current());
// Declarations only; the definitions are outside this view.
// NOTE(review): by their names these return the AstRawString for the current
// token, the next token, and the raw form of the current token, obtained via
// |ast_value_factory| - confirm.
320 const AstRawString* CurrentSymbol(AstValueFactory* ast_value_factory)
const;
322 const AstRawString* NextSymbol(AstValueFactory* ast_value_factory)
const;
323 const AstRawString* CurrentRawSymbol(
324 AstValueFactory* ast_value_factory)
const;
// Non-const, returns a double; defined elsewhere.
326 double DoubleValue();
// Returns a C string; takes the Zone as a parameter. Defined elsewhere.
328 const char* CurrentLiteralAsCString(Zone* zone)
const;
// Compares the current token with |token|. Debug builds require |token| to be
// a keyword token.
330 inline bool CurrentMatches(Token::Value token)
const {
331 DCHECK(Token::IsKeyword(token));
332 return current().token == token;
// Compares the literal of the next token with the character array |s| of
// extent N (the template header declaring N is outside this view). The next
// token must be a STRING (DCHECK).
336 bool NextLiteralEquals(
const char (&s)[N]) {
337 DCHECK_EQ(Token::STRING, peek());
// Cheap rejections first: a two-byte literal never matches, and the token's
// source span must be exactly N + 1 long.
// NOTE(review): presumably N - 1 characters (N counts a trailing NUL when |s|
// is a string literal) plus the two enclosing quotes - confirm.
341 if (!is_next_literal_one_byte())
return false;
342 if (peek_location().length() != N + 1)
return false;
// Compare the first N - 1 chars of |s| with the literal's one-byte contents.
344 Vector<const uint8_t> next = next_literal_one_byte_string();
345 const char* chars =
reinterpret_cast<const char*
>(next.start());
346 return next.length() == N - 1 && strncmp(s, chars, N - 1) == 0;
// Returns a copy of the recorded octal position.
350 Location octal_position()
const {
return octal_pos_; }
// Resets the octal bookkeeping: position becomes Location::invalid() and the
// message becomes kNone.
351 void clear_octal_position() {
352 octal_pos_ = Location::invalid();
353 octal_message_ = MessageTemplate::kNone;
// Message recorded alongside the octal position.
355 MessageTemplate octal_message()
const {
return octal_message_; }
// Returns the smi_value_ field of the current token.
358 uint32_t smi_value()
const {
return current().smi_value_; }
// Declaration only; defined elsewhere.
// NOTE(review): presumably repositions scanning at source position |pos|.
364 void SeekForward(
int pos);
// Reads the after_line_terminator flag of the next token.
368 bool HasLineTerminatorBeforeNext()
const {
369 return next().after_line_terminator;
// Reads the after_line_terminator flag of the token after next. PeekAhead()
// is called first; its result is only passed to USE() and otherwise ignored.
372 bool HasLineTerminatorAfterNext() {
373 Token::Value ensure_next_next = PeekAhead();
374 USE(ensure_next_next);
375 return next_next().after_line_terminator;
// Declarations only; defined elsewhere. The pattern scan reports a bool, the
// flag scan a Maybe<RegExp::Flags>.
380 bool ScanRegExpPattern();
382 Maybe<RegExp::Flags> ScanRegExpFlags();
// Delegates to ScanTemplateSpan(). Debug builds require that the next token is
// RBRACE and that it begins one position before the current source position.
385 Token::Value ScanTemplateContinuation() {
386 DCHECK_EQ(next().token, Token::RBRACE);
387 DCHECK_EQ(source_pos() - 1, next().location.beg_pos);
388 return ScanTemplateSpan();
// Declarations only; defined elsewhere. Both return a Handle<String>.
// NOTE(review): presumably built from the source_url_ / source_mapping_url_
// buffers declared further down - confirm.
391 Handle<String> SourceUrl(Isolate* isolate)
const;
392 Handle<String> SourceMappingUrl(Isolate* isolate)
const;
// Returns the found_html_comment_ flag.
394 bool FoundHtmlComment()
const {
return found_html_comment_; }
// Getter/setter pair for the allow_harmony_private_fields_ flag.
396 bool allow_harmony_private_fields()
const {
397 return allow_harmony_private_fields_;
399 void set_allow_harmony_private_fields(
bool allow) {
400 allow_harmony_private_fields_ = allow;
// Getter/setter pair for the allow_harmony_numeric_separator_ flag.
402 bool allow_harmony_numeric_separator()
const {
403 return allow_harmony_numeric_separator_;
405 void set_allow_harmony_numeric_separator(
bool allow) {
406 allow_harmony_numeric_separator_ = allow;
// Read-only access to the character stream the scanner reads from.
409 const Utf16CharacterStream* stream()
const {
return source_; }
// Growable byte buffer that accumulates the characters of a literal.
// It starts out in one-byte mode. position_ is a byte offset into
// backing_store_: length() returns it unchanged in one-byte mode and halved in
// two-byte mode.
418 class LiteralBuffer {
// Starts with an empty backing store, position 0, in one-byte mode.
420 LiteralBuffer() : backing_store_(), position_(0), is_one_byte_(true) {}
// Disposes the backing store.
422 ~LiteralBuffer() { backing_store_.Dispose(); }
// Appends a plain char through the one-byte path; debug builds require it to
// pass IsValidAscii().
424 V8_INLINE
void AddChar(
char code_unit) {
425 DCHECK(IsValidAscii(code_unit));
426 AddOneByteChar(static_cast<byte>(code_unit));
// Appends a uc32 code unit. Values up to Latin1::kMaxChar can take the
// one-byte path; AddTwoByteChar() is the other path. (Intervening lines of
// this method are not visible in this view.)
429 V8_INLINE
void AddChar(uc32 code_unit) {
431 if (code_unit <= static_cast<uc32>(unibrow::Latin1::kMaxChar)) {
432 AddOneByteChar(static_cast<byte>(code_unit));
437 AddTwoByteChar(code_unit);
// Whether the buffer is in one-byte mode.
440 bool is_one_byte()
const {
return is_one_byte_; }
// True when the buffer is one-byte, has the same length as |keyword| and the
// bytes compare equal.
442 bool Equals(Vector<const char> keyword)
const {
443 return is_one_byte() && keyword.length() == position_ &&
444 (memcmp(keyword.start(), backing_store_.start(), position_) == 0);
// View of the contents as 16-bit code units. Requires two-byte mode and an
// even byte position (DCHECKs).
447 Vector<const uint16_t> two_byte_literal()
const {
448 DCHECK(!is_one_byte());
449 DCHECK_EQ(position_ & 0x1, 0);
450 return Vector<const uint16_t>(
451 reinterpret_cast<const uint16_t*
>(backing_store_.start()),
// View of the first position_ bytes. Requires one-byte mode (DCHECK).
455 Vector<const uint8_t> one_byte_literal()
const {
456 DCHECK(is_one_byte());
457 return Vector<const uint8_t>(
458 reinterpret_cast<const uint8_t*
>(backing_store_.start()), position_);
// Number of characters stored: the byte position in one-byte mode, half of it
// in two-byte mode.
461 int length()
const {
return is_one_byte() ? position_ : (position_ >> 1); }
// Declaration only; defined elsewhere. Returns a Handle<String>.
468 Handle<String> Internalize(Isolate* isolate)
const;
// Sizing constants; how they are used is not visible in this view.
// NOTE(review): presumably consumed by NewCapacity() / ExpandBuffer().
471 static const int kInitialCapacity = 16;
472 static const int kGrowthFactory = 4;
473 static const int kMinConversionSlack = 256;
474 static const int kMaxGrowth = 1 * MB;
// Accepts characters classified as control or printable by <cctype>.
// (Lines preceding the return statement are not visible in this view.)
476 inline bool IsValidAscii(
char code_unit) {
481 return iscntrl(code_unit) || isprint(code_unit);
// Appends one byte. Requires one-byte mode (DCHECK). The buffer is expanded
// first when position_ has reached its length.
484 V8_INLINE
void AddOneByteChar(byte one_byte_char) {
485 DCHECK(is_one_byte());
486 if (position_ >= backing_store_.length()) ExpandBuffer();
487 backing_store_[position_] = one_byte_char;
488 position_ += kOneByteSize;
// Declarations only; defined elsewhere.
491 void AddTwoByteChar(uc32 code_unit);
492 int NewCapacity(
int min_capacity);
494 void ConvertToTwoByte();
// Raw storage for the literal's bytes.
496 Vector<byte> backing_store_;
501 DISALLOW_COPY_AND_ASSIGN(LiteralBuffer);
// Fields of the per-token descriptor (the struct header is outside this view;
// the type is referred to as TokenDesc elsewhere in the file).
506 Location location = {0, 0};
// Literal contents of the token and, separately, its raw contents.
507 LiteralBuffer literal_chars;
508 LiteralBuffer raw_literal_chars;
509 Token::Value token = Token::UNINITIALIZED;
// kNone unless an invalid template escape was recorded for this token.
510 MessageTemplate invalid_template_escape_message = MessageTemplate::kNone;
511 Location invalid_template_escape_location;
513 bool after_line_terminator =
false;
// Token kinds for which the literal accessors' DCHECKs allow reading
// literal_chars.
516 bool CanAccessLiteral()
const {
517 return token == Token::PRIVATE_NAME || token == Token::ILLEGAL ||
518 token == Token::UNINITIALIZED || token == Token::REGEXP_LITERAL ||
519 token == Token::ESCAPED_KEYWORD ||
520 IsInRange(token, Token::NUMBER, Token::STRING) ||
521 (Token::IsAnyIdentifier(token) && !Token::IsKeyword(token)) ||
522 IsInRange(token, Token::TEMPLATE_SPAN, Token::TEMPLATE_TAIL);
// Token kinds for which raw_literal_chars may be read: ILLEGAL, UNINITIALIZED
// and the template span/tail range.
524 bool CanAccessRawLiteral()
const {
525 return token == Token::ILLEGAL || token == Token::UNINITIALIZED ||
526 IsInRange(token, Token::TEMPLATE_SPAN, Token::TEMPLATE_TAIL);
// Last visible enumerator of an enum whose header is outside this view
// (NOTE(review): presumably NumberKind, used by ScanImplicitOctalDigits).
537 DECIMAL_WITH_LEADING_ZERO
// Subtracted from the stream position when computing the scanner's source
// position.
540 static const int kCharacterLookaheadBufferSize = 1;
// Highest 7-bit ASCII code.
541 static const int kMaxAscii = 127;
// Declaration only; defined elsewhere. Templated on capture_raw.
// NOTE(review): presumably scans an octal escape starting with |c| of at most
// |length| further digits - confirm.
544 template <
bool capture_raw>
545 uc32 ScanOctalEscape(uc32 c,
int length);
// NOTE(review): fragment of an initialisation routine whose signature is
// outside this view.
// The code relies on exactly one character of lookahead.
550 STATIC_ASSERT(kCharacterLookaheadBufferSize == 1);
// The three token slots initially map onto token_storage_[0..2] in order.
553 current_ = &token_storage_[0];
554 next_ = &token_storage_[1];
555 next_next_ = &token_storage_[2];
// Start with no HTML comment seen and no scanner error recorded.
557 found_html_comment_ =
false;
558 scanner_error_ = MessageTemplate::kNone;
// Records a scanner error and its location. Only the first error is kept: if
// one is already pending the call returns without changing anything.
561 void ReportScannerError(
const Location& location, MessageTemplate error) {
562 if (has_error())
return;
563 scanner_error_ = error;
564 scanner_error_location_ = location;
// Same, for an error at a single position: the recorded location is
// Location(pos, pos + 1).
567 void ReportScannerError(
int pos, MessageTemplate error) {
568 if (has_error())
return;
569 scanner_error_ = error;
570 scanner_error_location_ = Location(pos, pos + 1);
// Declaration only; defined elsewhere.
574 void SeekNext(
size_t position);
// Appends |c| to the literal buffer of the token being scanned (the "next"
// slot). Two overloads mirror LiteralBuffer::AddChar (uc32 and char).
576 V8_INLINE
void AddLiteralChar(uc32 c) { next().literal_chars.AddChar(c); }
578 V8_INLINE
void AddLiteralChar(
char c) { next().literal_chars.AddChar(c); }
// Appends |c| to the raw literal buffer of the "next" slot.
580 V8_INLINE
void AddRawLiteralChar(uc32 c) {
581 next().raw_literal_chars.AddChar(c);
// Header only; the body is not visible in this view.
584 V8_INLINE
void AddLiteralCharAdvance() {
// NOTE(review): fragment; the function signature and the condition guarding
// the raw-literal append are outside this view (presumably Advance(), with the
// append happening only when capture_raw is true).
// The current character c0_ is appended to the raw literal, then c0_ is
// refilled from the stream.
590 template <
bool capture_raw = false>
593 AddRawLiteralChar(c0_);
595 c0_ = source_->Advance();
// Forwards |check| to the stream's AdvanceUntil() and stores the character it
// returns in c0_.
598 template <
typename FunctionType>
599 V8_INLINE
void AdvanceUntil(FunctionType check) {
600 c0_ = source_->AdvanceUntil(check);
// If c0_ is a UTF-16 lead surrogate, reads one more code unit from the stream;
// if that is a trail surrogate, c0_ is replaced by the combined code point.
// The DCHECKs document that kEndOfInput is neither a lead nor a trail
// surrogate, so hitting the end of input cannot be mistaken for either.
// (The return statements are not visible in this view.)
603 bool CombineSurrogatePair() {
604 DCHECK(!unibrow::Utf16::IsLeadSurrogate(kEndOfInput));
605 if (unibrow::Utf16::IsLeadSurrogate(c0_)) {
606 uc32 c1 = source_->Advance();
607 DCHECK(!unibrow::Utf16::IsTrailSurrogate(kEndOfInput));
608 if (unibrow::Utf16::IsTrailSurrogate(c1)) {
609 c0_ = unibrow::Utf16::CombineSurrogatePair(c0_, c1);
// Only the debug check is visible: the current character c0_ must not exceed
// Utf16::kMaxNonSurrogateCharCode when this is called.
// NOTE(review): presumably un-reads one character and makes |ch| current.
617 void PushBack(uc32 ch) {
618 DCHECK_LE(c0_, static_cast<uc32>(unibrow::Utf16::kMaxNonSurrogateCharCode));
// Forwards to the stream's Peek().
623 uc32 Peek()
const {
return source_->Peek(); }
// Headers only; both bodies are outside this view.
// NOTE(review): presumably the one-argument form consumes a character and
// returns |tok|, and the three-argument form returns |then| or |else_|
// depending on whether the following character equals |next| - confirm.
625 inline Token::Value Select(Token::Value tok) {
630 inline Token::Value Select(uc32 next, Token::Value then, Token::Value else_) {
// Accessors for the literal of the CURRENT token. Debug builds require that
// the token either permits literal access or is a keyword.
652 Vector<const uint8_t> literal_one_byte_string()
const {
653 DCHECK(current().CanAccessLiteral() || Token::IsKeyword(current().token));
654 return current().literal_chars.one_byte_literal();
656 Vector<const uint16_t> literal_two_byte_string()
const {
657 DCHECK(current().CanAccessLiteral() || Token::IsKeyword(current().token));
658 return current().literal_chars.two_byte_literal();
660 bool is_literal_one_byte()
const {
661 DCHECK(current().CanAccessLiteral() || Token::IsKeyword(current().token));
662 return current().literal_chars.is_one_byte();
// Accessors for the literal of the NEXT token. Here the DCHECK has no keyword
// exception.
666 Vector<const uint8_t> next_literal_one_byte_string()
const {
667 DCHECK(next().CanAccessLiteral());
668 return next().literal_chars.one_byte_literal();
670 Vector<const uint16_t> next_literal_two_byte_string()
const {
671 DCHECK(next().CanAccessLiteral());
672 return next().literal_chars.two_byte_literal();
674 bool is_next_literal_one_byte()
const {
675 DCHECK(next().CanAccessLiteral());
676 return next().literal_chars.is_one_byte();
// Accessors for the RAW literal of the current token, guarded by
// CanAccessRawLiteral().
678 Vector<const uint8_t> raw_literal_one_byte_string()
const {
679 DCHECK(current().CanAccessRawLiteral());
680 return current().raw_literal_chars.one_byte_literal();
682 Vector<const uint16_t> raw_literal_two_byte_string()
const {
683 DCHECK(current().CanAccessRawLiteral());
684 return current().raw_literal_chars.two_byte_literal();
686 bool is_raw_literal_one_byte()
const {
687 DCHECK(current().CanAccessRawLiteral());
688 return current().raw_literal_chars.is_one_byte();
// Declarations only; defined elsewhere. Both return a uc32 and are templated
// on capture_raw; ScanHexNumber additionally on |unicode| (default false).
691 template <
bool capture_raw,
bool unicode = false>
692 uc32 ScanHexNumber(
int expected_length);
696 template <
bool capture_raw>
697 uc32 ScanUnlimitedLengthHexNumber(
int max_value,
int beg_pos);
// Declarations of the scanning routines; all definitions are outside this
// view, so only what the signatures show is described.
// Token-level entry points. Scan() is overloaded: without arguments and with
// the TokenDesc to fill.
700 V8_INLINE Token::Value ScanSingleToken();
701 V8_INLINE
void Scan();
706 V8_INLINE
void Scan(TokenDesc* next_desc);
// Whitespace and comment handling.
708 V8_INLINE Token::Value SkipWhiteSpace();
709 Token::Value SkipSingleHTMLComment();
710 Token::Value SkipSingleLineComment();
711 Token::Value SkipSourceURLComment();
712 void TryToParseSourceURLComment();
713 Token::Value SkipMultiLineComment();
715 Token::Value ScanHtmlComment();
// Digit-run scanners; each reports its outcome as a bool. |predicate| decides
// which characters count as digits.
717 bool ScanDigitsWithNumericSeparators(
bool (*predicate)(uc32 ch),
718 bool is_check_first_digit);
719 bool ScanDecimalDigits();
// These two write through the |value| out-parameter.
721 bool ScanDecimalAsSmi(uint64_t* value);
722 bool ScanDecimalAsSmiWithNumericSeparators(uint64_t* value);
723 bool ScanHexDigits();
724 bool ScanBinaryDigits();
725 bool ScanSignedInteger();
726 bool ScanOctalDigits();
727 bool ScanImplicitOctalDigits(
int start_pos, NumberKind* kind);
// Literal, identifier and keyword scanners.
729 Token::Value ScanNumber(
bool seen_period);
730 V8_INLINE Token::Value ScanIdentifierOrKeyword();
731 V8_INLINE Token::Value ScanIdentifierOrKeywordInner();
732 Token::Value ScanIdentifierOrKeywordInnerSlow(
bool escaped,
733 bool can_be_keyword);
735 Token::Value ScanString();
736 Token::Value ScanPrivateName();
// NOTE(review): the declaration this template header belongs to (original
// lines 742-745) is not visible; ScanIdentifierUnicodeEscape() below is a
// separate, non-template declaration as far as this view shows.
741 template <
bool capture_raw>
746 uc32 ScanIdentifierUnicodeEscape();
748 template <
bool capture_raw>
749 uc32 ScanUnicodeEscape();
751 Token::Value ScanTemplateSpan();
// NOTE(review): body of an accessor whose signature is outside this view
// (presumably source_pos()).
// The stream position is reduced by the lookahead size.
755 return static_cast<int>(source_->pos()) - kCharacterLookaheadBufferSize;
// Decides whether a token's literal was written with escapes by comparing the
// literal's character count with the length of its source span: a difference
// means the source text and the literal are not character-for-character the
// same.
758 static bool LiteralContainsEscapes(
const TokenDesc& token) {
759 Location location = token.location;
760 int source_length = (location.end_pos - location.beg_pos);
// STRING tokens get special treatment; the adjustment itself is not visible
// in this view (NOTE(review): presumably discounting the two quotes).
761 if (token.token == Token::STRING) {
765 return token.literal_chars.length() != source_length;
// Declaration only; defined elsewhere.
769 void SanityCheckTokenDesc(
const TokenDesc&)
const;
// Mutable access to the "next" slot, used while a token is being built
// (e.g. by AddLiteralChar).
772 TokenDesc& next() {
return *next_; }
// Read-only access to the three token slots.
774 const TokenDesc& current()
const {
return *current_; }
775 const TokenDesc& next()
const {
return *next_; }
776 const TokenDesc& next_next()
const {
return *next_next_; }
// Slot pointer for the token after next; points into token_storage_.
780 TokenDesc* next_next_;
// Input stream; the pointer itself is const, the stream is not.
783 Utf16CharacterStream*
const source_;
// Backing storage for the current / next / next-next token slots.
788 TokenDesc token_storage_[3];
// Exposed through FoundHtmlComment().
791 bool found_html_comment_;
// Feature flags, each with a getter/setter pair above.
794 bool allow_harmony_private_fields_;
795 bool allow_harmony_numeric_separator_;
797 const bool is_module_;
// Literal buffers. NOTE(review): presumably the values behind SourceUrl() and
// SourceMappingUrl() - confirm.
800 LiteralBuffer source_url_;
801 LiteralBuffer source_mapping_url_;
// Reset to kNone by clear_octal_position().
805 MessageTemplate octal_message_;
// First scanner error reported and where it occurred; kNone means no error.
807 MessageTemplate scanner_error_;
808 Location scanner_error_location_;
814 #endif // V8_PARSING_SCANNER_H_