// Include guard and dependencies for the inline scanner helpers, followed by
// the KEYWORDS X-macro. Each KEYWORD(text, token) entry pairs a keyword
// spelling with the Token value the scanner reports for it; the spellings
// implements, interface, package, private, protected and public all map to
// Token::FUTURE_STRICT_RESERVED_WORD.
//
// IsKeywordStart(c) is generated from that list: KEYWORD_GROUP_CHECK(ch)
// expands to `c == ch ||`, KEYWORD_CHECK expands to nothing, and the chain is
// closed with `false`, so the result is true only when c equals one of the
// group characters.
// NOTE(review): no KEYWORD_GROUP(...) entries are visible in this excerpt (the
// embedded numbering skips lines) - confirm the group list against the full
// file.
5 #ifndef V8_PARSING_SCANNER_INL_H_ 6 #define V8_PARSING_SCANNER_INL_H_ 8 #include "src/char-predicates-inl.h" 9 #include "src/parsing/keywords-gen.h" 10 #include "src/parsing/scanner.h" 18 #define KEYWORDS(KEYWORD_GROUP, KEYWORD) \ 20 KEYWORD("async", Token::ASYNC) \ 21 KEYWORD("await", Token::AWAIT) \ 23 KEYWORD("break", Token::BREAK) \ 25 KEYWORD("case", Token::CASE) \ 26 KEYWORD("catch", Token::CATCH) \ 27 KEYWORD("class", Token::CLASS) \ 28 KEYWORD("const", Token::CONST) \ 29 KEYWORD("continue", Token::CONTINUE) \ 31 KEYWORD("debugger", Token::DEBUGGER) \ 32 KEYWORD("default", Token::DEFAULT) \ 33 KEYWORD("delete", Token::DELETE) \ 34 KEYWORD("do", Token::DO) \ 36 KEYWORD("else", Token::ELSE) \ 37 KEYWORD("enum", Token::ENUM) \ 38 KEYWORD("export", Token::EXPORT) \ 39 KEYWORD("extends", Token::EXTENDS) \ 41 KEYWORD("false", Token::FALSE_LITERAL) \ 42 KEYWORD("finally", Token::FINALLY) \ 43 KEYWORD("for", Token::FOR) \ 44 KEYWORD("function", Token::FUNCTION) \ 46 KEYWORD("if", Token::IF) \ 47 KEYWORD("implements", Token::FUTURE_STRICT_RESERVED_WORD) \ 48 KEYWORD("import", Token::IMPORT) \ 49 KEYWORD("in", Token::IN) \ 50 KEYWORD("instanceof", Token::INSTANCEOF) \ 51 KEYWORD("interface", Token::FUTURE_STRICT_RESERVED_WORD) \ 53 KEYWORD("let", Token::LET) \ 55 KEYWORD("new", Token::NEW) \ 56 KEYWORD("null", Token::NULL_LITERAL) \ 58 KEYWORD("package", Token::FUTURE_STRICT_RESERVED_WORD) \ 59 KEYWORD("private", Token::FUTURE_STRICT_RESERVED_WORD) \ 60 KEYWORD("protected", Token::FUTURE_STRICT_RESERVED_WORD) \ 61 KEYWORD("public", Token::FUTURE_STRICT_RESERVED_WORD) \ 63 KEYWORD("return", Token::RETURN) \ 65 KEYWORD("static", Token::STATIC) \ 66 KEYWORD("super", Token::SUPER) \ 67 KEYWORD("switch", Token::SWITCH) \ 69 KEYWORD("this", Token::THIS) \ 70 KEYWORD("throw", Token::THROW) \ 71 KEYWORD("true", Token::TRUE_LITERAL) \ 72 KEYWORD("try", Token::TRY) \ 73 KEYWORD("typeof", Token::TYPEOF) \ 75 KEYWORD("var", Token::VAR) \ 76 KEYWORD("void", Token::VOID) \ 78 
KEYWORD("while", Token::WHILE) \ 79 KEYWORD("with", Token::WITH) \ 81 KEYWORD("yield", Token::YIELD) 83 constexpr
bool IsKeywordStart(
char c) {
84 #define KEYWORD_GROUP_CHECK(ch) c == ch || 85 #define KEYWORD_CHECK(keyword, token) 86 return KEYWORDS(KEYWORD_GROUP_CHECK, KEYWORD_CHECK)
false;
// Resolves a one-byte identifier to its token by handing the character data
// to PerfectKeywordHash::GetToken. The DCHECK requires input_length >= 1.
// NOTE(review): the input_length parameter declaration and the tail of the
// GetToken call are not visible in this excerpt.
87 #undef KEYWORD_GROUP_CHECK 91 V8_INLINE Token::Value KeywordOrIdentifierToken(
const uint8_t* input,
93 DCHECK_GE(input_length, 1);
94 return PerfectKeywordHash::GetToken(reinterpret_cast<const char*>(input),
// Recursive constexpr membership test over a character array.
//   s: reference to an array of N chars to search.
//   c: character to look for.
//   i: index at which the search starts (defaults to 0).
// Returns false once i reaches N, true when s[i] equals c, and otherwise
// recurses with i + 1. Because the bound is N, the last array element is
// compared as well.
// NOTE(review): the declaration of N is not visible in this excerpt;
// presumably a template parameter - confirm.
101 constexpr
bool IsInString(
const char (&s)[N],
char c,
size_t i = 0) {
102 return i >= N ? false : s[
i] == c ? true : IsInString(s, c,
i + 1);
// Expands KEYWORDS with KEYWORD_GROUP_CASE defined as empty and KEYWORD
// defined as its first argument, so the expansion is the sequence of keyword
// string literals (adjacent literals form a single string).
// NOTE(review): how that string is consumed is not visible in this excerpt;
// presumably it is searched for c via IsInString - confirm. Only
// KEYWORD_GROUP_CASE is visibly undefined afterwards; check that KEYWORD is
// undefined as well in the full file.
105 inline constexpr
bool CanBeKeywordCharacter(
char c) {
107 #define KEYWORD_GROUP_CASE(ch)
108 #define KEYWORD(keyword, token) keyword
111 KEYWORDS(KEYWORD_GROUP_CASE, KEYWORD)
113 #undef KEYWORD_GROUP_CASE
// Compile-time check that Token::Value occupies exactly one byte.
119 STATIC_ASSERT(
sizeof(Token::Value) == 1);
// Classifies a single character into the token that scanning should start
// from, written as one chained conditional expression so it can be evaluated
// at compile time.
//   - Punctuators map to their single-character token (for example LPAREN,
//     LT, ASSIGN); multi-character operators are not decided here.
//   - Both quote characters map to STRING, the backtick to TEMPLATE_SPAN and
//     # to PRIVATE_NAME.
//   - A backslash maps to IDENTIFIER.
//   - Space, tab, vertical tab, form feed, CR and LF map to WHITESPACE.
//   - Decimal digits map to NUMBER, ASCII identifier characters to IDENTIFIER.
// NOTE(review): the leading `return` and the final fallback value of the
// chain are not visible in this excerpt.
123 constexpr Token::Value GetOneCharToken(
char c) {
126 c ==
'(' ? Token::LPAREN :
127 c ==
')' ? Token::RPAREN :
128 c ==
'{' ? Token::LBRACE :
129 c ==
'}' ? Token::RBRACE :
130 c ==
'[' ? Token::LBRACK :
131 c ==
']' ? Token::RBRACK :
132 c ==
'?' ? Token::CONDITIONAL :
133 c ==
':' ? Token::COLON :
134 c ==
';' ? Token::SEMICOLON :
135 c ==
',' ? Token::COMMA :
136 c ==
'.' ? Token::PERIOD :
137 c ==
'|' ? Token::BIT_OR :
138 c ==
'&' ? Token::BIT_AND :
139 c ==
'^' ? Token::BIT_XOR :
140 c ==
'~' ? Token::BIT_NOT :
141 c ==
'!' ? Token::NOT :
142 c ==
'<' ? Token::LT :
143 c ==
'>' ? Token::GT :
144 c ==
'%' ? Token::MOD :
145 c ==
'=' ? Token::ASSIGN :
146 c ==
'+' ? Token::ADD :
147 c ==
'-' ? Token::SUB :
148 c ==
'*' ? Token::MUL :
149 c ==
'/' ? Token::DIV :
150 c ==
'#' ? Token::PRIVATE_NAME :
151 c ==
'"' ? Token::STRING :
152 c ==
'\'' ? Token::STRING :
153 c ==
'`' ? Token::TEMPLATE_SPAN :
154 c ==
'\\' ? Token::IDENTIFIER :
// Whitespace and line-terminator characters.
156 c ==
' ' ? Token::WHITESPACE :
157 c ==
'\t' ? Token::WHITESPACE :
158 c ==
'\v' ? Token::WHITESPACE :
159 c ==
'\f' ? Token::WHITESPACE :
160 c ==
'\r' ? Token::WHITESPACE :
161 c ==
'\n' ? Token::WHITESPACE :
// Character classes, tested after all the exact matches above.
163 IsDecimalDigit(c) ? Token::NUMBER :
164 IsAsciiIdentifier(c) ? Token::IDENTIFIER :
// Compile-time table with 128 entries: entry N holds GetOneCharToken(N), with
// the argument list supplied by INT_0_TO_127_LIST.
// NOTE(review): the helper macro is named CALL_GET_SCAN_FLAGS although it
// invokes GetOneCharToken here; the name matches the helper used for
// character_scan_flags further down.
170 static const constexpr Token::Value one_char_tokens[128] = {
171 #define CALL_GET_SCAN_FLAGS(N) GetOneCharToken(N), 172 INT_0_TO_127_LIST(CALL_GET_SCAN_FLAGS)
// Entry point for identifier/keyword scanning: starts a fresh literal on the
// next() token descriptor and delegates to ScanIdentifierOrKeywordInner.
// (The #undef on the first line below belongs to the table helper macro
// defined just before it.)
173 #undef CALL_GET_SCAN_FLAGS 178 V8_INLINE Token::Value Scanner::ScanIdentifierOrKeyword() {
179 next().literal_chars.Start();
180 return ScanIdentifierOrKeywordInner();
// Per-character bit flags, stored as uint8_t, that GetScanFlags computes and
// the identifier fast path ORs together while scanning.
184 enum class ScanFlags : uint8_t {
// Set by GetScanFlags for characters that are not ASCII identifier characters.
185 kTerminatesLiteral = 1 << 0,
// Set by GetScanFlags for ASCII identifier characters for which
// CanBeKeywordCharacter is false.
188 kCannotBeKeyword = 1 << 1,
// Must equal kCannotBeKeyword << 1; ScanIdentifierOrKeywordInner asserts this.
189 kCannotBeKeywordStart = 1 << 2,
// Set by GetScanFlags for both quote characters, LF, CR and backslash.
190 kStringTerminator = 1 << 3,
// Set by GetScanFlags for backslash; the identifier fast path also sets it
// when it meets a character above kMaxAscii.
191 kNeedsSlowPath = 1 << 4,
// Computes the ScanFlags bit set for one character as a bitwise OR of
// independent conditional terms, so it can fill a table at compile time:
//   - kCannotBeKeyword for ASCII identifier characters that
//     CanBeKeywordCharacter rejects;
//   - kTerminatesLiteral for characters that are not ASCII identifier
//     characters;
//   - kStringTerminator for both quote characters, LF, CR and backslash;
//   - kNeedsSlowPath for backslash.
// NOTE(review): several arms of these conditionals are not visible in this
// excerpt, including the condition that selects kCannotBeKeywordStart;
// consult the full file before changing any term.
193 constexpr uint8_t GetScanFlags(
char c) {
198 (IsAsciiIdentifier(c) && !CanBeKeywordCharacter(c)
199 ?
static_cast<uint8_t
>(ScanFlags::kCannotBeKeyword)
203 :
static_cast<uint8_t
>(ScanFlags::kCannotBeKeywordStart)) |
207 (!IsAsciiIdentifier(c)
208 ?
static_cast<uint8_t
>(ScanFlags::kTerminatesLiteral)
211 ((c ==
'\'' || c ==
'"' || c ==
'\n' || c ==
'\r' || c ==
'\\')
212 ? static_cast<uint8_t>(ScanFlags::kStringTerminator)
215 (c ==
'\\' ? static_cast<uint8_t>(ScanFlags::kNeedsSlowPath) : 0);
// Returns true when the kTerminatesLiteral bit is set in scan_flags.
217 inline bool TerminatesLiteral(uint8_t scan_flags) {
218 return (scan_flags & static_cast<uint8_t>(ScanFlags::kTerminatesLiteral));
// Returns true when the kCannotBeKeyword bit is NOT set in scan_flags. Note
// the inverted sense compared with the other flag predicates.
220 inline bool CanBeKeyword(uint8_t scan_flags) {
221 return !(scan_flags &
static_cast<uint8_t
>(ScanFlags::kCannotBeKeyword));
// Returns true when the kNeedsSlowPath bit is set in scan_flags.
223 inline bool NeedsSlowPath(uint8_t scan_flags) {
224 return (scan_flags & static_cast<uint8_t>(ScanFlags::kNeedsSlowPath));
// Returns true when the kStringTerminator bit is set in scan_flags.
226 inline bool MayTerminateString(uint8_t scan_flags) {
227 return (scan_flags & static_cast<uint8_t>(ScanFlags::kStringTerminator));
// Compile-time table with 128 entries: entry N holds GetScanFlags(N), with the
// argument list supplied by INT_0_TO_127_LIST.
231 static constexpr
const uint8_t character_scan_flags[128] = {
232 #define CALL_GET_SCAN_FLAGS(N) GetScanFlags(N), 233 INT_0_TO_127_LIST(CALL_GET_SCAN_FLAGS)
// Returns true only when c indexes inside character_scan_flags and the stored
// flags allow a keyword. The cast to uint32_t is applied before the bounds
// comparison; the && short-circuit keeps the table access from running when
// the bounds check fails.
// (The #undef on the first line below belongs to the table helper macro
// defined just before it.)
234 #undef CALL_GET_SCAN_FLAGS 237 inline bool CharCanBeKeyword(uc32 c) {
238 return static_cast<uint32_t>(c) < arraysize(character_scan_flags) &&
239 CanBeKeyword(character_scan_flags[c]);
// Scans an identifier or keyword whose first character is c0_ (asserted to be
// an identifier start). Visible flow:
//   1. ASCII, non-backslash start: look up the flags of the first character,
//      append it to the literal, then let AdvanceUntil consume characters
//      while OR-ing their flags into scan_flags.
//   2. If nothing requested the slow path: return IDENTIFIER when the
//      accumulated flags rule out a keyword, otherwise resolve the one-byte
//      literal through KeywordOrIdentifierToken.
//   3. Escape handling: decode via ScanIdentifierUnicodeEscape and return
//      ILLEGAL when the result is a backslash or not an identifier start.
//   4. Everything else is finished by ScanIdentifierOrKeywordInnerSlow.
// NOTE(review): braces, else arms and the lambda return statements are not
// visible in this excerpt, so the nesting of the steps above is inferred from
// statement order - confirm against the full file before editing.
242 V8_INLINE Token::Value Scanner::ScanIdentifierOrKeywordInner() {
243 DCHECK(IsIdentifierStart(c0_));
244 bool escaped =
false;
245 bool can_be_keyword =
true;
// The flag table must cover exactly the ASCII range tested below.
247 STATIC_ASSERT(arraysize(character_scan_flags) == kMaxAscii + 1);
248 if (V8_LIKELY(static_cast<uint32_t>(c0_) <= kMaxAscii)) {
249 if (V8_LIKELY(c0_ !=
'\\')) {
250 uint8_t scan_flags = character_scan_flags[c0_];
251 DCHECK(!TerminatesLiteral(scan_flags));
// Relies on the two keyword bits being adjacent; the statement that uses this
// relationship is not visible in this excerpt.
252 STATIC_ASSERT(static_cast<uint8_t>(ScanFlags::kCannotBeKeywordStart) ==
253 static_cast<uint8_t>(ScanFlags::kCannotBeKeyword) << 1);
257 DCHECK(!NeedsSlowPath(scan_flags));
258 AddLiteralChar(static_cast<char>(c0_));
259 AdvanceUntil([
this, &scan_flags](uc32 c0) {
// A character above kMaxAscii has no table entry; flag the slow path instead.
260 if (V8_UNLIKELY(static_cast<uint32_t>(c0) > kMaxAscii)) {
265 scan_flags |=
static_cast<uint8_t
>(ScanFlags::kNeedsSlowPath);
268 uint8_t char_flags = character_scan_flags[c0];
269 scan_flags |= char_flags;
270 if (TerminatesLiteral(char_flags)) {
273 AddLiteralChar(static_cast<char>(c0));
// Fast exit: the whole identifier was plain ASCII without escapes.
278 if (V8_LIKELY(!NeedsSlowPath(scan_flags))) {
279 if (!CanBeKeyword(scan_flags))
return Token::IDENTIFIER;
281 Vector<const uint8_t> chars = next().literal_chars.one_byte_literal();
282 return KeywordOrIdentifierToken(chars.start(), chars.length());
285 can_be_keyword = CanBeKeyword(scan_flags);
289 uc32 c = ScanIdentifierUnicodeEscape();
// Asserts that -1 is not an identifier start, so the IsIdentifierStart test
// below also rejects a -1 result.
290 DCHECK(!IsIdentifierStart(-1));
291 if (c ==
'\\' || !IsIdentifierStart(c)) {
292 return Token::ILLEGAL;
295 can_be_keyword = CharCanBeKeyword(c);
299 return ScanIdentifierOrKeywordInnerSlow(escaped, can_be_keyword);
// Consumes a run of whitespace and line terminators starting at c0_.
// Sets next().after_line_terminator the first time a line terminator is seen.
// Returns Token::ILLEGAL when the source position did not move (nothing was
// skipped) and Token::WHITESPACE otherwise.
// NOTE(review): the statement that advances the input inside the loop is not
// visible in this excerpt.
302 V8_INLINE Token::Value Scanner::SkipWhiteSpace() {
303 int start_position = source_pos();
// End of input must not count as whitespace, so the loop below stops there.
306 DCHECK(!IsWhiteSpaceOrLineTerminator(kEndOfInput));
309 while (IsWhiteSpaceOrLineTerminator(c0_)) {
310 if (!next().after_line_terminator && unibrow::IsLineTerminator(c0_)) {
311 next().after_line_terminator =
true;
317 if (source_pos() == start_position) {
319 return Token::ILLEGAL;
322 return Token::WHITESPACE;
// Scans one token starting at c0_ and returns it. The begin position of
// next() is recorded first; ASCII characters are classified through
// one_char_tokens and then refined by inspecting the following character
// (for example = after a comparison or arithmetic operator). The enclosing
// loop repeats while the produced token is Token::WHITESPACE, so whitespace
// and skipped comments are not returned to the caller.
// NOTE(review): most case labels and the loop header are not visible in this
// excerpt; the grouping comments below are inferred from the characters and
// tokens that each fragment mentions.
325 V8_INLINE Token::Value Scanner::ScanSingleToken() {
328 next().location.beg_pos = source_pos();
330 if (V8_LIKELY(static_cast<unsigned>(c0_) <= kMaxAscii)) {
331 token = one_char_tokens[c0_];
// Labels such as CONDITIONAL and SEMICOLON are handled by Select(token).
340 case Token::CONDITIONAL:
342 case Token::SEMICOLON:
347 return Select(token);
// Fragment yielding LTE, SHL / ASSIGN_SHL, or an HTML comment scan.
355 if (c0_ ==
'=')
return Select(Token::LTE);
356 if (c0_ ==
'<')
return Select(
'=', Token::ASSIGN_SHL, Token::SHL);
358 token = ScanHtmlComment();
// Fragment yielding GTE, ASSIGN_SAR and SHR / ASSIGN_SHR.
366 if (c0_ ==
'=')
return Select(Token::GTE);
370 if (c0_ ==
'=')
return Select(Token::ASSIGN_SAR);
371 if (c0_ ==
'>')
return Select(
'=', Token::ASSIGN_SHR, Token::SHR);
// Fragment yielding EQ / EQ_STRICT, ARROW, or plain ASSIGN.
379 if (c0_ ==
'=')
return Select(
'=', Token::EQ_STRICT, Token::EQ);
380 if (c0_ ==
'>')
return Select(Token::ARROW);
381 return Token::ASSIGN;
// Fragment yielding NE / NE_STRICT.
386 if (c0_ ==
'=')
return Select(
'=', Token::NE_STRICT, Token::NE);
// Fragment yielding INC and ASSIGN_ADD.
392 if (c0_ ==
'+')
return Select(Token::INC);
393 if (c0_ ==
'=')
return Select(Token::ASSIGN_ADD);
// A > seen while after_line_terminator is set leads to
// SkipSingleHTMLComment; the same fragment also yields ASSIGN_SUB.
401 if (c0_ ==
'>' && next().after_line_terminator) {
404 token = SkipSingleHTMLComment();
409 if (c0_ ==
'=')
return Select(Token::ASSIGN_SUB);
// Fragment yielding EXP / ASSIGN_EXP and ASSIGN_MUL.
415 if (c0_ ==
'*')
return Select(
'=', Token::ASSIGN_EXP, Token::EXP);
416 if (c0_ ==
'=')
return Select(Token::ASSIGN_MUL);
421 return Select(
'=', Token::ASSIGN_MOD, Token::MOD);
// Comment handling: a # or @ marker selects SkipSourceURLComment, otherwise
// single-line or multi-line comment skipping; the fragment also yields
// ASSIGN_DIV.
428 if (c ==
'#' || c ==
'@') {
431 token = SkipSourceURLComment();
434 token = SkipSingleLineComment();
438 token = SkipMultiLineComment();
441 if (c0_ ==
'=')
return Select(Token::ASSIGN_DIV);
// Fragment yielding AND, ASSIGN_BIT_AND, or plain BIT_AND.
447 if (c0_ ==
'&')
return Select(Token::AND);
448 if (c0_ ==
'=')
return Select(Token::ASSIGN_BIT_AND);
449 return Token::BIT_AND;
// Fragment yielding OR, ASSIGN_BIT_OR, or plain BIT_OR.
454 if (c0_ ==
'|')
return Select(Token::OR);
455 if (c0_ ==
'=')
return Select(Token::ASSIGN_BIT_OR);
456 return Token::BIT_OR;
460 return Select(
'=', Token::ASSIGN_BIT_XOR, Token::BIT_XOR);
// Fragment yielding a number scan (ScanNumber(true)), ELLIPSIS, or PERIOD.
465 if (IsDecimalDigit(c0_))
return ScanNumber(
true);
470 return Token::ELLIPSIS;
473 return Token::PERIOD;
475 case Token::TEMPLATE_SPAN:
477 return ScanTemplateSpan();
479 case Token::PRIVATE_NAME:
480 return ScanPrivateName();
482 case Token::WHITESPACE:
483 token = SkipWhiteSpace();
487 return ScanNumber(
false);
489 case Token::IDENTIFIER:
490 return ScanIdentifierOrKeyword();
// Presumably the branch for characters above kMaxAscii: identifier starts
// (including a combined surrogate pair) go to the identifier scanner, end of
// input yields EOS (or ILLEGAL when the source reports a parser error), and
// anything else is offered to SkipWhiteSpace.
497 if (IsIdentifierStart(c0_) ||
498 (CombineSurrogatePair() && IsIdentifierStart(c0_))) {
499 return ScanIdentifierOrKeyword();
501 if (c0_ == kEndOfInput) {
502 return source_->has_parser_error() ? Token::ILLEGAL : Token::EOS;
504 token = SkipWhiteSpace();
// Keep scanning while the produced token is WHITESPACE.
507 }
while (token == Token::WHITESPACE);
// Scans one token into next_desc, which is asserted to be the next() token
// descriptor. Stores the token returned by ScanSingleToken, asserts that a
// pending parser error implies Token::ILLEGAL, records the end position, and
// runs SanityCheckTokenDesc on the current, next and next-next descriptors.
// NOTE(review): whether the sanity checks are guarded by a debug-only
// conditional is not visible in this excerpt.
512 void Scanner::Scan(TokenDesc* next_desc) {
513 DCHECK_EQ(next_desc, &next());
515 next_desc->token = ScanSingleToken();
516 DCHECK_IMPLIES(has_parser_error(), next_desc->token == Token::ILLEGAL);
517 next_desc->location.end_pos = source_pos();
520 SanityCheckTokenDesc(current());
521 SanityCheckTokenDesc(next());
522 SanityCheckTokenDesc(next_next());
// Convenience overload: scans the next token into the next_ descriptor.
526 void Scanner::Scan() { Scan(next_); }
531 #endif // V8_PARSING_SCANNER_INL_H_