V8 API Reference, 7.2.502.16 (for Deno 0.2.4)
asm-scanner.h
1 // Copyright 2017 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #ifndef V8_ASMJS_ASM_SCANNER_H_
6 #define V8_ASMJS_ASM_SCANNER_H_
7 
8 #include <memory>
9 #include <string>
10 #include <unordered_map>
11 
12 #include "src/asmjs/asm-names.h"
13 #include "src/base/logging.h"
14 #include "src/globals.h"
15 
16 namespace v8 {
17 namespace internal {
18 
19 class Utf16CharacterStream;
20 
21 // A custom scanner to extract the token stream needed to parse valid
22 // asm.js: http://asmjs.org/spec/latest/
23 // This scanner intentionally avoids the portion of JavaScript lexing
24 // that are not required to determine if code is valid asm.js code.
25 // * Strings are disallowed except for 'use asm'.
26 // * Only the subset of keywords needed to check asm.js invariants are
27 // included.
28 // * Identifiers are accumulated into local + global string tables
29 // (for performance).
30 class V8_EXPORT_PRIVATE AsmJsScanner {
31  public:
32  typedef int32_t token_t;
33 
34  explicit AsmJsScanner(Utf16CharacterStream* stream);
35 
36  // Get current token.
37  token_t Token() const { return token_; }
38  // Get position of current token.
39  size_t Position() const { return position_; }
40  // Advance to the next token.
41  void Next();
42  // Back up by one token.
43  void Rewind();
44 
45  // Get raw string for current identifier. Note that the returned string will
46  // become invalid when the scanner advances, create a copy to preserve it.
47  const std::string& GetIdentifierString() const {
48  // Identifier strings don't work after a rewind.
49  DCHECK(!rewind_);
50  return identifier_string_;
51  }
52 
53  // Check if we just passed a newline.
54  bool IsPrecededByNewline() const {
55  // Newline tracking doesn't work if you back up.
56  DCHECK(!rewind_);
57  return preceded_by_newline_;
58  }
59 
60 #if DEBUG
61  // Debug only method to go from a token back to its name.
62  // Slow, only use for debugging.
63  std::string Name(token_t token) const;
64 #endif
65 
66  // Restores old position (token after that position). Note that it is not
67  // allowed to rewind right after a seek, because previous tokens are unknown.
68  void Seek(size_t pos);
69 
70  // Select whether identifiers are resolved in global or local scope,
71  // and which scope new identifiers are added to.
72  void EnterLocalScope() { in_local_scope_ = true; }
73  void EnterGlobalScope() { in_local_scope_ = false; }
74  // Drop all current local identifiers.
75  void ResetLocals();
76 
77  // Methods to check if a token is an identifier and which scope.
78  bool IsLocal() const { return IsLocal(Token()); }
79  bool IsGlobal() const { return IsGlobal(Token()); }
80  static bool IsLocal(token_t token) { return token <= kLocalsStart; }
81  static bool IsGlobal(token_t token) { return token >= kGlobalsStart; }
82  // Methods to find the index position of an identifier (count starting from
83  // 0 for each scope separately).
84  static size_t LocalIndex(token_t token) {
85  DCHECK(IsLocal(token));
86  return -(token - kLocalsStart);
87  }
88  static size_t GlobalIndex(token_t token) {
89  DCHECK(IsGlobal(token));
90  return token - kGlobalsStart;
91  }
92 
93  // Methods to check if the current token is a numeric literal considered an
94  // asm.js "double" (contains a dot) or an "unsigned" (without a dot). Note
95  // that numbers without a dot outside the [0 .. 2^32) range are errors.
96  bool IsUnsigned() const { return Token() == kUnsigned; }
97  uint32_t AsUnsigned() const {
98  DCHECK(IsUnsigned());
99  return unsigned_value_;
100  }
101  bool IsDouble() const { return Token() == kDouble; }
102  double AsDouble() const {
103  DCHECK(IsDouble());
104  return double_value_;
105  }
106 
107  // clang-format off
108  enum {
109  // [-10000-kMaxIdentifierCount, -10000) :: Local identifiers (counting
110  // backwards)
111  // [-10000 .. -1) :: Builtin tokens like keywords
112  // (also includes some special
113  // ones like end of input)
114  // 0 .. 255 :: Single char tokens
115  // 256 .. 256+kMaxIdentifierCount :: Global identifiers
116  kLocalsStart = -10000,
117 #define V(name, _junk1, _junk2, _junk3) kToken_##name,
118  STDLIB_MATH_FUNCTION_LIST(V)
119  STDLIB_ARRAY_TYPE_LIST(V)
120 #undef V
121 #define V(name, _junk1) kToken_##name,
122  STDLIB_MATH_VALUE_LIST(V)
123 #undef V
124 #define V(name) kToken_##name,
125  STDLIB_OTHER_LIST(V)
126  KEYWORD_NAME_LIST(V)
127 #undef V
128 #define V(rawname, name) kToken_##name,
129  LONG_SYMBOL_NAME_LIST(V)
130 #undef V
131 #define V(name, value, string_name) name = value,
132  SPECIAL_TOKEN_LIST(V)
133 #undef V
134  kGlobalsStart = 256,
135  };
136  // clang-format on
137 
138  private:
139  Utf16CharacterStream* stream_;
140  token_t token_;
141  token_t preceding_token_;
142  token_t next_token_; // Only set when in {rewind} state.
143  size_t position_; // Corresponds to {token} position.
144  size_t preceding_position_; // Corresponds to {preceding_token} position.
145  size_t next_position_; // Only set when in {rewind} state.
146  bool rewind_;
147  std::string identifier_string_;
148  bool in_local_scope_;
149  std::unordered_map<std::string, token_t> local_names_;
150  std::unordered_map<std::string, token_t> global_names_;
151  std::unordered_map<std::string, token_t> property_names_;
152  int global_count_;
153  double double_value_;
154  uint32_t unsigned_value_;
155  bool preceded_by_newline_;
156 
157  // Consume multiple characters.
158  void ConsumeIdentifier(uc32 ch);
159  void ConsumeNumber(uc32 ch);
160  bool ConsumeCComment();
161  void ConsumeCPPComment();
162  void ConsumeString(uc32 quote);
163  void ConsumeCompareOrShift(uc32 ch);
164 
165  // Classify character categories.
166  bool IsIdentifierStart(uc32 ch);
167  bool IsIdentifierPart(uc32 ch);
168  bool IsNumberStart(uc32 ch);
169 };
170 
171 } // namespace internal
172 } // namespace v8
173 
174 #endif // V8_ASMJS_ASM_SCANNER_H_
Definition: libplatform.h:13