V8 API Reference, 7.2.502.16 (for Deno 0.2.4)
asm-scanner.cc
1 // Copyright 2017 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
#include "src/asmjs/asm-scanner.h"

#include <cmath>

#include "src/char-predicates-inl.h"
#include "src/conversions.h"
#include "src/flags.h"
#include "src/parsing/scanner.h"
11 
12 namespace v8 {
13 namespace internal {
14 
namespace {
// Upper bound on the number of identifiers the scanner will hand out ids
// for. Capping the count ensures that both global and local identifiers can
// be assigned a token id that stays within the range of an int32_t.
constexpr int kMaxIdentifierCount = 0xF000000;
}  // namespace
20 
21 AsmJsScanner::AsmJsScanner(Utf16CharacterStream* stream)
22  : stream_(stream),
23  token_(kUninitialized),
24  preceding_token_(kUninitialized),
25  next_token_(kUninitialized),
26  position_(0),
27  preceding_position_(0),
28  next_position_(0),
29  rewind_(false),
30  in_local_scope_(false),
31  global_count_(0),
32  double_value_(0.0),
33  unsigned_value_(0),
34  preceded_by_newline_(false) {
35 #define V(name, _junk1, _junk2, _junk3) property_names_[#name] = kToken_##name;
36  STDLIB_MATH_FUNCTION_LIST(V)
37  STDLIB_ARRAY_TYPE_LIST(V)
38 #undef V
39 #define V(name, _junk1) property_names_[#name] = kToken_##name;
40  STDLIB_MATH_VALUE_LIST(V)
41 #undef V
42 #define V(name) property_names_[#name] = kToken_##name;
43  STDLIB_OTHER_LIST(V)
44 #undef V
45 #define V(name) global_names_[#name] = kToken_##name;
46  KEYWORD_NAME_LIST(V)
47 #undef V
48  Next();
49 }
50 
51 void AsmJsScanner::Next() {
52  if (rewind_) {
53  preceding_token_ = token_;
54  preceding_position_ = position_;
55  token_ = next_token_;
56  position_ = next_position_;
57  next_token_ = kUninitialized;
58  next_position_ = 0;
59  rewind_ = false;
60  return;
61  }
62 
63  if (token_ == kEndOfInput || token_ == kParseError) {
64  return;
65  }
66 
67 #if DEBUG
68  if (FLAG_trace_asm_scanner) {
69  if (Token() == kDouble) {
70  PrintF("%lf ", AsDouble());
71  } else if (Token() == kUnsigned) {
72  PrintF("%" PRIu32 " ", AsUnsigned());
73  } else {
74  std::string name = Name(Token());
75  PrintF("%s ", name.c_str());
76  }
77  }
78 #endif
79 
80  preceded_by_newline_ = false;
81  preceding_token_ = token_;
82  preceding_position_ = position_;
83 
84  for (;;) {
85  position_ = stream_->pos();
86  uc32 ch = stream_->Advance();
87  switch (ch) {
88  case ' ':
89  case '\t':
90  case '\r':
91  // Ignore whitespace.
92  break;
93 
94  case '\n':
95  // Track when we've passed a newline for optional semicolon support,
96  // but keep scanning.
97  preceded_by_newline_ = true;
98  break;
99 
100  case kEndOfInput:
101  token_ = kEndOfInput;
102  return;
103 
104  case '\'':
105  case '"':
106  ConsumeString(ch);
107  return;
108 
109  case '/':
110  ch = stream_->Advance();
111  if (ch == '/') {
112  ConsumeCPPComment();
113  } else if (ch == '*') {
114  if (!ConsumeCComment()) {
115  token_ = kParseError;
116  return;
117  }
118  } else {
119  stream_->Back();
120  token_ = '/';
121  return;
122  }
123  // Breaks out of switch, but loops again (i.e. the case when we parsed
124  // a comment, but need to continue to look for the next token).
125  break;
126 
127  case '<':
128  case '>':
129  case '=':
130  case '!':
131  ConsumeCompareOrShift(ch);
132  return;
133 
134 #define V(single_char_token) case single_char_token:
135  SIMPLE_SINGLE_TOKEN_LIST(V)
136 #undef V
137  // Use fixed token IDs for ASCII.
138  token_ = ch;
139  return;
140 
141  default:
142  if (IsIdentifierStart(ch)) {
143  ConsumeIdentifier(ch);
144  } else if (IsNumberStart(ch)) {
145  ConsumeNumber(ch);
146  } else {
147  // TODO(bradnelson): Support unicode (probably via UnicodeCache).
148  token_ = kParseError;
149  }
150  return;
151  }
152  }
153 }
154 
155 void AsmJsScanner::Rewind() {
156  DCHECK_NE(kUninitialized, preceding_token_);
157  // TODO(bradnelson): Currently rewinding needs to leave in place the
158  // preceding newline state (in case a |0 ends a line).
159  // This is weird and stateful, fix me.
160  DCHECK(!rewind_);
161  next_token_ = token_;
162  next_position_ = position_;
163  token_ = preceding_token_;
164  position_ = preceding_position_;
165  preceding_token_ = kUninitialized;
166  preceding_position_ = 0;
167  rewind_ = true;
168  identifier_string_.clear();
169 }
170 
171 void AsmJsScanner::ResetLocals() { local_names_.clear(); }
172 
#if DEBUG
// Debug-only helper that turns |token| back into a printable spelling.
// Lookup order: printable ASCII, local names, global names, property names,
// then the long-symbol and special-token tables. An unknown token is a bug.
std::string AsmJsScanner::Name(token_t token) const {
  // Printable ASCII tokens are spelled as the character itself.
  const bool is_printable_ascii = 32 <= token && token <= 126;
  if (is_printable_ascii) {
    return std::string(1, static_cast<char>(token));
  }

  // Reverse lookups through the identifier tables.
  for (const auto& entry : local_names_) {
    if (entry.second == token) return entry.first;
  }
  for (const auto& entry : global_names_) {
    if (entry.second == token) return entry.first;
  }
  for (const auto& entry : property_names_) {
    if (entry.second == token) return entry.first;
  }

  // Multi-character symbols and special tokens.
  switch (token) {
#define LONG_SYMBOL_CASE(rawname, name) \
  case kToken_##name:                   \
    return rawname;
    LONG_SYMBOL_NAME_LIST(LONG_SYMBOL_CASE)
#undef LONG_SYMBOL_CASE
#define SPECIAL_TOKEN_CASE(name, value, string_name) \
  case name:                                         \
    return string_name;
    SPECIAL_TOKEN_LIST(SPECIAL_TOKEN_CASE)
#undef SPECIAL_TOKEN_CASE
    default:
      break;
  }
  UNREACHABLE();
}
#endif
211 
212 void AsmJsScanner::Seek(size_t pos) {
213  stream_->Seek(pos);
214  preceding_token_ = kUninitialized;
215  token_ = kUninitialized;
216  next_token_ = kUninitialized;
217  preceding_position_ = 0;
218  position_ = 0;
219  next_position_ = 0;
220  rewind_ = false;
221  Next();
222 }
223 
224 void AsmJsScanner::ConsumeIdentifier(uc32 ch) {
225  // Consume characters while still part of the identifier.
226  identifier_string_.clear();
227  while (IsIdentifierPart(ch)) {
228  identifier_string_ += ch;
229  ch = stream_->Advance();
230  }
231  // Go back one for next time.
232  stream_->Back();
233 
234  // Decode what the identifier means.
235  if (preceding_token_ == '.') {
236  auto i = property_names_.find(identifier_string_);
237  if (i != property_names_.end()) {
238  token_ = i->second;
239  return;
240  }
241  } else {
242  {
243  auto i = local_names_.find(identifier_string_);
244  if (i != local_names_.end()) {
245  token_ = i->second;
246  return;
247  }
248  }
249  if (!in_local_scope_) {
250  auto i = global_names_.find(identifier_string_);
251  if (i != global_names_.end()) {
252  token_ = i->second;
253  return;
254  }
255  }
256  }
257  if (preceding_token_ == '.') {
258  CHECK_LT(global_count_, kMaxIdentifierCount);
259  token_ = kGlobalsStart + global_count_++;
260  property_names_[identifier_string_] = token_;
261  } else if (in_local_scope_) {
262  CHECK_LT(local_names_.size(), kMaxIdentifierCount);
263  token_ = kLocalsStart - static_cast<token_t>(local_names_.size());
264  local_names_[identifier_string_] = token_;
265  } else {
266  CHECK_LT(global_count_, kMaxIdentifierCount);
267  token_ = kGlobalsStart + global_count_++;
268  global_names_[identifier_string_] = token_;
269  }
270 }
271 
272 void AsmJsScanner::ConsumeNumber(uc32 ch) {
273  std::string number;
274  number = ch;
275  bool has_dot = ch == '.';
276  bool has_prefix = false;
277  for (;;) {
278  ch = stream_->Advance();
279  if ((ch >= '0' && ch <= '9') || (ch >= 'a' && ch <= 'f') ||
280  (ch >= 'A' && ch <= 'F') || ch == '.' || ch == 'b' || ch == 'o' ||
281  ch == 'x' ||
282  ((ch == '-' || ch == '+') && !has_prefix &&
283  (number[number.size() - 1] == 'e' ||
284  number[number.size() - 1] == 'E'))) {
285  // TODO(bradnelson): Test weird cases ending in -.
286  if (ch == '.') {
287  has_dot = true;
288  }
289  if (ch == 'b' || ch == 'o' || ch == 'x') {
290  has_prefix = true;
291  }
292  number.push_back(ch);
293  } else {
294  break;
295  }
296  }
297  stream_->Back();
298  // Special case the most common number.
299  if (number.size() == 1 && number[0] == '0') {
300  unsigned_value_ = 0;
301  token_ = kUnsigned;
302  return;
303  }
304  // Pick out dot.
305  if (number.size() == 1 && number[0] == '.') {
306  token_ = '.';
307  return;
308  }
309  // Decode numbers.
310  double_value_ = StringToDouble(
311  Vector<const uint8_t>::cast(VectorOf(number)),
312  ALLOW_HEX | ALLOW_OCTAL | ALLOW_BINARY | ALLOW_IMPLICIT_OCTAL);
313  if (std::isnan(double_value_)) {
314  // Check if string to number conversion didn't consume all the characters.
315  // This happens if the character filter let through something invalid
316  // like: 0123ef for example.
317  // TODO(bradnelson): Check if this happens often enough to be a perf
318  // problem.
319  if (number[0] == '.') {
320  for (size_t k = 1; k < number.size(); ++k) {
321  stream_->Back();
322  }
323  token_ = '.';
324  return;
325  }
326  // Anything else that doesn't parse is an error.
327  token_ = kParseError;
328  return;
329  }
330  if (has_dot) {
331  token_ = kDouble;
332  } else {
333  // Exceeding safe integer range is an error.
334  if (double_value_ > static_cast<double>(kMaxUInt32)) {
335  token_ = kParseError;
336  return;
337  }
338  unsigned_value_ = static_cast<uint32_t>(double_value_);
339  token_ = kUnsigned;
340  }
341 }
342 
343 bool AsmJsScanner::ConsumeCComment() {
344  for (;;) {
345  uc32 ch = stream_->Advance();
346  while (ch == '*') {
347  ch = stream_->Advance();
348  if (ch == '/') {
349  return true;
350  }
351  }
352  if (ch == '\n') {
353  preceded_by_newline_ = true;
354  }
355  if (ch == kEndOfInput) {
356  return false;
357  }
358  }
359 }
360 
361 void AsmJsScanner::ConsumeCPPComment() {
362  for (;;) {
363  uc32 ch = stream_->Advance();
364  if (ch == '\n') {
365  preceded_by_newline_ = true;
366  return;
367  }
368  if (ch == kEndOfInput) {
369  return;
370  }
371  }
372 }
373 
374 void AsmJsScanner::ConsumeString(uc32 quote) {
375  // Only string allowed is 'use asm' / "use asm".
376  const char* expected = "use asm";
377  for (; *expected != '\0'; ++expected) {
378  if (stream_->Advance() != *expected) {
379  token_ = kParseError;
380  return;
381  }
382  }
383  if (stream_->Advance() != quote) {
384  token_ = kParseError;
385  return;
386  }
387  token_ = kToken_UseAsm;
388 }
389 
390 void AsmJsScanner::ConsumeCompareOrShift(uc32 ch) {
391  uc32 next_ch = stream_->Advance();
392  if (next_ch == '=') {
393  switch (ch) {
394  case '<':
395  token_ = kToken_LE;
396  break;
397  case '>':
398  token_ = kToken_GE;
399  break;
400  case '=':
401  token_ = kToken_EQ;
402  break;
403  case '!':
404  token_ = kToken_NE;
405  break;
406  default:
407  UNREACHABLE();
408  }
409  } else if (ch == '<' && next_ch == '<') {
410  token_ = kToken_SHL;
411  } else if (ch == '>' && next_ch == '>') {
412  if (stream_->Advance() == '>') {
413  token_ = kToken_SHR;
414  } else {
415  token_ = kToken_SAR;
416  stream_->Back();
417  }
418  } else {
419  stream_->Back();
420  token_ = ch;
421  }
422 }
423 
424 bool AsmJsScanner::IsIdentifierStart(uc32 ch) {
425  return IsInRange(AsciiAlphaToLower(ch), 'a', 'z') || ch == '_' || ch == '$';
426 }
427 
428 bool AsmJsScanner::IsIdentifierPart(uc32 ch) { return IsAsciiIdentifier(ch); }
429 
430 bool AsmJsScanner::IsNumberStart(uc32 ch) {
431  return ch == '.' || IsDecimalDigit(ch);
432 }
433 
434 } // namespace internal
435 } // namespace v8
Definition: libplatform.h:13