7 #include <unordered_map> 8 #include <unordered_set> 10 #include "src/torque/ast.h" 11 #include "src/torque/earley-parser.h" 12 #include "src/torque/utils.h" 20 void UpdateSourcePosition(InputPosition from, InputPosition to,
21 SourcePosition* pos) {
// Runs this rule's semantic action for a completed Earley item: recursively
// evaluates each child item's action, collects the produced values, and
// hands them to action_ together with the matched input span.
// NOTE(review): this chunk is gapped — some original lines (e.g. the
// for-loop's closing brace, orig. 43) are not visible here.
35 base::Optional<ParseResult> Rule::RunAction(
const Item* completed_item,
36     const LexerResult& tokens)
const {
37 std::vector<ParseResult> results;
// Evaluate children left-to-right; order matters because the action
// consumes results positionally via the iterator below.
38 for (
const Item* child : completed_item->Children()) {
40 base::Optional<ParseResult> child_result =
41 child->left()->RunAction(child, tokens);
// Only child actions that produced a value contribute a ParseResult.
42 if (child_result) results.push_back(std::move(*child_result));
// Scope the current source position to the input this item matched so
// that anything reported inside action_ points at the right location.
44 MatchedInput matched_input = completed_item->GetMatchedInput(tokens);
45 CurrentSourcePosition::Scope pos_scope(matched_input.pos);
46 ParseResultIterator iterator(std::move(results), matched_input);
47 return action_(&iterator);
// Replaces this symbol's production rules with the given list.
// NOTE(review): the loop body (presumably clearing old rules and calling
// AddRule per rule) plus the `return *this;` are on lines not visible in
// this chunk — confirm against the full file.
50 Symbol& Symbol::operator=(std::initializer_list<Rule> rules) {
52 for (
const Rule& rule : rules) {
// Collects the child items of a completed item by walking the prev_ chain
// backwards, then reverses so children appear in rule (left-to-right) order.
58 std::vector<const Item*> Item::Children()
const {
59 std::vector<const Item*> children;
// Each step back through prev_ corresponds to one symbol of the rule's
// right-hand side; child_ is the sub-item that matched that symbol.
60 for (
const Item* current =
this; current->prev_; current = current->prev_) {
61 children.push_back(current->child_);
// prev_ chain yields children right-to-left; restore rule order.
64 std::reverse(children.begin(), children.end());
// Invariant: one child per right-hand-side symbol of this item's rule.
// NOTE(review): the `return children;` line is not visible in this chunk.
65 DCHECK_EQ(children.size(), right().size());
// Renders this item's matched input split at child boundaries — used to
// print the two competing derivations in ambiguity error messages.
69 std::string Item::SplitByChildren(
const LexerResult& tokens)
const {
// Unary rules add no structure: delegate straight to the single child
// (if it exists) so the output shows the deepest meaningful split.
70 if (right().size() == 1) {
71 if (
const Item* child = Children()[0])
72 return child->SplitByChildren(tokens);
// NOTE(review): the declaration of stream `s` (and the separator logic
// between children, orig. lines 73-78) is not visible in this chunk.
76 for (
const Item* item : Children()) {
79 s << item->GetMatchedInput(tokens).ToString();
// Detects grammar ambiguity: called when an item equal to an
// already-processed one is derived again. Two equal items with different
// child_ or prev_ pointers represent two distinct derivations of the same
// input span, which means the grammar is ambiguous — report with both
// derivations rendered via SplitByChildren.
// NOTE(review): "grammer" in the error strings below is a typo for
// "grammar"; a doc-only edit cannot change runtime strings — fix upstream.
// NOTE(review): the stream declarations (orig. 88, 96) and the first
// ReportError call (orig. 93) are on lines not visible in this chunk.
85 void Item::CheckAmbiguity(
const Item& other,
const LexerResult& tokens)
const {
// Precondition: only items comparing equal are checked against each other.
86 DCHECK(*
this == other);
// Case 1: same item reached with different last-child derivations.
87 if (child_ != other.child_) {
89 s <<
"Ambiguous grammer rules for \"" 90 << child_->GetMatchedInput(tokens).ToString() <<
"\":\n " 91 << child_->SplitByChildren(tokens) <<
"\nvs\n " 92 << other.child_->SplitByChildren(tokens);
// Case 2: same item reached through different predecessor chains.
95 if (prev_ != other.prev_) {
97 s <<
"Ambiguous grammer rules for \"" << GetMatchedInput(tokens).ToString()
98 <<
"\":\n " << SplitByChildren(tokens) <<
" ...\nvs\n " 99 << other.SplitByChildren(tokens) <<
" ...";
100 ReportError(s.str());
// Tokenizes the whole input string: repeatedly skips whitespace, matches
// the next token via MatchToken, and records the token's symbol and its
// matched text+position. Errors out on unmatchable input.
// NOTE(review): the enclosing while-loop, the `LexerResult result;`
// declaration, the null-symbol check guarding ReportError, and the final
// `return result;` are on lines not visible in this chunk.
104 LexerResult Lexer::RunLexer(
const std::string& input) {
106 InputPosition
const begin = input.c_str();
107 InputPosition
const end = begin + input.size();
108 InputPosition pos = begin;
109 InputPosition token_start = pos;
// Track source positions from line 0, column 0 of the current file.
110 CurrentSourcePosition::Scope scope(
111     SourcePosition{CurrentSourceFile::Get(), 0, 0});
// Skip leading whitespace before the first token.
112 match_whitespace_(&pos);
114 UpdateSourcePosition(token_start, pos, &CurrentSourcePosition::Get());
116 Symbol* symbol = MatchToken(&pos, end);
// Unknown token: show at most the next 10 characters of input as context.
118 ReportError(
"Lexer Error: unknown token " +
119     StringLiteralQuote(std::string(
120         token_start, token_start + std::min<ptrdiff_t>(
121                                        end - token_start, 10))));
123 result.token_symbols.push_back(symbol);
124 result.token_contents.push_back(
125     {token_start, pos, CurrentSourcePosition::Get()});
// Consume trailing whitespace after the token and advance the position.
126 match_whitespace_(&pos);
128 UpdateSourcePosition(token_start, pos, &CurrentSourcePosition::Get());
// Append a zero-width end-of-input entry so the parser can report a
// position even at the end of the token stream.
130 result.token_contents.push_back({pos, pos, CurrentSourcePosition::Get()});
// Matches the longest token starting at *pos: first tries all pattern
// functions (keeping the longest match), then lets an exact keyword match
// override a pattern match, and finally — if no pattern matched — tries
// keywords as literal prefixes of the remaining input.
// NOTE(review): the line advancing *pos to the pattern match end (orig.
// 142) and the function's return statements are not visible in this chunk.
134 Symbol* Lexer::MatchToken(InputPosition* pos, InputPosition end) {
135 InputPosition token_start = *pos;
136 Symbol* symbol =
nullptr;
// Try every registered pattern; keep the one that consumed the most input.
138 for (std::pair<const PatternFunction, Symbol>& pair : patterns_) {
139 InputPosition token_end = token_start;
140 PatternFunction matchPattern = pair.first;
141 if (matchPattern(&token_end) && token_end > *pos) {
143 symbol = &pair.second;
// If a pattern matched, a keyword that is exactly the matched text takes
// precedence (e.g. an identifier pattern matching a reserved word).
148 if (*pos != token_start) {
149 auto found_keyword = keywords_.find(std::string(token_start, *pos));
150 if (found_keyword != keywords_.end()) {
151 return &found_keyword->second;
// No pattern matched: try keywords as prefixes of the remaining input.
// Reverse iteration makes lexicographically-later keywords win, which for
// keywords sharing a prefix prefers the longer one — TODO confirm this is
// the intended tie-break.
158 for (
auto it = keywords_.rbegin(); it != keywords_.rend(); ++it) {
159 const std::string& keyword = it->first;
// Not enough input left for this keyword.
160 if (static_cast<size_t>(end - *pos) < keyword.size())
continue;
161 if (keyword == std::string(*pos, *pos + keyword.size())) {
162 *pos += keyword.size();
// Runs the Earley parsing algorithm over the token stream, starting from
// `start`. Returns the root item of the (unique) parse on success; reports
// a parser error otherwise. `processed` owns the items (set insertion gives
// them stable addresses) and doubles as the duplicate/ambiguity check.
// NOTE(review): this chunk is gapped — several declarations (e.g. the
// `waiting` map's name, `top_level`, `final_item`, `reason`) and many
// closing braces are on lines not visible here.
171 const Item* RunEarleyAlgorithm(
172     Symbol* start,
const LexerResult& tokens,
173     std::unordered_set<Item, base::hash<Item>>* processed) {
// Items still to process at the current input position.
175 std::vector<Item> worklist;
// Items scheduled for the next input position (filled by the scan step).
177 std::vector<Item> future_items;
178 CurrentSourcePosition::Scope source_position(
179     SourcePosition{CurrentSourceFile::Get(), 0, 0});
180 std::vector<const Item*> completed_items;
// Maps (start position, nonterminal) -> items waiting for that
// nonterminal to complete at that position (the Earley "waiting" sets).
181 std::unordered_map<std::pair<size_t, Symbol*>, std::set<const Item*>,
182                    base::hash<std::pair<size_t, Symbol*>>>
185 std::vector<const Item*> debug_trace;
// Wrap the user's start symbol in a synthetic top-level rule so the final
// parse is identified by a single known item.
191 top_level.AddRule(Rule({start}));
192 worklist.push_back(Item{top_level.rule(0), 0, 0, 0});
194 size_t input_length = tokens.token_symbols.size();
// One pass per input position, including the end-of-input position.
196 for (
size_t pos = 0; pos <= input_length; ++pos) {
197 while (!worklist.empty()) {
// Insert into `processed` both to deduplicate and to give the item a
// stable address that `waiting` entries can point at.
198 auto insert_result = processed->insert(worklist.back());
199 const Item& item = *insert_result.first;
200 DCHECK_EQ(pos, item.pos());
201 MatchedInput last_token = tokens.token_contents[pos];
202 CurrentSourcePosition::Get() = last_token.pos;
203 bool is_new = insert_result.second;
// A re-derived item may encode a second derivation: check for ambiguity,
// then skip reprocessing it.
204 if (!is_new) item.CheckAmbiguity(worklist.back(), tokens);
206 if (!is_new)
continue;
208 debug_trace.push_back(&item);
// COMPLETE step: a finished item advances every parent waiting on its
// left-hand-side nonterminal at the item's start position.
209 if (item.IsComplete()) {
212 for (
const Item* parent : waiting[{item.start(), item.left()}]) {
213 worklist.push_back(parent->Advance(pos, &item));
216 Symbol* next = item.NextSymbol();
// SCAN step: the expected symbol is the current terminal — advance the
// item into the next input position.
219 if (pos < tokens.token_symbols.size() &&
220     tokens.token_symbols[pos] == next) {
221 future_items.push_back(item.Advance(pos + 1,
nullptr));
// PREDICT step: expecting a nonterminal — register this item as waiting
// and seed fresh items for each of the nonterminal's rules.
224 if (!next->IsTerminal()) {
226 waiting[{pos, next}].insert(&item);
228 for (
size_t i = 0;
i < next->rule_number(); ++
i) {
229 Rule* rule = next->rule(
i);
// If this rule already completed as an epsilon match at this position,
// advance immediately instead of re-deriving it.
230 auto already_completed =
231     processed->find(Item{rule, rule->right().size(), pos, pos});
240 if (already_completed != processed->end()) {
241 worklist.push_back(item.Advance(pos, &*already_completed));
243 worklist.push_back(Item{rule, 0, pos, pos});
// Move on to the next input position.
248 std::swap(worklist, future_items);
// Success iff the synthetic top-level rule completed over the whole input;
// its single child is the actual parse result.
252 processed->find(Item{top_level.rule(0), 1, 0, input_length});
253 if (final_item != processed->end()) {
255 return final_item->Children()[0];
// Failure: report using the furthest item reached in the trace.
258 const Item& last_item = *debug_trace.back();
259 if (last_item.pos() < tokens.token_symbols.size()) {
260 std::string next_token = tokens.token_contents[last_item.pos()].ToString();
261 reason =
"unexpected token \"" + next_token +
"\"";
263 reason =
"unexpected end of input";
265 ReportError(
"Parser Error: " + reason);
// Matches one character against a <cctype>-style classifier (int(*)(int)).
// The unsigned char cast avoids undefined behavior: <cctype> functions
// require a value representable as unsigned char (or EOF), and plain char
// may be negative. NOTE(review): the position-advance and return lines
// (orig. 271-275) are not visible in this chunk.
269 bool Grammar::MatchChar(
int (*char_class)(
int), InputPosition* pos) {
270 if (**pos && char_class(static_cast<unsigned char>(**pos))) {
// Overload matching one character against a bool(*)(char) predicate.
// NOTE(review): the position-advance and return lines (orig. 280-284) are
// not visible in this chunk.
278 bool Grammar::MatchChar(
bool (*char_class)(
char), InputPosition* pos) {
279 if (**pos && char_class(**pos)) {
// Matches the NUL-terminated literal `s` at *pos; on success *pos is
// presumably advanced past the match (the `*pos = current; return true;`
// lines, orig. 291-294, are not visible in this chunk).
287 bool Grammar::MatchString(
const char* s, InputPosition* pos) {
// Scan with a local cursor so *pos is untouched on mismatch.
288 InputPosition current = *pos;
289 for (; *s != 0; ++s, ++current) {
290 if (*s != *current)
return false;
// Matches any single character by delegating to MatchChar with an
// always-true predicate; fails only at end of input (MatchChar rejects the
// NUL terminator).
297 bool Grammar::MatchAnyChar(InputPosition* pos) {
298 return MatchChar([](
char c) {
return true; }, pos);