V8 API Reference, 7.2.502.16 (for Deno 0.2.4)
instruction-scheduler.cc
// Copyright 2015 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "src/compiler/backend/instruction-scheduler.h"

#include "src/base/adapters.h"
#include "src/base/utils/random-number-generator.h"
#include "src/isolate.h"

namespace v8 {
namespace internal {
namespace compiler {

void InstructionScheduler::SchedulingQueueBase::AddNode(
    ScheduleGraphNode* node) {
  // We keep the ready list sorted by total latency so that we can quickly find
  // the next best candidate to schedule.
  auto it = nodes_.begin();
  while ((it != nodes_.end()) &&
         ((*it)->total_latency() >= node->total_latency())) {
    ++it;
  }
  nodes_.insert(it, node);
}

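// The ready list is kept sorted by decreasing total latency (see AddNode
// above), so the first node whose operands are available lies on the longest
// remaining dependency chain, i.e. the critical path.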
InstructionScheduler::ScheduleGraphNode*
InstructionScheduler::CriticalPathFirstQueue::PopBestCandidate(int cycle) {
  DCHECK(!IsEmpty());
  auto candidate = nodes_.end();
  for (auto iterator = nodes_.begin(); iterator != nodes_.end(); ++iterator) {
    // We only consider instructions that have all their operands ready.
    if (cycle >= (*iterator)->start_cycle()) {
      candidate = iterator;
      break;
    }
  }

  if (candidate != nodes_.end()) {
    ScheduleGraphNode* result = *candidate;
    nodes_.erase(candidate);
    return result;
  }

  return nullptr;
}

InstructionScheduler::ScheduleGraphNode*
InstructionScheduler::StressSchedulerQueue::PopBestCandidate(int cycle) {
  DCHECK(!IsEmpty());
  // Choose a random element from the ready list.
  auto candidate = nodes_.begin();
  std::advance(candidate, isolate()->random_number_generator()->NextInt(
                              static_cast<int>(nodes_.size())));
  ScheduleGraphNode* result = *candidate;
  nodes_.erase(candidate);
  return result;
}

InstructionScheduler::ScheduleGraphNode::ScheduleGraphNode(Zone* zone,
                                                           Instruction* instr)
    : instr_(instr),
      successors_(zone),
      unscheduled_predecessors_count_(0),
      latency_(GetInstructionLatency(instr)),
      total_latency_(-1),
      start_cycle_(-1) {}

void InstructionScheduler::ScheduleGraphNode::AddSuccessor(
    ScheduleGraphNode* node) {
  successors_.push_back(node);
  node->unscheduled_predecessors_count_++;
}

InstructionScheduler::InstructionScheduler(Zone* zone,
                                           InstructionSequence* sequence)
    : zone_(zone),
      sequence_(sequence),
      graph_(zone),
      last_side_effect_instr_(nullptr),
      pending_loads_(zone),
      last_live_in_reg_marker_(nullptr),
      last_deopt_or_trap_(nullptr),
      operands_map_(zone) {}

void InstructionScheduler::StartBlock(RpoNumber rpo) {
  DCHECK(graph_.empty());
  DCHECK_NULL(last_side_effect_instr_);
  DCHECK(pending_loads_.empty());
  DCHECK_NULL(last_live_in_reg_marker_);
  DCHECK_NULL(last_deopt_or_trap_);
  DCHECK(operands_map_.empty());
  sequence()->StartBlock(rpo);
}

void InstructionScheduler::EndBlock(RpoNumber rpo) {
  if (FLAG_turbo_stress_instruction_scheduling) {
    ScheduleBlock<StressSchedulerQueue>();
  } else {
    ScheduleBlock<CriticalPathFirstQueue>();
  }
  sequence()->EndBlock(rpo);
  graph_.clear();
  last_side_effect_instr_ = nullptr;
  pending_loads_.clear();
  last_live_in_reg_marker_ = nullptr;
  last_deopt_or_trap_ = nullptr;
  operands_map_.clear();
}

void InstructionScheduler::AddTerminator(Instruction* instr) {
  ScheduleGraphNode* new_node = new (zone()) ScheduleGraphNode(zone(), instr);
  // Make sure that basic block terminators are not moved by adding them
  // as a successor of every instruction.
  for (ScheduleGraphNode* node : graph_) {
    node->AddSuccessor(new_node);
  }
  graph_.push_back(new_node);
}

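// Builds the dependency edges for a newly added instruction: the new node
// becomes a successor of the nodes it must not be reordered across (the last
// side-effecting instruction, pending loads, the last deopt or trap point)
// and of the nodes that define the virtual registers it uses.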
void InstructionScheduler::AddInstruction(Instruction* instr) {
  ScheduleGraphNode* new_node = new (zone()) ScheduleGraphNode(zone(), instr);

  // We should not have branches in the middle of a block.
  DCHECK_NE(instr->flags_mode(), kFlags_branch);
  DCHECK_NE(instr->flags_mode(), kFlags_branch_and_poison);

  if (IsFixedRegisterParameter(instr)) {
    if (last_live_in_reg_marker_ != nullptr) {
      last_live_in_reg_marker_->AddSuccessor(new_node);
    }
    last_live_in_reg_marker_ = new_node;
  } else {
    if (last_live_in_reg_marker_ != nullptr) {
      last_live_in_reg_marker_->AddSuccessor(new_node);
    }

    // Make sure that instructions are not scheduled before the last
    // deoptimization or trap point when they depend on it.
    if ((last_deopt_or_trap_ != nullptr) && DependsOnDeoptOrTrap(instr)) {
      last_deopt_or_trap_->AddSuccessor(new_node);
    }

    // Instructions with side effects and memory operations can't be
    // reordered with respect to each other.
    if (HasSideEffect(instr)) {
      if (last_side_effect_instr_ != nullptr) {
        last_side_effect_instr_->AddSuccessor(new_node);
      }
      for (ScheduleGraphNode* load : pending_loads_) {
        load->AddSuccessor(new_node);
      }
      pending_loads_.clear();
      last_side_effect_instr_ = new_node;
    } else if (IsLoadOperation(instr)) {
      // Load operations can't be reordered with side-effecting instructions,
      // but independent loads can be reordered with respect to each other.
      if (last_side_effect_instr_ != nullptr) {
        last_side_effect_instr_->AddSuccessor(new_node);
      }
      pending_loads_.push_back(new_node);
    } else if (instr->IsDeoptimizeCall() || instr->IsTrap()) {
      // Ensure that deopts or traps are not reordered with respect to
      // side-effect instructions.
      if (last_side_effect_instr_ != nullptr) {
        last_side_effect_instr_->AddSuccessor(new_node);
      }
      last_deopt_or_trap_ = new_node;
    }

    // Look for operand dependencies.
    for (size_t i = 0; i < instr->InputCount(); ++i) {
      const InstructionOperand* input = instr->InputAt(i);
      if (input->IsUnallocated()) {
        int32_t vreg = UnallocatedOperand::cast(input)->virtual_register();
        auto it = operands_map_.find(vreg);
        if (it != operands_map_.end()) {
          it->second->AddSuccessor(new_node);
        }
      }
    }

    // Record the virtual registers defined by this instruction.
    for (size_t i = 0; i < instr->OutputCount(); ++i) {
      const InstructionOperand* output = instr->OutputAt(i);
      if (output->IsUnallocated()) {
        operands_map_[UnallocatedOperand::cast(output)->virtual_register()] =
            new_node;
      } else if (output->IsConstant()) {
        operands_map_[ConstantOperand::cast(output)->virtual_register()] =
            new_node;
      }
    }
  }

  graph_.push_back(new_node);
}

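// A simple list scheduler: nodes whose predecessors have all been scheduled
// sit on a ready list, and the QueueType policy (critical-path-first, or
// random when FLAG_turbo_stress_instruction_scheduling is set) picks the
// next one to emit.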
template <typename QueueType>
void InstructionScheduler::ScheduleBlock() {
  QueueType ready_list(this);

  // Compute total latencies so that we can schedule the critical path first.
  ComputeTotalLatencies();

  // Add nodes which don't have dependencies to the ready list.
  for (ScheduleGraphNode* node : graph_) {
    if (!node->HasUnscheduledPredecessor()) {
      ready_list.AddNode(node);
    }
  }

  // Go through the ready list and schedule the instructions.
  int cycle = 0;
  while (!ready_list.IsEmpty()) {
    ScheduleGraphNode* candidate = ready_list.PopBestCandidate(cycle);

    if (candidate != nullptr) {
      sequence()->AddInstruction(candidate->instruction());

      for (ScheduleGraphNode* successor : candidate->successors()) {
        successor->DropUnscheduledPredecessor();
        successor->set_start_cycle(
            std::max(successor->start_cycle(), cycle + candidate->latency()));

        if (!successor->HasUnscheduledPredecessor()) {
          ready_list.AddNode(successor);
        }
      }
    }

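    // The cycle count advances even when no candidate was ready, so nodes
    // whose start_cycle lies in the future eventually become schedulable.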
    cycle++;
  }
}

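// Classifies architecture-independent opcodes as loads, side-effecting
// operations, or freely reorderable instructions; architecture-specific
// opcodes fall through to GetTargetInstructionFlags().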
int InstructionScheduler::GetInstructionFlags(const Instruction* instr) const {
  switch (instr->arch_opcode()) {
    case kArchNop:
    case kArchFramePointer:
    case kArchParentFramePointer:
    case kArchStackSlot:  // Despite its name this opcode will produce a
                          // reference to a frame slot, so it is not affected
                          // by the arm64 dual stack issues mentioned below.
    case kArchComment:
    case kArchDeoptimize:
    case kArchJmp:
    case kArchBinarySearchSwitch:
    case kArchLookupSwitch:
    case kArchRet:
    case kArchTableSwitch:
    case kArchThrowTerminator:
      return kNoOpcodeFlags;

    case kArchTruncateDoubleToI:
    case kIeee754Float64Acos:
    case kIeee754Float64Acosh:
    case kIeee754Float64Asin:
    case kIeee754Float64Asinh:
    case kIeee754Float64Atan:
    case kIeee754Float64Atanh:
    case kIeee754Float64Atan2:
    case kIeee754Float64Cbrt:
    case kIeee754Float64Cos:
    case kIeee754Float64Cosh:
    case kIeee754Float64Exp:
    case kIeee754Float64Expm1:
    case kIeee754Float64Log:
    case kIeee754Float64Log1p:
    case kIeee754Float64Log10:
    case kIeee754Float64Log2:
    case kIeee754Float64Pow:
    case kIeee754Float64Sin:
    case kIeee754Float64Sinh:
    case kIeee754Float64Tan:
    case kIeee754Float64Tanh:
      return kNoOpcodeFlags;

    case kArchStackPointer:
      // The ArchStackPointer instruction loads the current stack pointer value
      // and must not be reordered with instructions that have side effects.
      return kIsLoadOperation;

    case kArchWordPoisonOnSpeculation:
      // While poisoning operations have no side effect, they must not be
      // reordered relative to branches.
      return kHasSideEffect;

    case kArchPrepareCallCFunction:
    case kArchSaveCallerRegisters:
    case kArchRestoreCallerRegisters:
    case kArchPrepareTailCall:
    case kArchCallCFunction:
    case kArchCallCodeObject:
    case kArchCallJSFunction:
    case kArchCallWasmFunction:
    case kArchTailCallCodeObjectFromJSFunction:
    case kArchTailCallCodeObject:
    case kArchTailCallAddress:
    case kArchTailCallWasm:
    case kArchDebugAbort:
    case kArchDebugBreak:
      return kHasSideEffect;

    case kArchStoreWithWriteBarrier:
      return kHasSideEffect;

    case kWord32AtomicLoadInt8:
    case kWord32AtomicLoadUint8:
    case kWord32AtomicLoadInt16:
    case kWord32AtomicLoadUint16:
    case kWord32AtomicLoadWord32:
      return kIsLoadOperation;

    case kWord32AtomicStoreWord8:
    case kWord32AtomicStoreWord16:
    case kWord32AtomicStoreWord32:
      return kHasSideEffect;

    case kWord32AtomicExchangeInt8:
    case kWord32AtomicExchangeUint8:
    case kWord32AtomicExchangeInt16:
    case kWord32AtomicExchangeUint16:
    case kWord32AtomicExchangeWord32:
    case kWord32AtomicCompareExchangeInt8:
    case kWord32AtomicCompareExchangeUint8:
    case kWord32AtomicCompareExchangeInt16:
    case kWord32AtomicCompareExchangeUint16:
    case kWord32AtomicCompareExchangeWord32:
    case kWord32AtomicAddInt8:
    case kWord32AtomicAddUint8:
    case kWord32AtomicAddInt16:
    case kWord32AtomicAddUint16:
    case kWord32AtomicAddWord32:
    case kWord32AtomicSubInt8:
    case kWord32AtomicSubUint8:
    case kWord32AtomicSubInt16:
    case kWord32AtomicSubUint16:
    case kWord32AtomicSubWord32:
    case kWord32AtomicAndInt8:
    case kWord32AtomicAndUint8:
    case kWord32AtomicAndInt16:
    case kWord32AtomicAndUint16:
    case kWord32AtomicAndWord32:
    case kWord32AtomicOrInt8:
    case kWord32AtomicOrUint8:
    case kWord32AtomicOrInt16:
    case kWord32AtomicOrUint16:
    case kWord32AtomicOrWord32:
    case kWord32AtomicXorInt8:
    case kWord32AtomicXorUint8:
    case kWord32AtomicXorInt16:
    case kWord32AtomicXorUint16:
    case kWord32AtomicXorWord32:
      return kHasSideEffect;

#define CASE(Name) case k##Name:
    TARGET_ARCH_OPCODE_LIST(CASE)
#undef CASE
      return GetTargetInstructionFlags(instr);
  }

  UNREACHABLE();
}

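// The total latency of a node is its own latency plus the largest total
// latency among its successors, i.e. the length of the critical path starting
// at that node. The graph is traversed in reverse so that every successor has
// already been processed when its predecessors are visited.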
void InstructionScheduler::ComputeTotalLatencies() {
  for (ScheduleGraphNode* node : base::Reversed(graph_)) {
    int max_latency = 0;

    for (ScheduleGraphNode* successor : node->successors()) {
      DCHECK_NE(-1, successor->total_latency());
      if (successor->total_latency() > max_latency) {
        max_latency = successor->total_latency();
      }
    }

    node->set_total_latency(max_latency + node->latency());
  }
}

}  // namespace compiler
}  // namespace internal
}  // namespace v8