V8 API Reference, 7.2.502.16 (for Deno 0.2.4)
code-generator-x64.cc
1 // Copyright 2013 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "src/compiler/backend/code-generator.h"
6 
7 #include <limits>
8 
9 #include "src/compiler/backend/code-generator-impl.h"
10 #include "src/compiler/backend/gap-resolver.h"
11 #include "src/compiler/node-matchers.h"
12 #include "src/compiler/osr.h"
13 #include "src/heap/heap-inl.h" // crbug.com/v8/8499
14 #include "src/macro-assembler.h"
15 #include "src/objects/smi.h"
16 #include "src/optimized-compilation-info.h"
17 #include "src/wasm/wasm-code-manager.h"
18 #include "src/wasm/wasm-objects.h"
19 #include "src/x64/assembler-x64.h"
20 
21 namespace v8 {
22 namespace internal {
23 namespace compiler {
24 
25 #define __ tasm()->
26 
27 // Adds X64 specific methods for decoding operands.
28 class X64OperandConverter : public InstructionOperandConverter {
29  public:
30  X64OperandConverter(CodeGenerator* gen, Instruction* instr)
31  : InstructionOperandConverter(gen, instr) {}
32 
33  Immediate InputImmediate(size_t index) {
34  return ToImmediate(instr_->InputAt(index));
35  }
36 
37  Operand InputOperand(size_t index, int extra = 0) {
38  return ToOperand(instr_->InputAt(index), extra);
39  }
40 
41  Operand OutputOperand() { return ToOperand(instr_->Output()); }
42 
43  Immediate ToImmediate(InstructionOperand* operand) {
44  Constant constant = ToConstant(operand);
45  if (constant.type() == Constant::kFloat64) {
46  DCHECK_EQ(0, constant.ToFloat64().AsUint64());
47  return Immediate(0);
48  }
49  if (RelocInfo::IsWasmReference(constant.rmode())) {
50  return Immediate(constant.ToInt32(), constant.rmode());
51  }
52  return Immediate(constant.ToInt32());
53  }
54 
55  Operand ToOperand(InstructionOperand* op, int extra = 0) {
56  DCHECK(op->IsStackSlot() || op->IsFPStackSlot());
57  return SlotToOperand(AllocatedOperand::cast(op)->index(), extra);
58  }
59 
60  Operand SlotToOperand(int slot_index, int extra = 0) {
61  FrameOffset offset = frame_access_state()->GetFrameOffset(slot_index);
62  return Operand(offset.from_stack_pointer() ? rsp : rbp,
63  offset.offset() + extra);
64  }
65 
66  static size_t NextOffset(size_t* offset) {
67  size_t i = *offset;
68  (*offset)++;
69  return i;
70  }
71 
72  static ScaleFactor ScaleFor(AddressingMode one, AddressingMode mode) {
73  STATIC_ASSERT(0 == static_cast<int>(times_1));
74  STATIC_ASSERT(1 == static_cast<int>(times_2));
75  STATIC_ASSERT(2 == static_cast<int>(times_4));
76  STATIC_ASSERT(3 == static_cast<int>(times_8));
77  int scale = static_cast<int>(mode - one);
78  DCHECK(scale >= 0 && scale < 4);
79  return static_cast<ScaleFactor>(scale);
80  }
81 
82  Operand MemoryOperand(size_t* offset) {
83  AddressingMode mode = AddressingModeField::decode(instr_->opcode());
84  switch (mode) {
85  case kMode_MR: {
86  Register base = InputRegister(NextOffset(offset));
87  int32_t disp = 0;
88  return Operand(base, disp);
89  }
90  case kMode_MRI: {
91  Register base = InputRegister(NextOffset(offset));
92  int32_t disp = InputInt32(NextOffset(offset));
93  return Operand(base, disp);
94  }
95  case kMode_MR1:
96  case kMode_MR2:
97  case kMode_MR4:
98  case kMode_MR8: {
99  Register base = InputRegister(NextOffset(offset));
100  Register index = InputRegister(NextOffset(offset));
101  ScaleFactor scale = ScaleFor(kMode_MR1, mode);
102  int32_t disp = 0;
103  return Operand(base, index, scale, disp);
104  }
105  case kMode_MR1I:
106  case kMode_MR2I:
107  case kMode_MR4I:
108  case kMode_MR8I: {
109  Register base = InputRegister(NextOffset(offset));
110  Register index = InputRegister(NextOffset(offset));
111  ScaleFactor scale = ScaleFor(kMode_MR1I, mode);
112  int32_t disp = InputInt32(NextOffset(offset));
113  return Operand(base, index, scale, disp);
114  }
115  case kMode_M1: {
116  Register base = InputRegister(NextOffset(offset));
117  int32_t disp = 0;
118  return Operand(base, disp);
119  }
120  case kMode_M2:
121  UNREACHABLE(); // Should use kModeMR with more compact encoding instead
122  return Operand(no_reg, 0);
123  case kMode_M4:
124  case kMode_M8: {
125  Register index = InputRegister(NextOffset(offset));
126  ScaleFactor scale = ScaleFor(kMode_M1, mode);
127  int32_t disp = 0;
128  return Operand(index, scale, disp);
129  }
130  case kMode_M1I:
131  case kMode_M2I:
132  case kMode_M4I:
133  case kMode_M8I: {
134  Register index = InputRegister(NextOffset(offset));
135  ScaleFactor scale = ScaleFor(kMode_M1I, mode);
136  int32_t disp = InputInt32(NextOffset(offset));
137  return Operand(index, scale, disp);
138  }
139  case kMode_Root: {
140  Register base = kRootRegister;
141  int32_t disp = InputInt32(NextOffset(offset));
142  return Operand(base, disp);
143  }
144  case kMode_None:
145  UNREACHABLE();
146  }
147  UNREACHABLE();
148  }
149 
150  Operand MemoryOperand(size_t first_input = 0) {
151  return MemoryOperand(&first_input);
152  }
153 };
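The converter above leans on the layout of the addressing-mode enumeration: the scaled variants (MR1..MR8, M1..M8 and their ...I forms) are declared contiguously, so ScaleFor only has to subtract the first enumerator of a group to recover the hardware scale factor. A minimal standalone sketch of that mapping, using hypothetical stand-in enums rather than the real kMode_* and ScaleFactor definitions:

#include <cassert>

// Stand-ins for the addressing modes in instruction-codes-x64.h and the
// scale factors in assembler-x64.h (names reused for readability only).
enum AddressingMode { kMode_MR1, kMode_MR2, kMode_MR4, kMode_MR8 };
enum ScaleFactor { times_1 = 0, times_2 = 1, times_4 = 2, times_8 = 3 };

ScaleFactor ScaleFor(AddressingMode one, AddressingMode mode) {
  int scale = static_cast<int>(mode) - static_cast<int>(one);
  assert(scale >= 0 && scale < 4);
  return static_cast<ScaleFactor>(scale);
}

int main() {
  // kMode_MR4 sits two enumerators past kMode_MR1, so the decoded operand is
  // base + index * 4 (plus a displacement for the ...I variants).
  assert(ScaleFor(kMode_MR1, kMode_MR4) == times_4);
  return 0;
}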
154 
155 namespace {
156 
157 bool HasImmediateInput(Instruction* instr, size_t index) {
158  return instr->InputAt(index)->IsImmediate();
159 }
160 
161 class OutOfLineLoadFloat32NaN final : public OutOfLineCode {
162  public:
163  OutOfLineLoadFloat32NaN(CodeGenerator* gen, XMMRegister result)
164  : OutOfLineCode(gen), result_(result) {}
165 
166  void Generate() final {
167  __ Xorps(result_, result_);
168  __ Divss(result_, result_);
169  }
170 
171  private:
172  XMMRegister const result_;
173 };
174 
175 class OutOfLineLoadFloat64NaN final : public OutOfLineCode {
176  public:
177  OutOfLineLoadFloat64NaN(CodeGenerator* gen, XMMRegister result)
178  : OutOfLineCode(gen), result_(result) {}
179 
180  void Generate() final {
181  __ Xorpd(result_, result_);
182  __ Divsd(result_, result_);
183  }
184 
185  private:
186  XMMRegister const result_;
187 };
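Both out-of-line NaN loaders materialize a quiet NaN without loading a constant: xorps/xorpd zeroes the destination and divss/divsd then divides it by itself, and 0/0 yields NaN under IEEE 754. A quick standalone check of the identity they rely on (illustrative only):

#include <cassert>
#include <cmath>

int main() {
  volatile float zf = 0.0f;   // volatile keeps the divisions from being folded
  volatile double zd = 0.0;
  assert(std::isnan(zf / zf));  // xorps + divss equivalent
  assert(std::isnan(zd / zd));  // xorpd + divsd equivalent
  return 0;
}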
188 
189 class OutOfLineTruncateDoubleToI final : public OutOfLineCode {
190  public:
191  OutOfLineTruncateDoubleToI(CodeGenerator* gen, Register result,
192  XMMRegister input, StubCallMode stub_mode,
193  UnwindingInfoWriter* unwinding_info_writer)
194  : OutOfLineCode(gen),
195  result_(result),
196  input_(input),
197  stub_mode_(stub_mode),
198  unwinding_info_writer_(unwinding_info_writer),
199  isolate_(gen->isolate()),
200  zone_(gen->zone()) {}
201 
202  void Generate() final {
203  __ subp(rsp, Immediate(kDoubleSize));
204  unwinding_info_writer_->MaybeIncreaseBaseOffsetAt(__ pc_offset(),
205  kDoubleSize);
206  __ Movsd(MemOperand(rsp, 0), input_);
207  if (stub_mode_ == StubCallMode::kCallWasmRuntimeStub) {
208  // A direct call to a wasm runtime stub defined in this module.
209  // Just encode the stub index. This will be patched when the code
210  // is added to the native module and copied into wasm code space.
211  __ near_call(wasm::WasmCode::kDoubleToI, RelocInfo::WASM_STUB_CALL);
212  } else {
213  __ Call(BUILTIN_CODE(isolate_, DoubleToI), RelocInfo::CODE_TARGET);
214  }
215  __ movl(result_, MemOperand(rsp, 0));
216  __ addp(rsp, Immediate(kDoubleSize));
217  unwinding_info_writer_->MaybeIncreaseBaseOffsetAt(__ pc_offset(),
218  -kDoubleSize);
219  }
220 
221  private:
222  Register const result_;
223  XMMRegister const input_;
224  StubCallMode stub_mode_;
225  UnwindingInfoWriter* const unwinding_info_writer_;
226  Isolate* isolate_;
227  Zone* zone_;
228 };
229 
230 class OutOfLineRecordWrite final : public OutOfLineCode {
231  public:
232  OutOfLineRecordWrite(CodeGenerator* gen, Register object, Operand operand,
233  Register value, Register scratch0, Register scratch1,
234  RecordWriteMode mode, StubCallMode stub_mode)
235  : OutOfLineCode(gen),
236  object_(object),
237  operand_(operand),
238  value_(value),
239  scratch0_(scratch0),
240  scratch1_(scratch1),
241  mode_(mode),
242  stub_mode_(stub_mode),
243  zone_(gen->zone()) {}
244 
245  void Generate() final {
246  if (mode_ > RecordWriteMode::kValueIsPointer) {
247  __ JumpIfSmi(value_, exit());
248  }
249  __ CheckPageFlag(value_, scratch0_,
250  MemoryChunk::kPointersToHereAreInterestingMask, zero,
251  exit());
252  __ leap(scratch1_, operand_);
253 
254  RememberedSetAction const remembered_set_action =
255  mode_ > RecordWriteMode::kValueIsMap ? EMIT_REMEMBERED_SET
256  : OMIT_REMEMBERED_SET;
257  SaveFPRegsMode const save_fp_mode =
258  frame()->DidAllocateDoubleRegisters() ? kSaveFPRegs : kDontSaveFPRegs;
259 
260  if (stub_mode_ == StubCallMode::kCallWasmRuntimeStub) {
261  // A direct call to a wasm runtime stub defined in this module.
262  // Just encode the stub index. This will be patched when the code
263  // is added to the native module and copied into wasm code space.
264  __ CallRecordWriteStub(object_, scratch1_, remembered_set_action,
265  save_fp_mode, wasm::WasmCode::kWasmRecordWrite);
266  } else {
267  __ CallRecordWriteStub(object_, scratch1_, remembered_set_action,
268  save_fp_mode);
269  }
270  }
271 
272  private:
273  Register const object_;
274  Operand const operand_;
275  Register const value_;
276  Register const scratch0_;
277  Register const scratch1_;
278  RecordWriteMode const mode_;
279  StubCallMode const stub_mode_;
280  Zone* zone_;
281 };
282 
283 class WasmOutOfLineTrap : public OutOfLineCode {
284  public:
285  WasmOutOfLineTrap(CodeGenerator* gen, Instruction* instr)
286  : OutOfLineCode(gen), gen_(gen), instr_(instr) {}
287 
288  void Generate() override {
289  X64OperandConverter i(gen_, instr_);
290  TrapId trap_id =
291  static_cast<TrapId>(i.InputInt32(instr_->InputCount() - 1));
292  GenerateWithTrapId(trap_id);
293  }
294 
295  protected:
296  CodeGenerator* gen_;
297 
298  void GenerateWithTrapId(TrapId trap_id) { GenerateCallToTrap(trap_id); }
299 
300  private:
301  void GenerateCallToTrap(TrapId trap_id) {
302  if (!gen_->wasm_runtime_exception_support()) {
303  // We cannot test calls to the runtime in cctest/test-run-wasm.
304  // Therefore we emit a call to C here instead of a call to the runtime.
305  __ PrepareCallCFunction(0);
306  __ CallCFunction(ExternalReference::wasm_call_trap_callback_for_testing(),
307  0);
308  __ LeaveFrame(StackFrame::WASM_COMPILED);
309  auto call_descriptor = gen_->linkage()->GetIncomingDescriptor();
310  size_t pop_size = call_descriptor->StackParameterCount() * kPointerSize;
311  // Use rcx as a scratch register; we return immediately anyway.
312  __ Ret(static_cast<int>(pop_size), rcx);
313  } else {
314  gen_->AssembleSourcePosition(instr_);
315  // A direct call to a wasm runtime stub defined in this module.
316  // Just encode the stub index. This will be patched when the code
317  // is added to the native module and copied into wasm code space.
318  __ near_call(static_cast<Address>(trap_id), RelocInfo::WASM_STUB_CALL);
319  ReferenceMap* reference_map =
320  new (gen_->zone()) ReferenceMap(gen_->zone());
321  gen_->RecordSafepoint(reference_map, Safepoint::kSimple, 0,
322  Safepoint::kNoLazyDeopt);
323  __ AssertUnreachable(AbortReason::kUnexpectedReturnFromWasmTrap);
324  }
325  }
326 
327  Instruction* instr_;
328 };
329 
330 class WasmProtectedInstructionTrap final : public WasmOutOfLineTrap {
331  public:
332  WasmProtectedInstructionTrap(CodeGenerator* gen, int pc, Instruction* instr)
333  : WasmOutOfLineTrap(gen, instr), pc_(pc) {}
334 
335  void Generate() final {
336  gen_->AddProtectedInstructionLanding(pc_, __ pc_offset());
337  GenerateWithTrapId(TrapId::kTrapMemOutOfBounds);
338  }
339 
340  private:
341  int pc_;
342 };
343 
344 void EmitOOLTrapIfNeeded(Zone* zone, CodeGenerator* codegen,
345  InstructionCode opcode, Instruction* instr,
346  X64OperandConverter& i, int pc) {
347  const MemoryAccessMode access_mode =
348  static_cast<MemoryAccessMode>(MiscField::decode(opcode));
349  if (access_mode == kMemoryAccessProtected) {
350  new (zone) WasmProtectedInstructionTrap(codegen, pc, instr);
351  }
352 }
353 
354 void EmitWordLoadPoisoningIfNeeded(CodeGenerator* codegen,
355  InstructionCode opcode, Instruction* instr,
356  X64OperandConverter& i) {
357  const MemoryAccessMode access_mode =
358  static_cast<MemoryAccessMode>(MiscField::decode(opcode));
359  if (access_mode == kMemoryAccessPoisoned) {
360  Register value = i.OutputRegister();
361  codegen->tasm()->andq(value, kSpeculationPoisonRegister);
362  }
363 }
364 
365 } // namespace
366 
367 #define ASSEMBLE_UNOP(asm_instr) \
368  do { \
369  if (instr->Output()->IsRegister()) { \
370  __ asm_instr(i.OutputRegister()); \
371  } else { \
372  __ asm_instr(i.OutputOperand()); \
373  } \
374  } while (false)
375 
376 #define ASSEMBLE_BINOP(asm_instr) \
377  do { \
378  if (AddressingModeField::decode(instr->opcode()) != kMode_None) { \
379  size_t index = 1; \
380  Operand right = i.MemoryOperand(&index); \
381  __ asm_instr(i.InputRegister(0), right); \
382  } else { \
383  if (HasImmediateInput(instr, 1)) { \
384  if (instr->InputAt(0)->IsRegister()) { \
385  __ asm_instr(i.InputRegister(0), i.InputImmediate(1)); \
386  } else { \
387  __ asm_instr(i.InputOperand(0), i.InputImmediate(1)); \
388  } \
389  } else { \
390  if (instr->InputAt(1)->IsRegister()) { \
391  __ asm_instr(i.InputRegister(0), i.InputRegister(1)); \
392  } else { \
393  __ asm_instr(i.InputRegister(0), i.InputOperand(1)); \
394  } \
395  } \
396  } \
397  } while (false)
398 
399 #define ASSEMBLE_COMPARE(asm_instr) \
400  do { \
401  if (AddressingModeField::decode(instr->opcode()) != kMode_None) { \
402  size_t index = 0; \
403  Operand left = i.MemoryOperand(&index); \
404  if (HasImmediateInput(instr, index)) { \
405  __ asm_instr(left, i.InputImmediate(index)); \
406  } else { \
407  __ asm_instr(left, i.InputRegister(index)); \
408  } \
409  } else { \
410  if (HasImmediateInput(instr, 1)) { \
411  if (instr->InputAt(0)->IsRegister()) { \
412  __ asm_instr(i.InputRegister(0), i.InputImmediate(1)); \
413  } else { \
414  __ asm_instr(i.InputOperand(0), i.InputImmediate(1)); \
415  } \
416  } else { \
417  if (instr->InputAt(1)->IsRegister()) { \
418  __ asm_instr(i.InputRegister(0), i.InputRegister(1)); \
419  } else { \
420  __ asm_instr(i.InputRegister(0), i.InputOperand(1)); \
421  } \
422  } \
423  } \
424  } while (false)
425 
426 #define ASSEMBLE_MULT(asm_instr) \
427  do { \
428  if (HasImmediateInput(instr, 1)) { \
429  if (instr->InputAt(0)->IsRegister()) { \
430  __ asm_instr(i.OutputRegister(), i.InputRegister(0), \
431  i.InputImmediate(1)); \
432  } else { \
433  __ asm_instr(i.OutputRegister(), i.InputOperand(0), \
434  i.InputImmediate(1)); \
435  } \
436  } else { \
437  if (instr->InputAt(1)->IsRegister()) { \
438  __ asm_instr(i.OutputRegister(), i.InputRegister(1)); \
439  } else { \
440  __ asm_instr(i.OutputRegister(), i.InputOperand(1)); \
441  } \
442  } \
443  } while (false)
444 
445 #define ASSEMBLE_SHIFT(asm_instr, width) \
446  do { \
447  if (HasImmediateInput(instr, 1)) { \
448  if (instr->Output()->IsRegister()) { \
449  __ asm_instr(i.OutputRegister(), Immediate(i.InputInt##width(1))); \
450  } else { \
451  __ asm_instr(i.OutputOperand(), Immediate(i.InputInt##width(1))); \
452  } \
453  } else { \
454  if (instr->Output()->IsRegister()) { \
455  __ asm_instr##_cl(i.OutputRegister()); \
456  } else { \
457  __ asm_instr##_cl(i.OutputOperand()); \
458  } \
459  } \
460  } while (false)
461 
462 #define ASSEMBLE_MOVX(asm_instr) \
463  do { \
464  if (instr->addressing_mode() != kMode_None) { \
465  __ asm_instr(i.OutputRegister(), i.MemoryOperand()); \
466  } else if (instr->InputAt(0)->IsRegister()) { \
467  __ asm_instr(i.OutputRegister(), i.InputRegister(0)); \
468  } else { \
469  __ asm_instr(i.OutputRegister(), i.InputOperand(0)); \
470  } \
471  } while (false)
472 
473 #define ASSEMBLE_SSE_BINOP(asm_instr) \
474  do { \
475  if (instr->InputAt(1)->IsFPRegister()) { \
476  __ asm_instr(i.InputDoubleRegister(0), i.InputDoubleRegister(1)); \
477  } else { \
478  __ asm_instr(i.InputDoubleRegister(0), i.InputOperand(1)); \
479  } \
480  } while (false)
481 
482 #define ASSEMBLE_SSE_UNOP(asm_instr) \
483  do { \
484  if (instr->InputAt(0)->IsFPRegister()) { \
485  __ asm_instr(i.OutputDoubleRegister(), i.InputDoubleRegister(0)); \
486  } else { \
487  __ asm_instr(i.OutputDoubleRegister(), i.InputOperand(0)); \
488  } \
489  } while (false)
490 
491 #define ASSEMBLE_AVX_BINOP(asm_instr) \
492  do { \
493  CpuFeatureScope avx_scope(tasm(), AVX); \
494  if (instr->InputAt(1)->IsFPRegister()) { \
495  __ asm_instr(i.OutputDoubleRegister(), i.InputDoubleRegister(0), \
496  i.InputDoubleRegister(1)); \
497  } else { \
498  __ asm_instr(i.OutputDoubleRegister(), i.InputDoubleRegister(0), \
499  i.InputOperand(1)); \
500  } \
501  } while (false)
502 
503 #define ASSEMBLE_IEEE754_BINOP(name) \
504  do { \
505  __ PrepareCallCFunction(2); \
506  __ CallCFunction(ExternalReference::ieee754_##name##_function(), 2); \
507  } while (false)
508 
509 #define ASSEMBLE_IEEE754_UNOP(name) \
510  do { \
511  __ PrepareCallCFunction(1); \
512  __ CallCFunction(ExternalReference::ieee754_##name##_function(), 1); \
513  } while (false)
514 
515 #define ASSEMBLE_ATOMIC_BINOP(bin_inst, mov_inst, cmpxchg_inst) \
516  do { \
517  Label binop; \
518  __ bind(&binop); \
519  __ mov_inst(rax, i.MemoryOperand(1)); \
520  __ movl(i.TempRegister(0), rax); \
521  __ bin_inst(i.TempRegister(0), i.InputRegister(0)); \
522  __ lock(); \
523  __ cmpxchg_inst(i.MemoryOperand(1), i.TempRegister(0)); \
524  __ j(not_equal, &binop); \
525  } while (false)
526 
527 #define ASSEMBLE_ATOMIC64_BINOP(bin_inst, mov_inst, cmpxchg_inst) \
528  do { \
529  Label binop; \
530  __ bind(&binop); \
531  __ mov_inst(rax, i.MemoryOperand(1)); \
532  __ movq(i.TempRegister(0), rax); \
533  __ bin_inst(i.TempRegister(0), i.InputRegister(0)); \
534  __ lock(); \
535  __ cmpxchg_inst(i.MemoryOperand(1), i.TempRegister(0)); \
536  __ j(not_equal, &binop); \
537  } while (false)
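ASSEMBLE_ATOMIC_BINOP and ASSEMBLE_ATOMIC64_BINOP emit the usual load / compute / lock cmpxchg retry loop: the old value is read into rax, the new value is computed in a temp register, and the store is retried until no other thread has modified the location in between, leaving the old value in rax as the instruction's result. The same pattern sketched with std::atomic (an analogy, not how the generated code is exercised):

#include <atomic>
#include <cstdint>

// Emulates the generated sequence for an atomic 32-bit "add" binop.
int32_t AtomicAdd(std::atomic<int32_t>* cell, int32_t operand) {
  int32_t old_value = cell->load();        // movl rax, [mem]
  int32_t new_value;
  do {
    new_value = old_value + operand;       // bin_inst temp, input
    // compare_exchange_weak plays the role of lock cmpxchg; on failure it
    // refreshes old_value and the loop retries, like j(not_equal, &binop).
  } while (!cell->compare_exchange_weak(old_value, new_value));
  return old_value;                        // rax holds the previous value
}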
538 
539 void CodeGenerator::AssembleDeconstructFrame() {
540  unwinding_info_writer_.MarkFrameDeconstructed(__ pc_offset());
541  __ movq(rsp, rbp);
542  __ popq(rbp);
543 }
544 
545 void CodeGenerator::AssemblePrepareTailCall() {
546  if (frame_access_state()->has_frame()) {
547  __ movq(rbp, MemOperand(rbp, 0));
548  }
549  frame_access_state()->SetFrameAccessToSP();
550 }
551 
552 void CodeGenerator::AssemblePopArgumentsAdaptorFrame(Register args_reg,
553  Register scratch1,
554  Register scratch2,
555  Register scratch3) {
556  DCHECK(!AreAliased(args_reg, scratch1, scratch2, scratch3));
557  Label done;
558 
559  // Check if current frame is an arguments adaptor frame.
560  __ cmpp(Operand(rbp, CommonFrameConstants::kContextOrFrameTypeOffset),
561  Immediate(StackFrame::TypeToMarker(StackFrame::ARGUMENTS_ADAPTOR)));
562  __ j(not_equal, &done, Label::kNear);
563 
564  // Load the arguments count from the current arguments adaptor frame (note
565  // that it does not include the receiver).
566  Register caller_args_count_reg = scratch1;
567  __ SmiUntag(caller_args_count_reg,
568  Operand(rbp, ArgumentsAdaptorFrameConstants::kLengthOffset));
569 
570  ParameterCount callee_args_count(args_reg);
571  __ PrepareForTailCall(callee_args_count, caller_args_count_reg, scratch2,
572  scratch3);
573  __ bind(&done);
574 }
575 
576 namespace {
577 
578 void AdjustStackPointerForTailCall(Assembler* assembler,
579  FrameAccessState* state,
580  int new_slot_above_sp,
581  bool allow_shrinkage = true) {
582  int current_sp_offset = state->GetSPToFPSlotCount() +
583  StandardFrameConstants::kFixedSlotCountAboveFp;
584  int stack_slot_delta = new_slot_above_sp - current_sp_offset;
585  if (stack_slot_delta > 0) {
586  assembler->subq(rsp, Immediate(stack_slot_delta * kPointerSize));
587  state->IncreaseSPDelta(stack_slot_delta);
588  } else if (allow_shrinkage && stack_slot_delta < 0) {
589  assembler->addq(rsp, Immediate(-stack_slot_delta * kPointerSize));
590  state->IncreaseSPDelta(stack_slot_delta);
591  }
592 }
593 
594 } // namespace
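AdjustStackPointerForTailCall works in stack slots: it compares the slot index just above the current stack pointer (derived from the frame access state) with the first slot the tail call leaves unused, and moves rsp by the difference, one kPointerSize per slot. For example, if the current offset works out to 6 slots and the tail call's first unused slot is at index 8, the delta is 2 and rsp is lowered by 16 bytes; a negative delta raises rsp again, but only when allow_shrinkage is true.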
595 
596 void CodeGenerator::AssembleTailCallBeforeGap(Instruction* instr,
597  int first_unused_stack_slot) {
598  CodeGenerator::PushTypeFlags flags(kImmediatePush | kScalarPush);
599  ZoneVector<MoveOperands*> pushes(zone());
600  GetPushCompatibleMoves(instr, flags, &pushes);
601 
602  if (!pushes.empty() &&
603  (LocationOperand::cast(pushes.back()->destination()).index() + 1 ==
604  first_unused_stack_slot)) {
605  X64OperandConverter g(this, instr);
606  for (auto move : pushes) {
607  LocationOperand destination_location(
608  LocationOperand::cast(move->destination()));
609  InstructionOperand source(move->source());
610  AdjustStackPointerForTailCall(tasm(), frame_access_state(),
611  destination_location.index());
612  if (source.IsStackSlot()) {
613  LocationOperand source_location(LocationOperand::cast(source));
614  __ Push(g.SlotToOperand(source_location.index()));
615  } else if (source.IsRegister()) {
616  LocationOperand source_location(LocationOperand::cast(source));
617  __ Push(source_location.GetRegister());
618  } else if (source.IsImmediate()) {
619  __ Push(Immediate(ImmediateOperand::cast(source).inline_value()));
620  } else {
621  // Pushes of non-scalar data types are not supported.
622  UNIMPLEMENTED();
623  }
624  frame_access_state()->IncreaseSPDelta(1);
625  move->Eliminate();
626  }
627  }
628  AdjustStackPointerForTailCall(tasm(), frame_access_state(),
629  first_unused_stack_slot, false);
630 }
631 
632 void CodeGenerator::AssembleTailCallAfterGap(Instruction* instr,
633  int first_unused_stack_slot) {
634  AdjustStackPointerForTailCall(tasm(), frame_access_state(),
635  first_unused_stack_slot);
636 }
637 
638 // Check that {kJavaScriptCallCodeStartRegister} is correct.
639 void CodeGenerator::AssembleCodeStartRegisterCheck() {
640  __ ComputeCodeStartAddress(rbx);
641  __ cmpq(rbx, kJavaScriptCallCodeStartRegister);
642  __ Assert(equal, AbortReason::kWrongFunctionCodeStart);
643 }
644 
645 // Check if the code object is marked for deoptimization. If it is, then it
646 // jumps to the CompileLazyDeoptimizedCode builtin. In order to do this we need
647 // to:
648 // 1. read from memory the word that contains that bit, which can be found in
649 // the flags in the referenced {CodeDataContainer} object;
650 // 2. test kMarkedForDeoptimizationBit in those flags; and
651 // 3. if it is not zero then it jumps to the builtin.
652 void CodeGenerator::BailoutIfDeoptimized() {
653  int offset = Code::kCodeDataContainerOffset - Code::kHeaderSize;
654  __ movp(rbx, Operand(kJavaScriptCallCodeStartRegister, offset));
655  __ testl(FieldOperand(rbx, CodeDataContainer::kKindSpecificFlagsOffset),
656  Immediate(1 << Code::kMarkedForDeoptimizationBit));
657  // Ensure we're not serializing (otherwise we'd need to use an indirection to
658  // access the builtin below).
659  DCHECK(!isolate()->ShouldLoadConstantsFromRootList());
660  Handle<Code> code = isolate()->builtins()->builtin_handle(
661  Builtins::kCompileLazyDeoptimizedCode);
662  __ j(not_zero, code, RelocInfo::CODE_TARGET);
663 }
664 
665 void CodeGenerator::GenerateSpeculationPoisonFromCodeStartRegister() {
666  // Set a mask which has all bits set in the normal case, but has all
667  // bits cleared if we are speculatively executing the wrong PC.
668  __ ComputeCodeStartAddress(rbx);
669  __ xorq(kSpeculationPoisonRegister, kSpeculationPoisonRegister);
670  __ cmpp(kJavaScriptCallCodeStartRegister, rbx);
671  __ movp(rbx, Immediate(-1));
672  __ cmovq(equal, kSpeculationPoisonRegister, rbx);
673 }
674 
675 void CodeGenerator::AssembleRegisterArgumentPoisoning() {
676  __ andq(kJSFunctionRegister, kSpeculationPoisonRegister);
677  __ andq(kContextRegister, kSpeculationPoisonRegister);
678  __ andq(rsp, kSpeculationPoisonRegister);
679 }
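The two poisoning helpers above implement a branchless mitigation: the poison register starts out as zero and is conditionally moved to all ones only when the code start computed from the actual PC matches the expected kJavaScriptCallCodeStartRegister, so values ANDed with it pass through on the architecturally correct path and collapse to zero under misspeculation. A hedged standalone sketch of the masking idea:

#include <cassert>
#include <cstdint>

// All ones when the code-start check passes, zero otherwise (the cmovq above).
uint64_t PoisonMask(uint64_t expected_code_start, uint64_t actual_code_start) {
  return expected_code_start == actual_code_start ? ~uint64_t{0} : uint64_t{0};
}

int main() {
  uint64_t value = 0x1234;
  assert((value & PoisonMask(0x10, 0x10)) == value);  // correct path
  assert((value & PoisonMask(0x10, 0x20)) == 0);      // mispredicted path
  return 0;
}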
680 
681 // Assembles an instruction after register allocation, producing machine code.
682 CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
683  Instruction* instr) {
684  X64OperandConverter i(this, instr);
685  InstructionCode opcode = instr->opcode();
686  ArchOpcode arch_opcode = ArchOpcodeField::decode(opcode);
687  switch (arch_opcode) {
688  case kArchCallCodeObject: {
689  if (HasImmediateInput(instr, 0)) {
690  Handle<Code> code = i.InputCode(0);
691  __ Call(code, RelocInfo::CODE_TARGET);
692  } else {
693  Register reg = i.InputRegister(0);
694  DCHECK_IMPLIES(
695  HasCallDescriptorFlag(instr, CallDescriptor::kFixedTargetRegister),
696  reg == kJavaScriptCallCodeStartRegister);
697  __ addp(reg, Immediate(Code::kHeaderSize - kHeapObjectTag));
698  if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) {
699  __ RetpolineCall(reg);
700  } else {
701  __ call(reg);
702  }
703  }
704  RecordCallPosition(instr);
705  frame_access_state()->ClearSPDelta();
706  break;
707  }
708  case kArchCallWasmFunction: {
709  if (HasImmediateInput(instr, 0)) {
710  Constant constant = i.ToConstant(instr->InputAt(0));
711  Address wasm_code = static_cast<Address>(constant.ToInt64());
712  if (DetermineStubCallMode() == StubCallMode::kCallWasmRuntimeStub) {
713  __ near_call(wasm_code, constant.rmode());
714  } else {
715  if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) {
716  __ RetpolineCall(wasm_code, constant.rmode());
717  } else {
718  __ Call(wasm_code, constant.rmode());
719  }
720  }
721  } else {
722  Register reg = i.InputRegister(0);
723  if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) {
724  __ RetpolineCall(reg);
725  } else {
726  __ call(reg);
727  }
728  }
729  RecordCallPosition(instr);
730  frame_access_state()->ClearSPDelta();
731  break;
732  }
733  case kArchTailCallCodeObjectFromJSFunction:
734  case kArchTailCallCodeObject: {
735  if (arch_opcode == kArchTailCallCodeObjectFromJSFunction) {
736  AssemblePopArgumentsAdaptorFrame(kJavaScriptCallArgCountRegister,
737  i.TempRegister(0), i.TempRegister(1),
738  i.TempRegister(2));
739  }
740  if (HasImmediateInput(instr, 0)) {
741  Handle<Code> code = i.InputCode(0);
742  __ Jump(code, RelocInfo::CODE_TARGET);
743  } else {
744  Register reg = i.InputRegister(0);
745  DCHECK_IMPLIES(
746  HasCallDescriptorFlag(instr, CallDescriptor::kFixedTargetRegister),
747  reg == kJavaScriptCallCodeStartRegister);
748  __ addp(reg, Immediate(Code::kHeaderSize - kHeapObjectTag));
749  if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) {
750  __ RetpolineJump(reg);
751  } else {
752  __ jmp(reg);
753  }
754  }
755  unwinding_info_writer_.MarkBlockWillExit();
756  frame_access_state()->ClearSPDelta();
757  frame_access_state()->SetFrameAccessToDefault();
758  break;
759  }
760  case kArchTailCallWasm: {
761  if (HasImmediateInput(instr, 0)) {
762  Constant constant = i.ToConstant(instr->InputAt(0));
763  Address wasm_code = static_cast<Address>(constant.ToInt64());
764  if (DetermineStubCallMode() == StubCallMode::kCallWasmRuntimeStub) {
765  __ near_jmp(wasm_code, constant.rmode());
766  } else {
767  __ Move(kScratchRegister, wasm_code, constant.rmode());
768  __ jmp(kScratchRegister);
769  }
770  } else {
771  Register reg = i.InputRegister(0);
772  if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) {
773  __ RetpolineJump(reg);
774  } else {
775  __ jmp(reg);
776  }
777  }
778  unwinding_info_writer_.MarkBlockWillExit();
779  frame_access_state()->ClearSPDelta();
780  frame_access_state()->SetFrameAccessToDefault();
781  break;
782  }
783  case kArchTailCallAddress: {
784  CHECK(!HasImmediateInput(instr, 0));
785  Register reg = i.InputRegister(0);
786  DCHECK_IMPLIES(
787  HasCallDescriptorFlag(instr, CallDescriptor::kFixedTargetRegister),
788  reg == kJavaScriptCallCodeStartRegister);
789  if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) {
790  __ RetpolineJump(reg);
791  } else {
792  __ jmp(reg);
793  }
794  unwinding_info_writer_.MarkBlockWillExit();
795  frame_access_state()->ClearSPDelta();
796  frame_access_state()->SetFrameAccessToDefault();
797  break;
798  }
799  case kArchCallJSFunction: {
800  Register func = i.InputRegister(0);
801  if (FLAG_debug_code) {
802  // Check the function's context matches the context argument.
803  __ cmpp(rsi, FieldOperand(func, JSFunction::kContextOffset));
804  __ Assert(equal, AbortReason::kWrongFunctionContext);
805  }
806  static_assert(kJavaScriptCallCodeStartRegister == rcx, "ABI mismatch");
807  __ movp(rcx, FieldOperand(func, JSFunction::kCodeOffset));
808  __ addp(rcx, Immediate(Code::kHeaderSize - kHeapObjectTag));
809  __ call(rcx);
810  frame_access_state()->ClearSPDelta();
811  RecordCallPosition(instr);
812  break;
813  }
814  case kArchPrepareCallCFunction: {
815  // Frame alignment requires using FP-relative frame addressing.
816  frame_access_state()->SetFrameAccessToFP();
817  int const num_parameters = MiscField::decode(instr->opcode());
818  __ PrepareCallCFunction(num_parameters);
819  break;
820  }
821  case kArchSaveCallerRegisters: {
822  fp_mode_ =
823  static_cast<SaveFPRegsMode>(MiscField::decode(instr->opcode()));
824  DCHECK(fp_mode_ == kDontSaveFPRegs || fp_mode_ == kSaveFPRegs);
825  // kReturnRegister0 should have been saved before entering the stub.
826  int bytes = __ PushCallerSaved(fp_mode_, kReturnRegister0);
827  DCHECK_EQ(0, bytes % kPointerSize);
828  DCHECK_EQ(0, frame_access_state()->sp_delta());
829  frame_access_state()->IncreaseSPDelta(bytes / kPointerSize);
830  DCHECK(!caller_registers_saved_);
831  caller_registers_saved_ = true;
832  break;
833  }
834  case kArchRestoreCallerRegisters: {
835  DCHECK(fp_mode_ ==
836  static_cast<SaveFPRegsMode>(MiscField::decode(instr->opcode())));
837  DCHECK(fp_mode_ == kDontSaveFPRegs || fp_mode_ == kSaveFPRegs);
838  // Don't overwrite the returned value.
839  int bytes = __ PopCallerSaved(fp_mode_, kReturnRegister0);
840  frame_access_state()->IncreaseSPDelta(-(bytes / kPointerSize));
841  DCHECK_EQ(0, frame_access_state()->sp_delta());
842  DCHECK(caller_registers_saved_);
843  caller_registers_saved_ = false;
844  break;
845  }
846  case kArchPrepareTailCall:
847  AssemblePrepareTailCall();
848  break;
849  case kArchCallCFunction: {
850  int const num_parameters = MiscField::decode(instr->opcode());
851  if (HasImmediateInput(instr, 0)) {
852  ExternalReference ref = i.InputExternalReference(0);
853  __ CallCFunction(ref, num_parameters);
854  } else {
855  Register func = i.InputRegister(0);
856  __ CallCFunction(func, num_parameters);
857  }
858  frame_access_state()->SetFrameAccessToDefault();
859  // Ideally, we should decrement SP delta to match the change of stack
860  // pointer in CallCFunction. However, for certain architectures (e.g.
861  // ARM), there may be more strict alignment requirement, causing old SP
862  // to be saved on the stack. In those cases, we cannot calculate the SP
863  // delta statically.
864  frame_access_state()->ClearSPDelta();
865  if (caller_registers_saved_) {
866  // Need to re-sync SP delta introduced in kArchSaveCallerRegisters.
867  // Here, we assume the sequence to be:
868  // kArchSaveCallerRegisters;
869  // kArchCallCFunction;
870  // kArchRestoreCallerRegisters;
871  int bytes =
872  __ RequiredStackSizeForCallerSaved(fp_mode_, kReturnRegister0);
873  frame_access_state()->IncreaseSPDelta(bytes / kPointerSize);
874  }
875  // TODO(tebbi): Do we need an lfence here?
876  break;
877  }
878  case kArchJmp:
879  AssembleArchJump(i.InputRpo(0));
880  break;
881  case kArchBinarySearchSwitch:
882  AssembleArchBinarySearchSwitch(instr);
883  break;
884  case kArchLookupSwitch:
885  AssembleArchLookupSwitch(instr);
886  break;
887  case kArchTableSwitch:
888  AssembleArchTableSwitch(instr);
889  break;
890  case kArchComment:
891  __ RecordComment(reinterpret_cast<const char*>(i.InputInt64(0)));
892  break;
893  case kArchDebugAbort:
894  DCHECK(i.InputRegister(0) == rdx);
895  if (!frame_access_state()->has_frame()) {
896  // We don't actually want to generate a pile of code for this, so just
897  // claim there is a stack frame, without generating one.
898  FrameScope scope(tasm(), StackFrame::NONE);
899  __ Call(isolate()->builtins()->builtin_handle(Builtins::kAbortJS),
900  RelocInfo::CODE_TARGET);
901  } else {
902  __ Call(isolate()->builtins()->builtin_handle(Builtins::kAbortJS),
903  RelocInfo::CODE_TARGET);
904  }
905  __ int3();
906  unwinding_info_writer_.MarkBlockWillExit();
907  break;
908  case kArchDebugBreak:
909  __ int3();
910  break;
911  case kArchThrowTerminator:
912  unwinding_info_writer_.MarkBlockWillExit();
913  break;
914  case kArchNop:
915  // don't emit code for nops.
916  break;
917  case kArchDeoptimize: {
918  int deopt_state_id =
919  BuildTranslation(instr, -1, 0, OutputFrameStateCombine::Ignore());
920  CodeGenResult result =
921  AssembleDeoptimizerCall(deopt_state_id, current_source_position_);
922  if (result != kSuccess) return result;
923  unwinding_info_writer_.MarkBlockWillExit();
924  break;
925  }
926  case kArchRet:
927  AssembleReturn(instr->InputAt(0));
928  break;
929  case kArchStackPointer:
930  __ movq(i.OutputRegister(), rsp);
931  break;
932  case kArchFramePointer:
933  __ movq(i.OutputRegister(), rbp);
934  break;
935  case kArchParentFramePointer:
936  if (frame_access_state()->has_frame()) {
937  __ movq(i.OutputRegister(), Operand(rbp, 0));
938  } else {
939  __ movq(i.OutputRegister(), rbp);
940  }
941  break;
942  case kArchTruncateDoubleToI: {
943  auto result = i.OutputRegister();
944  auto input = i.InputDoubleRegister(0);
945  auto ool = new (zone()) OutOfLineTruncateDoubleToI(
946  this, result, input, DetermineStubCallMode(),
947  &unwinding_info_writer_);
948  // We use Cvttsd2siq instead of Cvttsd2si for performance reasons. The
949  // use of Cvttsd2siq requires the movl below to avoid sign extension.
950  __ Cvttsd2siq(result, input);
951  __ cmpq(result, Immediate(1));
952  __ j(overflow, ool->entry());
953  __ bind(ool->exit());
954  __ movl(result, result);
955  break;
956  }
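The overflow check in kArchTruncateDoubleToI relies on cvttsd2siq returning INT64_MIN (0x8000000000000000) for NaN and out-of-range inputs: cmpq result, 1 computes result - 1, and the only value for which that subtraction overflows is INT64_MIN, so j(overflow) reaches the out-of-line DoubleToI path exactly when the fast conversion failed. A small standalone illustration of that wrap-around:

#include <cassert>
#include <cstdint>
#include <limits>

int main() {
  // Subtracting 1 from INT64_MIN is the only case where the comparison's
  // implicit subtraction overflows; unsigned arithmetic shows the wrap.
  int64_t sentinel = std::numeric_limits<int64_t>::min();
  uint64_t wrapped = static_cast<uint64_t>(sentinel) - 1u;
  assert(static_cast<int64_t>(wrapped) == std::numeric_limits<int64_t>::max());
  return 0;
}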
957  case kArchStoreWithWriteBarrier: {
958  RecordWriteMode mode =
959  static_cast<RecordWriteMode>(MiscField::decode(instr->opcode()));
960  Register object = i.InputRegister(0);
961  size_t index = 0;
962  Operand operand = i.MemoryOperand(&index);
963  Register value = i.InputRegister(index);
964  Register scratch0 = i.TempRegister(0);
965  Register scratch1 = i.TempRegister(1);
966  auto ool = new (zone())
967  OutOfLineRecordWrite(this, object, operand, value, scratch0, scratch1,
968  mode, DetermineStubCallMode());
969  __ movp(operand, value);
970  __ CheckPageFlag(object, scratch0,
971  MemoryChunk::kPointersFromHereAreInterestingMask,
972  not_zero, ool->entry());
973  __ bind(ool->exit());
974  break;
975  }
976  case kArchWordPoisonOnSpeculation:
977  DCHECK_EQ(i.OutputRegister(), i.InputRegister(0));
978  __ andq(i.InputRegister(0), kSpeculationPoisonRegister);
979  break;
980  case kLFence:
981  __ lfence();
982  break;
983  case kArchStackSlot: {
984  FrameOffset offset =
985  frame_access_state()->GetFrameOffset(i.InputInt32(0));
986  Register base = offset.from_stack_pointer() ? rsp : rbp;
987  __ leaq(i.OutputRegister(), Operand(base, offset.offset()));
988  break;
989  }
990  case kIeee754Float64Acos:
991  ASSEMBLE_IEEE754_UNOP(acos);
992  break;
993  case kIeee754Float64Acosh:
994  ASSEMBLE_IEEE754_UNOP(acosh);
995  break;
996  case kIeee754Float64Asin:
997  ASSEMBLE_IEEE754_UNOP(asin);
998  break;
999  case kIeee754Float64Asinh:
1000  ASSEMBLE_IEEE754_UNOP(asinh);
1001  break;
1002  case kIeee754Float64Atan:
1003  ASSEMBLE_IEEE754_UNOP(atan);
1004  break;
1005  case kIeee754Float64Atanh:
1006  ASSEMBLE_IEEE754_UNOP(atanh);
1007  break;
1008  case kIeee754Float64Atan2:
1009  ASSEMBLE_IEEE754_BINOP(atan2);
1010  break;
1011  case kIeee754Float64Cbrt:
1012  ASSEMBLE_IEEE754_UNOP(cbrt);
1013  break;
1014  case kIeee754Float64Cos:
1015  ASSEMBLE_IEEE754_UNOP(cos);
1016  break;
1017  case kIeee754Float64Cosh:
1018  ASSEMBLE_IEEE754_UNOP(cosh);
1019  break;
1020  case kIeee754Float64Exp:
1021  ASSEMBLE_IEEE754_UNOP(exp);
1022  break;
1023  case kIeee754Float64Expm1:
1024  ASSEMBLE_IEEE754_UNOP(expm1);
1025  break;
1026  case kIeee754Float64Log:
1027  ASSEMBLE_IEEE754_UNOP(log);
1028  break;
1029  case kIeee754Float64Log1p:
1030  ASSEMBLE_IEEE754_UNOP(log1p);
1031  break;
1032  case kIeee754Float64Log2:
1033  ASSEMBLE_IEEE754_UNOP(log2);
1034  break;
1035  case kIeee754Float64Log10:
1036  ASSEMBLE_IEEE754_UNOP(log10);
1037  break;
1038  case kIeee754Float64Pow: {
1039  // TODO(bmeurer): Improve integration of the stub.
1040  __ Movsd(xmm2, xmm0);
1041  __ Call(BUILTIN_CODE(isolate(), MathPowInternal), RelocInfo::CODE_TARGET);
1042  __ Movsd(xmm0, xmm3);
1043  break;
1044  }
1045  case kIeee754Float64Sin:
1046  ASSEMBLE_IEEE754_UNOP(sin);
1047  break;
1048  case kIeee754Float64Sinh:
1049  ASSEMBLE_IEEE754_UNOP(sinh);
1050  break;
1051  case kIeee754Float64Tan:
1052  ASSEMBLE_IEEE754_UNOP(tan);
1053  break;
1054  case kIeee754Float64Tanh:
1055  ASSEMBLE_IEEE754_UNOP(tanh);
1056  break;
1057  case kX64Add32:
1058  ASSEMBLE_BINOP(addl);
1059  break;
1060  case kX64Add:
1061  ASSEMBLE_BINOP(addq);
1062  break;
1063  case kX64Sub32:
1064  ASSEMBLE_BINOP(subl);
1065  break;
1066  case kX64Sub:
1067  ASSEMBLE_BINOP(subq);
1068  break;
1069  case kX64And32:
1070  ASSEMBLE_BINOP(andl);
1071  break;
1072  case kX64And:
1073  ASSEMBLE_BINOP(andq);
1074  break;
1075  case kX64Cmp8:
1076  ASSEMBLE_COMPARE(cmpb);
1077  break;
1078  case kX64Cmp16:
1079  ASSEMBLE_COMPARE(cmpw);
1080  break;
1081  case kX64Cmp32:
1082  ASSEMBLE_COMPARE(cmpl);
1083  break;
1084  case kX64Cmp:
1085  ASSEMBLE_COMPARE(cmpq);
1086  break;
1087  case kX64Test8:
1088  ASSEMBLE_COMPARE(testb);
1089  break;
1090  case kX64Test16:
1091  ASSEMBLE_COMPARE(testw);
1092  break;
1093  case kX64Test32:
1094  ASSEMBLE_COMPARE(testl);
1095  break;
1096  case kX64Test:
1097  ASSEMBLE_COMPARE(testq);
1098  break;
1099  case kX64Imul32:
1100  ASSEMBLE_MULT(imull);
1101  break;
1102  case kX64Imul:
1103  ASSEMBLE_MULT(imulq);
1104  break;
1105  case kX64ImulHigh32:
1106  if (instr->InputAt(1)->IsRegister()) {
1107  __ imull(i.InputRegister(1));
1108  } else {
1109  __ imull(i.InputOperand(1));
1110  }
1111  break;
1112  case kX64UmulHigh32:
1113  if (instr->InputAt(1)->IsRegister()) {
1114  __ mull(i.InputRegister(1));
1115  } else {
1116  __ mull(i.InputOperand(1));
1117  }
1118  break;
1119  case kX64Idiv32:
1120  __ cdq();
1121  __ idivl(i.InputRegister(1));
1122  break;
1123  case kX64Idiv:
1124  __ cqo();
1125  __ idivq(i.InputRegister(1));
1126  break;
1127  case kX64Udiv32:
1128  __ xorl(rdx, rdx);
1129  __ divl(i.InputRegister(1));
1130  break;
1131  case kX64Udiv:
1132  __ xorq(rdx, rdx);
1133  __ divq(i.InputRegister(1));
1134  break;
1135  case kX64Not:
1136  ASSEMBLE_UNOP(notq);
1137  break;
1138  case kX64Not32:
1139  ASSEMBLE_UNOP(notl);
1140  break;
1141  case kX64Neg:
1142  ASSEMBLE_UNOP(negq);
1143  break;
1144  case kX64Neg32:
1145  ASSEMBLE_UNOP(negl);
1146  break;
1147  case kX64Or32:
1148  ASSEMBLE_BINOP(orl);
1149  break;
1150  case kX64Or:
1151  ASSEMBLE_BINOP(orq);
1152  break;
1153  case kX64Xor32:
1154  ASSEMBLE_BINOP(xorl);
1155  break;
1156  case kX64Xor:
1157  ASSEMBLE_BINOP(xorq);
1158  break;
1159  case kX64Shl32:
1160  ASSEMBLE_SHIFT(shll, 5);
1161  break;
1162  case kX64Shl:
1163  ASSEMBLE_SHIFT(shlq, 6);
1164  break;
1165  case kX64Shr32:
1166  ASSEMBLE_SHIFT(shrl, 5);
1167  break;
1168  case kX64Shr:
1169  ASSEMBLE_SHIFT(shrq, 6);
1170  break;
1171  case kX64Sar32:
1172  ASSEMBLE_SHIFT(sarl, 5);
1173  break;
1174  case kX64Sar:
1175  ASSEMBLE_SHIFT(sarq, 6);
1176  break;
1177  case kX64Ror32:
1178  ASSEMBLE_SHIFT(rorl, 5);
1179  break;
1180  case kX64Ror:
1181  ASSEMBLE_SHIFT(rorq, 6);
1182  break;
1183  case kX64Lzcnt:
1184  if (instr->InputAt(0)->IsRegister()) {
1185  __ Lzcntq(i.OutputRegister(), i.InputRegister(0));
1186  } else {
1187  __ Lzcntq(i.OutputRegister(), i.InputOperand(0));
1188  }
1189  break;
1190  case kX64Lzcnt32:
1191  if (instr->InputAt(0)->IsRegister()) {
1192  __ Lzcntl(i.OutputRegister(), i.InputRegister(0));
1193  } else {
1194  __ Lzcntl(i.OutputRegister(), i.InputOperand(0));
1195  }
1196  break;
1197  case kX64Tzcnt:
1198  if (instr->InputAt(0)->IsRegister()) {
1199  __ Tzcntq(i.OutputRegister(), i.InputRegister(0));
1200  } else {
1201  __ Tzcntq(i.OutputRegister(), i.InputOperand(0));
1202  }
1203  break;
1204  case kX64Tzcnt32:
1205  if (instr->InputAt(0)->IsRegister()) {
1206  __ Tzcntl(i.OutputRegister(), i.InputRegister(0));
1207  } else {
1208  __ Tzcntl(i.OutputRegister(), i.InputOperand(0));
1209  }
1210  break;
1211  case kX64Popcnt:
1212  if (instr->InputAt(0)->IsRegister()) {
1213  __ Popcntq(i.OutputRegister(), i.InputRegister(0));
1214  } else {
1215  __ Popcntq(i.OutputRegister(), i.InputOperand(0));
1216  }
1217  break;
1218  case kX64Popcnt32:
1219  if (instr->InputAt(0)->IsRegister()) {
1220  __ Popcntl(i.OutputRegister(), i.InputRegister(0));
1221  } else {
1222  __ Popcntl(i.OutputRegister(), i.InputOperand(0));
1223  }
1224  break;
1225  case kX64Bswap:
1226  __ bswapq(i.OutputRegister());
1227  break;
1228  case kX64Bswap32:
1229  __ bswapl(i.OutputRegister());
1230  break;
1231  case kSSEFloat32Cmp:
1232  ASSEMBLE_SSE_BINOP(Ucomiss);
1233  break;
1234  case kSSEFloat32Add:
1235  ASSEMBLE_SSE_BINOP(addss);
1236  break;
1237  case kSSEFloat32Sub:
1238  ASSEMBLE_SSE_BINOP(subss);
1239  break;
1240  case kSSEFloat32Mul:
1241  ASSEMBLE_SSE_BINOP(mulss);
1242  break;
1243  case kSSEFloat32Div:
1244  ASSEMBLE_SSE_BINOP(divss);
1245  // Don't delete this mov. It may improve performance on some CPUs,
1246  // when there is a (v)mulss depending on the result.
1247  __ movaps(i.OutputDoubleRegister(), i.OutputDoubleRegister());
1248  break;
1249  case kSSEFloat32Abs: {
1250  // TODO(bmeurer): Use RIP relative 128-bit constants.
1251  __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
1252  __ psrlq(kScratchDoubleReg, 33);
1253  __ andps(i.OutputDoubleRegister(), kScratchDoubleReg);
1254  break;
1255  }
1256  case kSSEFloat32Neg: {
1257  // TODO(bmeurer): Use RIP relative 128-bit constants.
1258  __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
1259  __ psllq(kScratchDoubleReg, 31);
1260  __ xorps(i.OutputDoubleRegister(), kScratchDoubleReg);
1261  break;
1262  }
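kSSEFloat32Abs and kSSEFloat32Neg (and their Float64 counterparts further down) build their masks on the fly instead of loading constants: pcmpeqd sets every bit of the scratch register, psrlq by 33 leaves 0x7FFFFFFF in the low 32 bits of each quadword (an abs mask for the scalar lane), and psllq by 31 leaves 0x80000000 there (a sign-flip mask). The same bit-level trick on a scalar float, as a minimal sketch:

#include <cassert>
#include <cstdint>
#include <cstring>

// Scalar equivalents of the generated mask operations (illustrative only).
float BitwiseAbs(float x) {
  uint32_t bits;
  std::memcpy(&bits, &x, sizeof bits);
  bits &= 0x7FFFFFFFu;                // andps with the psrlq-33 mask
  std::memcpy(&x, &bits, sizeof x);
  return x;
}

float BitwiseNeg(float x) {
  uint32_t bits;
  std::memcpy(&bits, &x, sizeof bits);
  bits ^= 0x80000000u;                // xorps with the psllq-31 mask
  std::memcpy(&x, &bits, sizeof x);
  return x;
}

int main() {
  assert(BitwiseAbs(-2.5f) == 2.5f);
  assert(BitwiseNeg(2.5f) == -2.5f);
  return 0;
}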
1263  case kSSEFloat32Sqrt:
1264  ASSEMBLE_SSE_UNOP(sqrtss);
1265  break;
1266  case kSSEFloat32ToFloat64:
1267  ASSEMBLE_SSE_UNOP(Cvtss2sd);
1268  break;
1269  case kSSEFloat32Round: {
1270  CpuFeatureScope sse_scope(tasm(), SSE4_1);
1271  RoundingMode const mode =
1272  static_cast<RoundingMode>(MiscField::decode(instr->opcode()));
1273  __ Roundss(i.OutputDoubleRegister(), i.InputDoubleRegister(0), mode);
1274  break;
1275  }
1276  case kSSEFloat32ToInt32:
1277  if (instr->InputAt(0)->IsFPRegister()) {
1278  __ Cvttss2si(i.OutputRegister(), i.InputDoubleRegister(0));
1279  } else {
1280  __ Cvttss2si(i.OutputRegister(), i.InputOperand(0));
1281  }
1282  break;
1283  case kSSEFloat32ToUint32: {
1284  if (instr->InputAt(0)->IsFPRegister()) {
1285  __ Cvttss2siq(i.OutputRegister(), i.InputDoubleRegister(0));
1286  } else {
1287  __ Cvttss2siq(i.OutputRegister(), i.InputOperand(0));
1288  }
1289  break;
1290  }
1291  case kSSEFloat64Cmp:
1292  ASSEMBLE_SSE_BINOP(Ucomisd);
1293  break;
1294  case kSSEFloat64Add:
1295  ASSEMBLE_SSE_BINOP(addsd);
1296  break;
1297  case kSSEFloat64Sub:
1298  ASSEMBLE_SSE_BINOP(subsd);
1299  break;
1300  case kSSEFloat64Mul:
1301  ASSEMBLE_SSE_BINOP(mulsd);
1302  break;
1303  case kSSEFloat64Div:
1304  ASSEMBLE_SSE_BINOP(divsd);
1305  // Don't delete this mov. It may improve performance on some CPUs,
1306  // when there is a (v)mulsd depending on the result.
1307  __ Movapd(i.OutputDoubleRegister(), i.OutputDoubleRegister());
1308  break;
1309  case kSSEFloat64Mod: {
1310  __ subq(rsp, Immediate(kDoubleSize));
1311  unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
1312  kDoubleSize);
1313  // Move values to st(0) and st(1).
1314  __ Movsd(Operand(rsp, 0), i.InputDoubleRegister(1));
1315  __ fld_d(Operand(rsp, 0));
1316  __ Movsd(Operand(rsp, 0), i.InputDoubleRegister(0));
1317  __ fld_d(Operand(rsp, 0));
1318  // Loop while fprem isn't done.
1319  Label mod_loop;
1320  __ bind(&mod_loop);
1321  // This instruction traps on all kinds of inputs, but we are assuming the
1322  // floating point control word is set to ignore them all.
1323  __ fprem();
1324  // The following 2 instructions implicitly use rax.
1325  __ fnstsw_ax();
1326  if (CpuFeatures::IsSupported(SAHF)) {
1327  CpuFeatureScope sahf_scope(tasm(), SAHF);
1328  __ sahf();
1329  } else {
1330  __ shrl(rax, Immediate(8));
1331  __ andl(rax, Immediate(0xFF));
1332  __ pushq(rax);
1333  unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
1334  kPointerSize);
1335  __ popfq();
1336  unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
1337  -kPointerSize);
1338  }
1339  __ j(parity_even, &mod_loop);
1340  // Move output to stack and clean up.
1341  __ fstp(1);
1342  __ fstp_d(Operand(rsp, 0));
1343  __ Movsd(i.OutputDoubleRegister(), Operand(rsp, 0));
1344  __ addq(rsp, Immediate(kDoubleSize));
1345  unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
1346  -kDoubleSize);
1347  break;
1348  }
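The kSSEFloat64Mod sequence is an x87 implementation of floating-point remainder: both operands are spilled to the stack, loaded onto the FPU stack, and fprem is retried until the C2 status bit (read back via fnstsw and either sahf or the manual flag shuffle) reports that the partial remainder is complete. The result is expected to match the C library's fmod; a short reference check of that behaviour:

#include <cassert>
#include <cmath>

int main() {
  // fprem, like fmod, keeps the sign of the dividend.
  assert(std::fmod(7.5, 2.0) == 1.5);
  assert(std::fmod(-7.5, 2.0) == -1.5);
  return 0;
}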
1349  case kSSEFloat32Max: {
1350  Label compare_nan, compare_swap, done_compare;
1351  if (instr->InputAt(1)->IsFPRegister()) {
1352  __ Ucomiss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1353  } else {
1354  __ Ucomiss(i.InputDoubleRegister(0), i.InputOperand(1));
1355  }
1356  auto ool =
1357  new (zone()) OutOfLineLoadFloat32NaN(this, i.OutputDoubleRegister());
1358  __ j(parity_even, ool->entry());
1359  __ j(above, &done_compare, Label::kNear);
1360  __ j(below, &compare_swap, Label::kNear);
1361  __ Movmskps(kScratchRegister, i.InputDoubleRegister(0));
1362  __ testl(kScratchRegister, Immediate(1));
1363  __ j(zero, &done_compare, Label::kNear);
1364  __ bind(&compare_swap);
1365  if (instr->InputAt(1)->IsFPRegister()) {
1366  __ Movss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1367  } else {
1368  __ Movss(i.InputDoubleRegister(0), i.InputOperand(1));
1369  }
1370  __ bind(&done_compare);
1371  __ bind(ool->exit());
1372  break;
1373  }
1374  case kSSEFloat32Min: {
1375  Label compare_swap, done_compare;
1376  if (instr->InputAt(1)->IsFPRegister()) {
1377  __ Ucomiss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1378  } else {
1379  __ Ucomiss(i.InputDoubleRegister(0), i.InputOperand(1));
1380  }
1381  auto ool =
1382  new (zone()) OutOfLineLoadFloat32NaN(this, i.OutputDoubleRegister());
1383  __ j(parity_even, ool->entry());
1384  __ j(below, &done_compare, Label::kNear);
1385  __ j(above, &compare_swap, Label::kNear);
1386  if (instr->InputAt(1)->IsFPRegister()) {
1387  __ Movmskps(kScratchRegister, i.InputDoubleRegister(1));
1388  } else {
1389  __ Movss(kScratchDoubleReg, i.InputOperand(1));
1390  __ Movmskps(kScratchRegister, kScratchDoubleReg);
1391  }
1392  __ testl(kScratchRegister, Immediate(1));
1393  __ j(zero, &done_compare, Label::kNear);
1394  __ bind(&compare_swap);
1395  if (instr->InputAt(1)->IsFPRegister()) {
1396  __ Movss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1397  } else {
1398  __ Movss(i.InputDoubleRegister(0), i.InputOperand(1));
1399  }
1400  __ bind(&done_compare);
1401  __ bind(ool->exit());
1402  break;
1403  }
1404  case kSSEFloat64Max: {
1405  Label compare_nan, compare_swap, done_compare;
1406  if (instr->InputAt(1)->IsFPRegister()) {
1407  __ Ucomisd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1408  } else {
1409  __ Ucomisd(i.InputDoubleRegister(0), i.InputOperand(1));
1410  }
1411  auto ool =
1412  new (zone()) OutOfLineLoadFloat64NaN(this, i.OutputDoubleRegister());
1413  __ j(parity_even, ool->entry());
1414  __ j(above, &done_compare, Label::kNear);
1415  __ j(below, &compare_swap, Label::kNear);
1416  __ Movmskpd(kScratchRegister, i.InputDoubleRegister(0));
1417  __ testl(kScratchRegister, Immediate(1));
1418  __ j(zero, &done_compare, Label::kNear);
1419  __ bind(&compare_swap);
1420  if (instr->InputAt(1)->IsFPRegister()) {
1421  __ Movsd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1422  } else {
1423  __ Movsd(i.InputDoubleRegister(0), i.InputOperand(1));
1424  }
1425  __ bind(&done_compare);
1426  __ bind(ool->exit());
1427  break;
1428  }
1429  case kSSEFloat64Min: {
1430  Label compare_swap, done_compare;
1431  if (instr->InputAt(1)->IsFPRegister()) {
1432  __ Ucomisd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1433  } else {
1434  __ Ucomisd(i.InputDoubleRegister(0), i.InputOperand(1));
1435  }
1436  auto ool =
1437  new (zone()) OutOfLineLoadFloat64NaN(this, i.OutputDoubleRegister());
1438  __ j(parity_even, ool->entry());
1439  __ j(below, &done_compare, Label::kNear);
1440  __ j(above, &compare_swap, Label::kNear);
1441  if (instr->InputAt(1)->IsFPRegister()) {
1442  __ Movmskpd(kScratchRegister, i.InputDoubleRegister(1));
1443  } else {
1444  __ Movsd(kScratchDoubleReg, i.InputOperand(1));
1445  __ Movmskpd(kScratchRegister, kScratchDoubleReg);
1446  }
1447  __ testl(kScratchRegister, Immediate(1));
1448  __ j(zero, &done_compare, Label::kNear);
1449  __ bind(&compare_swap);
1450  if (instr->InputAt(1)->IsFPRegister()) {
1451  __ Movsd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1452  } else {
1453  __ Movsd(i.InputDoubleRegister(0), i.InputOperand(1));
1454  }
1455  __ bind(&done_compare);
1456  __ bind(ool->exit());
1457  break;
1458  }
1459  case kSSEFloat64Abs: {
1460  // TODO(bmeurer): Use RIP relative 128-bit constants.
1461  __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
1462  __ psrlq(kScratchDoubleReg, 1);
1463  __ andpd(i.OutputDoubleRegister(), kScratchDoubleReg);
1464  break;
1465  }
1466  case kSSEFloat64Neg: {
1467  // TODO(bmeurer): Use RIP relative 128-bit constants.
1468  __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
1469  __ psllq(kScratchDoubleReg, 63);
1470  __ xorpd(i.OutputDoubleRegister(), kScratchDoubleReg);
1471  break;
1472  }
1473  case kSSEFloat64Sqrt:
1474  ASSEMBLE_SSE_UNOP(Sqrtsd);
1475  break;
1476  case kSSEFloat64Round: {
1477  CpuFeatureScope sse_scope(tasm(), SSE4_1);
1478  RoundingMode const mode =
1479  static_cast<RoundingMode>(MiscField::decode(instr->opcode()));
1480  __ Roundsd(i.OutputDoubleRegister(), i.InputDoubleRegister(0), mode);
1481  break;
1482  }
1483  case kSSEFloat64ToFloat32:
1484  ASSEMBLE_SSE_UNOP(Cvtsd2ss);
1485  break;
1486  case kSSEFloat64ToInt32:
1487  if (instr->InputAt(0)->IsFPRegister()) {
1488  __ Cvttsd2si(i.OutputRegister(), i.InputDoubleRegister(0));
1489  } else {
1490  __ Cvttsd2si(i.OutputRegister(), i.InputOperand(0));
1491  }
1492  break;
1493  case kSSEFloat64ToUint32: {
1494  if (instr->InputAt(0)->IsFPRegister()) {
1495  __ Cvttsd2siq(i.OutputRegister(), i.InputDoubleRegister(0));
1496  } else {
1497  __ Cvttsd2siq(i.OutputRegister(), i.InputOperand(0));
1498  }
1499  if (MiscField::decode(instr->opcode())) {
1500  __ AssertZeroExtended(i.OutputRegister());
1501  }
1502  break;
1503  }
1504  case kSSEFloat32ToInt64:
1505  if (instr->InputAt(0)->IsFPRegister()) {
1506  __ Cvttss2siq(i.OutputRegister(), i.InputDoubleRegister(0));
1507  } else {
1508  __ Cvttss2siq(i.OutputRegister(), i.InputOperand(0));
1509  }
1510  if (instr->OutputCount() > 1) {
1511  __ Set(i.OutputRegister(1), 1);
1512  Label done;
1513  Label fail;
1514  __ Move(kScratchDoubleReg, static_cast<float>(INT64_MIN));
1515  if (instr->InputAt(0)->IsFPRegister()) {
1516  __ Ucomiss(kScratchDoubleReg, i.InputDoubleRegister(0));
1517  } else {
1518  __ Ucomiss(kScratchDoubleReg, i.InputOperand(0));
1519  }
1520  // If the input is NaN, then the conversion fails.
1521  __ j(parity_even, &fail);
1522  // If the input is INT64_MIN, then the conversion succeeds.
1523  __ j(equal, &done);
1524  __ cmpq(i.OutputRegister(0), Immediate(1));
1525  // If the conversion results in INT64_MIN, but the input was not
1526  // INT64_MIN, then the conversion fails.
1527  __ j(no_overflow, &done);
1528  __ bind(&fail);
1529  __ Set(i.OutputRegister(1), 0);
1530  __ bind(&done);
1531  }
1532  break;
1533  case kSSEFloat64ToInt64:
1534  if (instr->InputAt(0)->IsFPRegister()) {
1535  __ Cvttsd2siq(i.OutputRegister(0), i.InputDoubleRegister(0));
1536  } else {
1537  __ Cvttsd2siq(i.OutputRegister(0), i.InputOperand(0));
1538  }
1539  if (instr->OutputCount() > 1) {
1540  __ Set(i.OutputRegister(1), 1);
1541  Label done;
1542  Label fail;
1543  __ Move(kScratchDoubleReg, static_cast<double>(INT64_MIN));
1544  if (instr->InputAt(0)->IsFPRegister()) {
1545  __ Ucomisd(kScratchDoubleReg, i.InputDoubleRegister(0));
1546  } else {
1547  __ Ucomisd(kScratchDoubleReg, i.InputOperand(0));
1548  }
1549  // If the input is NaN, then the conversion fails.
1550  __ j(parity_even, &fail);
1551  // If the input is INT64_MIN, then the conversion succeeds.
1552  __ j(equal, &done);
1553  __ cmpq(i.OutputRegister(0), Immediate(1));
1554  // If the conversion results in INT64_MIN, but the input was not
1555  // INT64_MIN, then the conversion fails.
1556  __ j(no_overflow, &done);
1557  __ bind(&fail);
1558  __ Set(i.OutputRegister(1), 0);
1559  __ bind(&done);
1560  }
1561  break;
1562  case kSSEFloat32ToUint64: {
1563  Label fail;
1564  if (instr->OutputCount() > 1) __ Set(i.OutputRegister(1), 0);
1565  if (instr->InputAt(0)->IsFPRegister()) {
1566  __ Cvttss2uiq(i.OutputRegister(), i.InputDoubleRegister(0), &fail);
1567  } else {
1568  __ Cvttss2uiq(i.OutputRegister(), i.InputOperand(0), &fail);
1569  }
1570  if (instr->OutputCount() > 1) __ Set(i.OutputRegister(1), 1);
1571  __ bind(&fail);
1572  break;
1573  }
1574  case kSSEFloat64ToUint64: {
1575  Label fail;
1576  if (instr->OutputCount() > 1) __ Set(i.OutputRegister(1), 0);
1577  if (instr->InputAt(0)->IsFPRegister()) {
1578  __ Cvttsd2uiq(i.OutputRegister(), i.InputDoubleRegister(0), &fail);
1579  } else {
1580  __ Cvttsd2uiq(i.OutputRegister(), i.InputOperand(0), &fail);
1581  }
1582  if (instr->OutputCount() > 1) __ Set(i.OutputRegister(1), 1);
1583  __ bind(&fail);
1584  break;
1585  }
1586  case kSSEInt32ToFloat64:
1587  if (instr->InputAt(0)->IsRegister()) {
1588  __ Cvtlsi2sd(i.OutputDoubleRegister(), i.InputRegister(0));
1589  } else {
1590  __ Cvtlsi2sd(i.OutputDoubleRegister(), i.InputOperand(0));
1591  }
1592  break;
1593  case kSSEInt32ToFloat32:
1594  if (instr->InputAt(0)->IsRegister()) {
1595  __ Cvtlsi2ss(i.OutputDoubleRegister(), i.InputRegister(0));
1596  } else {
1597  __ Cvtlsi2ss(i.OutputDoubleRegister(), i.InputOperand(0));
1598  }
1599  break;
1600  case kSSEInt64ToFloat32:
1601  if (instr->InputAt(0)->IsRegister()) {
1602  __ Cvtqsi2ss(i.OutputDoubleRegister(), i.InputRegister(0));
1603  } else {
1604  __ Cvtqsi2ss(i.OutputDoubleRegister(), i.InputOperand(0));
1605  }
1606  break;
1607  case kSSEInt64ToFloat64:
1608  if (instr->InputAt(0)->IsRegister()) {
1609  __ Cvtqsi2sd(i.OutputDoubleRegister(), i.InputRegister(0));
1610  } else {
1611  __ Cvtqsi2sd(i.OutputDoubleRegister(), i.InputOperand(0));
1612  }
1613  break;
1614  case kSSEUint64ToFloat32:
1615  if (instr->InputAt(0)->IsRegister()) {
1616  __ Cvtqui2ss(i.OutputDoubleRegister(), i.InputRegister(0));
1617  } else {
1618  __ Cvtqui2ss(i.OutputDoubleRegister(), i.InputOperand(0));
1619  }
1620  break;
1621  case kSSEUint64ToFloat64:
1622  if (instr->InputAt(0)->IsRegister()) {
1623  __ Cvtqui2sd(i.OutputDoubleRegister(), i.InputRegister(0));
1624  } else {
1625  __ Cvtqui2sd(i.OutputDoubleRegister(), i.InputOperand(0));
1626  }
1627  break;
1628  case kSSEUint32ToFloat64:
1629  if (instr->InputAt(0)->IsRegister()) {
1630  __ Cvtlui2sd(i.OutputDoubleRegister(), i.InputRegister(0));
1631  } else {
1632  __ Cvtlui2sd(i.OutputDoubleRegister(), i.InputOperand(0));
1633  }
1634  break;
1635  case kSSEUint32ToFloat32:
1636  if (instr->InputAt(0)->IsRegister()) {
1637  __ Cvtlui2ss(i.OutputDoubleRegister(), i.InputRegister(0));
1638  } else {
1639  __ Cvtlui2ss(i.OutputDoubleRegister(), i.InputOperand(0));
1640  }
1641  break;
1642  case kSSEFloat64ExtractLowWord32:
1643  if (instr->InputAt(0)->IsFPStackSlot()) {
1644  __ movl(i.OutputRegister(), i.InputOperand(0));
1645  } else {
1646  __ Movd(i.OutputRegister(), i.InputDoubleRegister(0));
1647  }
1648  break;
1649  case kSSEFloat64ExtractHighWord32:
1650  if (instr->InputAt(0)->IsFPStackSlot()) {
1651  __ movl(i.OutputRegister(), i.InputOperand(0, kDoubleSize / 2));
1652  } else {
1653  __ Pextrd(i.OutputRegister(), i.InputDoubleRegister(0), 1);
1654  }
1655  break;
1656  case kSSEFloat64InsertLowWord32:
1657  if (instr->InputAt(1)->IsRegister()) {
1658  __ Pinsrd(i.OutputDoubleRegister(), i.InputRegister(1), 0);
1659  } else {
1660  __ Pinsrd(i.OutputDoubleRegister(), i.InputOperand(1), 0);
1661  }
1662  break;
1663  case kSSEFloat64InsertHighWord32:
1664  if (instr->InputAt(1)->IsRegister()) {
1665  __ Pinsrd(i.OutputDoubleRegister(), i.InputRegister(1), 1);
1666  } else {
1667  __ Pinsrd(i.OutputDoubleRegister(), i.InputOperand(1), 1);
1668  }
1669  break;
1670  case kSSEFloat64LoadLowWord32:
1671  if (instr->InputAt(0)->IsRegister()) {
1672  __ Movd(i.OutputDoubleRegister(), i.InputRegister(0));
1673  } else {
1674  __ Movd(i.OutputDoubleRegister(), i.InputOperand(0));
1675  }
1676  break;
1677  case kAVXFloat32Cmp: {
1678  CpuFeatureScope avx_scope(tasm(), AVX);
1679  if (instr->InputAt(1)->IsFPRegister()) {
1680  __ vucomiss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1681  } else {
1682  __ vucomiss(i.InputDoubleRegister(0), i.InputOperand(1));
1683  }
1684  break;
1685  }
1686  case kAVXFloat32Add:
1687  ASSEMBLE_AVX_BINOP(vaddss);
1688  break;
1689  case kAVXFloat32Sub:
1690  ASSEMBLE_AVX_BINOP(vsubss);
1691  break;
1692  case kAVXFloat32Mul:
1693  ASSEMBLE_AVX_BINOP(vmulss);
1694  break;
1695  case kAVXFloat32Div:
1696  ASSEMBLE_AVX_BINOP(vdivss);
1697  // Don't delete this mov. It may improve performance on some CPUs,
1698  // when there is a (v)mulss depending on the result.
1699  __ Movaps(i.OutputDoubleRegister(), i.OutputDoubleRegister());
1700  break;
1701  case kAVXFloat64Cmp: {
1702  CpuFeatureScope avx_scope(tasm(), AVX);
1703  if (instr->InputAt(1)->IsFPRegister()) {
1704  __ vucomisd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1705  } else {
1706  __ vucomisd(i.InputDoubleRegister(0), i.InputOperand(1));
1707  }
1708  break;
1709  }
1710  case kAVXFloat64Add:
1711  ASSEMBLE_AVX_BINOP(vaddsd);
1712  break;
1713  case kAVXFloat64Sub:
1714  ASSEMBLE_AVX_BINOP(vsubsd);
1715  break;
1716  case kAVXFloat64Mul:
1717  ASSEMBLE_AVX_BINOP(vmulsd);
1718  break;
1719  case kAVXFloat64Div:
1720  ASSEMBLE_AVX_BINOP(vdivsd);
1721  // Don't delete this mov. It may improve performance on some CPUs,
1722  // when there is a (v)mulsd depending on the result.
1723  __ Movapd(i.OutputDoubleRegister(), i.OutputDoubleRegister());
1724  break;
1725  case kAVXFloat32Abs: {
1726  // TODO(bmeurer): Use RIP relative 128-bit constants.
1727  CpuFeatureScope avx_scope(tasm(), AVX);
1728  __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
1729  __ vpsrlq(kScratchDoubleReg, kScratchDoubleReg, 33);
1730  if (instr->InputAt(0)->IsFPRegister()) {
1731  __ vandps(i.OutputDoubleRegister(), kScratchDoubleReg,
1732  i.InputDoubleRegister(0));
1733  } else {
1734  __ vandps(i.OutputDoubleRegister(), kScratchDoubleReg,
1735  i.InputOperand(0));
1736  }
1737  break;
1738  }
1739  case kAVXFloat32Neg: {
1740  // TODO(bmeurer): Use RIP relative 128-bit constants.
1741  CpuFeatureScope avx_scope(tasm(), AVX);
1742  __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
1743  __ vpsllq(kScratchDoubleReg, kScratchDoubleReg, 31);
1744  if (instr->InputAt(0)->IsFPRegister()) {
1745  __ vxorps(i.OutputDoubleRegister(), kScratchDoubleReg,
1746  i.InputDoubleRegister(0));
1747  } else {
1748  __ vxorps(i.OutputDoubleRegister(), kScratchDoubleReg,
1749  i.InputOperand(0));
1750  }
1751  break;
1752  }
1753  case kAVXFloat64Abs: {
1754  // TODO(bmeurer): Use RIP relative 128-bit constants.
1755  CpuFeatureScope avx_scope(tasm(), AVX);
1756  __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
1757  __ vpsrlq(kScratchDoubleReg, kScratchDoubleReg, 1);
1758  if (instr->InputAt(0)->IsFPRegister()) {
1759  __ vandpd(i.OutputDoubleRegister(), kScratchDoubleReg,
1760  i.InputDoubleRegister(0));
1761  } else {
1762  __ vandpd(i.OutputDoubleRegister(), kScratchDoubleReg,
1763  i.InputOperand(0));
1764  }
1765  break;
1766  }
1767  case kAVXFloat64Neg: {
1768  // TODO(bmeurer): Use RIP relative 128-bit constants.
1769  CpuFeatureScope avx_scope(tasm(), AVX);
1770  __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
1771  __ vpsllq(kScratchDoubleReg, kScratchDoubleReg, 63);
1772  if (instr->InputAt(0)->IsFPRegister()) {
1773  __ vxorpd(i.OutputDoubleRegister(), kScratchDoubleReg,
1774  i.InputDoubleRegister(0));
1775  } else {
1776  __ vxorpd(i.OutputDoubleRegister(), kScratchDoubleReg,
1777  i.InputOperand(0));
1778  }
1779  break;
1780  }
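  // Note on the four abs/neg cases above: instead of loading a 128-bit
  // constant from memory, each one materializes the needed mask in
  // kScratchDoubleReg from an all-ones value (vpcmpeqd reg, reg, reg yields
  // 0xFF..FF because every lane compares equal to itself) and then shifts it:
  //   vpsrlq ..., 33  ->  0x000000007FFFFFFF per 64-bit lane (float32 abs mask)
  //   vpsllq ..., 31  ->  low dword 0x80000000 (float32 sign bit)
  //   vpsrlq ..., 1   ->  0x7FFFFFFFFFFFFFFF (float64 abs mask)
  //   vpsllq ..., 63  ->  0x8000000000000000 (float64 sign bit)
  // ANDing with the mask clears the sign bit (abs); XORing flips it (neg).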
1781  case kSSEFloat64SilenceNaN:
1782  __ Xorpd(kScratchDoubleReg, kScratchDoubleReg);
1783  __ Subsd(i.InputDoubleRegister(0), kScratchDoubleReg);
1784  break;
1785  case kX64Movsxbl:
1786  EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
1787  ASSEMBLE_MOVX(movsxbl);
1788  __ AssertZeroExtended(i.OutputRegister());
1789  EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1790  break;
1791  case kX64Movzxbl:
1792  EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
1793  ASSEMBLE_MOVX(movzxbl);
1794  __ AssertZeroExtended(i.OutputRegister());
1795  EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1796  break;
1797  case kX64Movsxbq:
1798  EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
1799  ASSEMBLE_MOVX(movsxbq);
1800  EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1801  break;
1802  case kX64Movzxbq:
1803  EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
1804  ASSEMBLE_MOVX(movzxbq);
1805  __ AssertZeroExtended(i.OutputRegister());
1806  EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1807  break;
1808  case kX64Movb: {
1809  EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
1810  size_t index = 0;
1811  Operand operand = i.MemoryOperand(&index);
1812  if (HasImmediateInput(instr, index)) {
1813  __ movb(operand, Immediate(i.InputInt8(index)));
1814  } else {
1815  __ movb(operand, i.InputRegister(index));
1816  }
1817  EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1818  break;
1819  }
1820  case kX64Movsxwl:
1821  EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
1822  ASSEMBLE_MOVX(movsxwl);
1823  __ AssertZeroExtended(i.OutputRegister());
1824  EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1825  break;
1826  case kX64Movzxwl:
1827  EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
1828  ASSEMBLE_MOVX(movzxwl);
1829  __ AssertZeroExtended(i.OutputRegister());
1830  EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1831  break;
1832  case kX64Movsxwq:
1833  EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
1834  ASSEMBLE_MOVX(movsxwq);
1835  break;
1836  case kX64Movzxwq:
1837  EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
1838  ASSEMBLE_MOVX(movzxwq);
1839  __ AssertZeroExtended(i.OutputRegister());
1840  EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1841  break;
1842  case kX64Movw: {
1843  EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
1844  size_t index = 0;
1845  Operand operand = i.MemoryOperand(&index);
1846  if (HasImmediateInput(instr, index)) {
1847  __ movw(operand, Immediate(i.InputInt16(index)));
1848  } else {
1849  __ movw(operand, i.InputRegister(index));
1850  }
1851  EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1852  break;
1853  }
1854  case kX64Movl:
1855  EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
1856  if (instr->HasOutput()) {
1857  if (instr->addressing_mode() == kMode_None) {
1858  if (instr->InputAt(0)->IsRegister()) {
1859  __ movl(i.OutputRegister(), i.InputRegister(0));
1860  } else {
1861  __ movl(i.OutputRegister(), i.InputOperand(0));
1862  }
1863  } else {
1864  __ movl(i.OutputRegister(), i.MemoryOperand());
1865  }
1866  __ AssertZeroExtended(i.OutputRegister());
1867  } else {
1868  size_t index = 0;
1869  Operand operand = i.MemoryOperand(&index);
1870  if (HasImmediateInput(instr, index)) {
1871  __ movl(operand, i.InputImmediate(index));
1872  } else {
1873  __ movl(operand, i.InputRegister(index));
1874  }
1875  }
1876  EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1877  break;
1878  case kX64Movsxlq:
1879  EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
1880  ASSEMBLE_MOVX(movsxlq);
1881  EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1882  break;
1883  case kX64MovqDecompressTaggedSigned: {
1884  CHECK(instr->HasOutput());
1885  __ DecompressTaggedSigned(i.OutputRegister(), i.MemoryOperand(),
1886  DEBUG_BOOL ? i.TempRegister(0) : no_reg);
1887  break;
1888  }
1889  case kX64MovqDecompressTaggedPointer: {
1890  CHECK(instr->HasOutput());
1891  __ DecompressTaggedPointer(i.OutputRegister(), i.MemoryOperand(),
1892  DEBUG_BOOL ? i.TempRegister(0) : no_reg);
1893  break;
1894  }
1895  case kX64MovqDecompressAnyTagged: {
1896  CHECK(instr->HasOutput());
1897  __ DecompressAnyTagged(i.OutputRegister(), i.MemoryOperand(),
1898  i.TempRegister(0),
1899  DEBUG_BOOL ? i.TempRegister(1) : no_reg);
1900  break;
1901  }
1902  case kX64Movq:
1903  EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
1904  if (instr->HasOutput()) {
1905  __ movq(i.OutputRegister(), i.MemoryOperand());
1906  } else {
1907  size_t index = 0;
1908  Operand operand = i.MemoryOperand(&index);
1909  if (HasImmediateInput(instr, index)) {
1910  __ movq(operand, i.InputImmediate(index));
1911  } else {
1912  __ movq(operand, i.InputRegister(index));
1913  }
1914  }
1915  EmitWordLoadPoisoningIfNeeded(this, opcode, instr, i);
1916  break;
1917  case kX64Movss:
1918  EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
1919  if (instr->HasOutput()) {
1920  __ movss(i.OutputDoubleRegister(), i.MemoryOperand());
1921  } else {
1922  size_t index = 0;
1923  Operand operand = i.MemoryOperand(&index);
1924  __ movss(operand, i.InputDoubleRegister(index));
1925  }
1926  break;
1927  case kX64Movsd: {
1928  EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
1929  if (instr->HasOutput()) {
1930  const MemoryAccessMode access_mode =
1931  static_cast<MemoryAccessMode>(MiscField::decode(opcode));
1932  if (access_mode == kMemoryAccessPoisoned) {
1933  // If we have to poison the loaded value, we load into a general
1934  // purpose register first, mask it with the poison, and move the
1935  // value from the general purpose register into the double register.
1936  __ movq(kScratchRegister, i.MemoryOperand());
1937  __ andq(kScratchRegister, kSpeculationPoisonRegister);
1938  __ Movq(i.OutputDoubleRegister(), kScratchRegister);
1939  } else {
1940  __ Movsd(i.OutputDoubleRegister(), i.MemoryOperand());
1941  }
1942  } else {
1943  size_t index = 0;
1944  Operand operand = i.MemoryOperand(&index);
1945  __ Movsd(operand, i.InputDoubleRegister(index));
1946  }
1947  break;
1948  }
1949  case kX64Movdqu: {
1950  CpuFeatureScope sse_scope(tasm(), SSSE3);
1951  EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
1952  if (instr->HasOutput()) {
1953  __ movdqu(i.OutputSimd128Register(), i.MemoryOperand());
1954  } else {
1955  size_t index = 0;
1956  Operand operand = i.MemoryOperand(&index);
1957  __ movdqu(operand, i.InputSimd128Register(index));
1958  }
1959  break;
1960  }
1961  case kX64BitcastFI:
1962  if (instr->InputAt(0)->IsFPStackSlot()) {
1963  __ movl(i.OutputRegister(), i.InputOperand(0));
1964  } else {
1965  __ Movd(i.OutputRegister(), i.InputDoubleRegister(0));
1966  }
1967  break;
1968  case kX64BitcastDL:
1969  if (instr->InputAt(0)->IsFPStackSlot()) {
1970  __ movq(i.OutputRegister(), i.InputOperand(0));
1971  } else {
1972  __ Movq(i.OutputRegister(), i.InputDoubleRegister(0));
1973  }
1974  break;
1975  case kX64BitcastIF:
1976  if (instr->InputAt(0)->IsRegister()) {
1977  __ Movd(i.OutputDoubleRegister(), i.InputRegister(0));
1978  } else {
1979  __ movss(i.OutputDoubleRegister(), i.InputOperand(0));
1980  }
1981  break;
1982  case kX64BitcastLD:
1983  if (instr->InputAt(0)->IsRegister()) {
1984  __ Movq(i.OutputDoubleRegister(), i.InputRegister(0));
1985  } else {
1986  __ Movsd(i.OutputDoubleRegister(), i.InputOperand(0));
1987  }
1988  break;
1989  case kX64Lea32: {
1990  AddressingMode mode = AddressingModeField::decode(instr->opcode());
1991  // Shorten "leal" to "addl", "subl" or "shll" if the register allocation
1992  // and addressing mode just happens to work out. The "addl"/"subl" forms
1993  // in these cases are faster based on measurements.
1994  if (i.InputRegister(0) == i.OutputRegister()) {
1995  if (mode == kMode_MRI) {
1996  int32_t constant_summand = i.InputInt32(1);
1997  DCHECK_NE(0, constant_summand);
1998  if (constant_summand > 0) {
1999  __ addl(i.OutputRegister(), Immediate(constant_summand));
2000  } else {
2001  __ subl(i.OutputRegister(), Immediate(-constant_summand));
2002  }
2003  } else if (mode == kMode_MR1) {
2004  if (i.InputRegister(1) == i.OutputRegister()) {
2005  __ shll(i.OutputRegister(), Immediate(1));
2006  } else {
2007  __ addl(i.OutputRegister(), i.InputRegister(1));
2008  }
2009  } else if (mode == kMode_M2) {
2010  __ shll(i.OutputRegister(), Immediate(1));
2011  } else if (mode == kMode_M4) {
2012  __ shll(i.OutputRegister(), Immediate(2));
2013  } else if (mode == kMode_M8) {
2014  __ shll(i.OutputRegister(), Immediate(3));
2015  } else {
2016  __ leal(i.OutputRegister(), i.MemoryOperand());
2017  }
2018  } else if (mode == kMode_MR1 &&
2019  i.InputRegister(1) == i.OutputRegister()) {
2020  __ addl(i.OutputRegister(), i.InputRegister(0));
2021  } else {
2022  __ leal(i.OutputRegister(), i.MemoryOperand());
2023  }
2024  __ AssertZeroExtended(i.OutputRegister());
2025  break;
2026  }
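  // Illustrative examples of the shortening above (the 64-bit kX64Lea case
  // below applies the same idea with quadword instructions):
  //   leal rax, [rax + 0x10]  ->  addl rax, 0x10
  //   leal rax, [rax - 0x10]  ->  subl rax, 0x10
  //   leal rax, [rax + rax]   ->  shll rax, 1
  //   leal rax, [rax + rbx]   ->  addl rax, rbx
  //   leal rax, [rax * 8]     ->  shll rax, 3
  // Anything else falls through to a plain leal of the decoded memory operand.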
2027  case kX64Lea: {
2028  AddressingMode mode = AddressingModeField::decode(instr->opcode());
2029  // Shorten "leaq" to "addq", "subq" or "shlq" if the register allocation
2030  // and addressing mode just happens to work out. The "addq"/"subq" forms
2031  // in these cases are faster based on measurements.
2032  if (i.InputRegister(0) == i.OutputRegister()) {
2033  if (mode == kMode_MRI) {
2034  int32_t constant_summand = i.InputInt32(1);
2035  if (constant_summand > 0) {
2036  __ addq(i.OutputRegister(), Immediate(constant_summand));
2037  } else if (constant_summand < 0) {
2038  __ subq(i.OutputRegister(), Immediate(-constant_summand));
2039  }
2040  } else if (mode == kMode_MR1) {
2041  if (i.InputRegister(1) == i.OutputRegister()) {
2042  __ shlq(i.OutputRegister(), Immediate(1));
2043  } else {
2044  __ addq(i.OutputRegister(), i.InputRegister(1));
2045  }
2046  } else if (mode == kMode_M2) {
2047  __ shlq(i.OutputRegister(), Immediate(1));
2048  } else if (mode == kMode_M4) {
2049  __ shlq(i.OutputRegister(), Immediate(2));
2050  } else if (mode == kMode_M8) {
2051  __ shlq(i.OutputRegister(), Immediate(3));
2052  } else {
2053  __ leaq(i.OutputRegister(), i.MemoryOperand());
2054  }
2055  } else if (mode == kMode_MR1 &&
2056  i.InputRegister(1) == i.OutputRegister()) {
2057  __ addq(i.OutputRegister(), i.InputRegister(0));
2058  } else {
2059  __ leaq(i.OutputRegister(), i.MemoryOperand());
2060  }
2061  break;
2062  }
2063  case kX64Dec32:
2064  __ decl(i.OutputRegister());
2065  break;
2066  case kX64Inc32:
2067  __ incl(i.OutputRegister());
2068  break;
2069  case kX64Push:
2070  if (AddressingModeField::decode(instr->opcode()) != kMode_None) {
2071  size_t index = 0;
2072  Operand operand = i.MemoryOperand(&index);
2073  __ pushq(operand);
2074  frame_access_state()->IncreaseSPDelta(1);
2075  unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
2076  kPointerSize);
2077  } else if (HasImmediateInput(instr, 0)) {
2078  __ pushq(i.InputImmediate(0));
2079  frame_access_state()->IncreaseSPDelta(1);
2080  unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
2081  kPointerSize);
2082  } else if (instr->InputAt(0)->IsRegister()) {
2083  __ pushq(i.InputRegister(0));
2084  frame_access_state()->IncreaseSPDelta(1);
2085  unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
2086  kPointerSize);
2087  } else if (instr->InputAt(0)->IsFloatRegister() ||
2088  instr->InputAt(0)->IsDoubleRegister()) {
2089  // TODO(titzer): use another machine instruction?
2090  __ subq(rsp, Immediate(kDoubleSize));
2091  frame_access_state()->IncreaseSPDelta(kDoubleSize / kPointerSize);
2092  unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
2093  kDoubleSize);
2094  __ Movsd(Operand(rsp, 0), i.InputDoubleRegister(0));
2095  } else if (instr->InputAt(0)->IsSimd128Register()) {
2096  // TODO(titzer): use another machine instruction?
2097  __ subq(rsp, Immediate(kSimd128Size));
2098  frame_access_state()->IncreaseSPDelta(kSimd128Size / kPointerSize);
2099  unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
2100  kSimd128Size);
2101  __ Movups(Operand(rsp, 0), i.InputSimd128Register(0));
2102  } else if (instr->InputAt(0)->IsStackSlot() ||
2103  instr->InputAt(0)->IsFloatStackSlot() ||
2104  instr->InputAt(0)->IsDoubleStackSlot()) {
2105  __ pushq(i.InputOperand(0));
2106  frame_access_state()->IncreaseSPDelta(1);
2107  unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
2108  kPointerSize);
2109  } else {
2110  DCHECK(instr->InputAt(0)->IsSimd128StackSlot());
2111  __ Movups(kScratchDoubleReg, i.InputOperand(0));
2112  // TODO(titzer): use another machine instruction?
2113  __ subq(rsp, Immediate(kSimd128Size));
2114  frame_access_state()->IncreaseSPDelta(kSimd128Size / kPointerSize);
2115  unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
2116  kSimd128Size);
2117  __ Movups(Operand(rsp, 0), kScratchDoubleReg);
2118  }
2119  break;
2120  case kX64Poke: {
2121  int slot = MiscField::decode(instr->opcode());
2122  if (HasImmediateInput(instr, 0)) {
2123  __ movq(Operand(rsp, slot * kPointerSize), i.InputImmediate(0));
2124  } else {
2125  __ movq(Operand(rsp, slot * kPointerSize), i.InputRegister(0));
2126  }
2127  break;
2128  }
2129  case kX64Peek: {
2130  int reverse_slot = i.InputInt32(0);
2131  int offset =
2132  FrameSlotToFPOffset(frame()->GetTotalFrameSlotCount() - reverse_slot);
2133  if (instr->OutputAt(0)->IsFPRegister()) {
2134  LocationOperand* op = LocationOperand::cast(instr->OutputAt(0));
2135  if (op->representation() == MachineRepresentation::kFloat64) {
2136  __ Movsd(i.OutputDoubleRegister(), Operand(rbp, offset));
2137  } else {
2138  DCHECK_EQ(MachineRepresentation::kFloat32, op->representation());
2139  __ Movss(i.OutputFloatRegister(), Operand(rbp, offset));
2140  }
2141  } else {
2142  __ movq(i.OutputRegister(), Operand(rbp, offset));
2143  }
2144  break;
2145  }
2146  // TODO(gdeepti): Get rid of redundant moves for F32x4Splat/Extract below
2147  case kX64F32x4Splat: {
2148  XMMRegister dst = i.OutputSimd128Register();
2149  if (instr->InputAt(0)->IsFPRegister()) {
2150  __ movss(dst, i.InputDoubleRegister(0));
2151  } else {
2152  __ movss(dst, i.InputOperand(0));
2153  }
2154  __ shufps(dst, dst, 0x0);
2155  break;
2156  }
2157  case kX64F32x4ExtractLane: {
2158  CpuFeatureScope sse_scope(tasm(), SSE4_1);
2159  __ extractps(kScratchRegister, i.InputSimd128Register(0), i.InputInt8(1));
2160  __ movd(i.OutputDoubleRegister(), kScratchRegister);
2161  break;
2162  }
2163  case kX64F32x4ReplaceLane: {
2164  CpuFeatureScope sse_scope(tasm(), SSE4_1);
2165  // The insertps instruction uses imm8[5:4] to indicate the lane
2166  // that needs to be replaced.
2167  byte select = i.InputInt8(1) << 4 & 0x30;
2168  __ insertps(i.OutputSimd128Register(), i.InputDoubleRegister(2), select);
2169  break;
2170  }
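  // insertps encodes the destination lane in imm8 bits [5:4], so the lane
  // index from the instruction is shifted into place: lane 3 gives
  // (3 << 4) & 0x30 == 0x30, lane 0 gives 0x00. Bits [7:6] (source lane) and
  // [3:0] (zero mask) are left as zero here.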
2171  case kX64F32x4SConvertI32x4: {
2172  __ cvtdq2ps(i.OutputSimd128Register(), i.InputSimd128Register(0));
2173  break;
2174  }
2175  case kX64F32x4UConvertI32x4: {
2176  DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2177  DCHECK_NE(i.OutputSimd128Register(), kScratchDoubleReg);
2178  CpuFeatureScope sse_scope(tasm(), SSE4_1);
2179  XMMRegister dst = i.OutputSimd128Register();
2180  __ pxor(kScratchDoubleReg, kScratchDoubleReg); // zeros
2181  __ pblendw(kScratchDoubleReg, dst, 0x55); // get lo 16 bits
2182  __ psubd(dst, kScratchDoubleReg); // get hi 16 bits
2183  __ cvtdq2ps(kScratchDoubleReg, kScratchDoubleReg); // convert lo exactly
2184  __ psrld(dst, 1); // divide by 2 to get in unsigned range
2185  __ cvtdq2ps(dst, dst); // convert hi exactly
2186  __ addps(dst, dst); // double hi, exactly
2187  __ addps(dst, kScratchDoubleReg); // add hi and lo, may round.
2188  break;
2189  }
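  // cvtdq2ps only converts *signed* int32 lanes, so the sequence above splits
  // each unsigned lane into a low 16-bit part (converted exactly) and a high
  // part that is halved before conversion and doubled afterwards. Per lane,
  // roughly:
  //   uint32_t lo = x & 0xFFFF;                      // pblendw
  //   uint32_t hi = x - lo;                          // psubd; low 16 bits are 0
  //   result = 2.0f * float(hi >> 1) + float(lo);    // one rounding step
  // e.g. x = 0xFFFFFFFF: lo = 65535, hi >> 1 = 2147450880, and the final addps
  // rounds to 4294967296.0f, the float nearest to 4294967295.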
2190  case kX64F32x4Abs: {
2191  XMMRegister dst = i.OutputSimd128Register();
2192  XMMRegister src = i.InputSimd128Register(0);
2193  if (dst == src) {
2194  __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
2195  __ psrld(kScratchDoubleReg, 1);
2196  __ andps(i.OutputSimd128Register(), kScratchDoubleReg);
2197  } else {
2198  __ pcmpeqd(dst, dst);
2199  __ psrld(dst, 1);
2200  __ andps(dst, i.InputSimd128Register(0));
2201  }
2202  break;
2203  }
2204  case kX64F32x4Neg: {
2205  XMMRegister dst = i.OutputSimd128Register();
2206  XMMRegister src = i.InputSimd128Register(0);
2207  if (dst == src) {
2208  __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
2209  __ pslld(kScratchDoubleReg, 31);
2210  __ xorps(i.OutputSimd128Register(), kScratchDoubleReg);
2211  } else {
2212  __ pcmpeqd(dst, dst);
2213  __ pslld(dst, 31);
2214  __ xorps(dst, i.InputSimd128Register(0));
2215  }
2216  break;
2217  }
2218  case kX64F32x4RecipApprox: {
2219  __ rcpps(i.OutputSimd128Register(), i.InputSimd128Register(0));
2220  break;
2221  }
2222  case kX64F32x4RecipSqrtApprox: {
2223  __ rsqrtps(i.OutputSimd128Register(), i.InputSimd128Register(0));
2224  break;
2225  }
2226  case kX64F32x4Add: {
2227  DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2228  __ addps(i.OutputSimd128Register(), i.InputSimd128Register(1));
2229  break;
2230  }
2231  case kX64F32x4AddHoriz: {
2232  DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2233  CpuFeatureScope sse_scope(tasm(), SSE3);
2234  __ haddps(i.OutputSimd128Register(), i.InputSimd128Register(1));
2235  break;
2236  }
2237  case kX64F32x4Sub: {
2238  DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2239  __ subps(i.OutputSimd128Register(), i.InputSimd128Register(1));
2240  break;
2241  }
2242  case kX64F32x4Mul: {
2243  DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2244  __ mulps(i.OutputSimd128Register(), i.InputSimd128Register(1));
2245  break;
2246  }
2247  case kX64F32x4Min: {
2248  DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2249  __ minps(i.OutputSimd128Register(), i.InputSimd128Register(1));
2250  break;
2251  }
2252  case kX64F32x4Max: {
2253  DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2254  __ maxps(i.OutputSimd128Register(), i.InputSimd128Register(1));
2255  break;
2256  }
2257  case kX64F32x4Eq: {
2258  DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2259  __ cmpps(i.OutputSimd128Register(), i.InputSimd128Register(1), 0x0);
2260  break;
2261  }
2262  case kX64F32x4Ne: {
2263  DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2264  __ cmpps(i.OutputSimd128Register(), i.InputSimd128Register(1), 0x4);
2265  break;
2266  }
2267  case kX64F32x4Lt: {
2268  DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2269  __ cmpltps(i.OutputSimd128Register(), i.InputSimd128Register(1));
2270  break;
2271  }
2272  case kX64F32x4Le: {
2273  DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2274  __ cmpleps(i.OutputSimd128Register(), i.InputSimd128Register(1));
2275  break;
2276  }
2277  case kX64I32x4Splat: {
2278  XMMRegister dst = i.OutputSimd128Register();
2279  __ movd(dst, i.InputRegister(0));
2280  __ pshufd(dst, dst, 0x0);
2281  break;
2282  }
2283  case kX64I32x4ExtractLane: {
2284  CpuFeatureScope sse_scope(tasm(), SSE4_1);
2285  __ Pextrd(i.OutputRegister(), i.InputSimd128Register(0), i.InputInt8(1));
2286  break;
2287  }
2288  case kX64I32x4ReplaceLane: {
2289  CpuFeatureScope sse_scope(tasm(), SSE4_1);
2290  if (instr->InputAt(2)->IsRegister()) {
2291  __ Pinsrd(i.OutputSimd128Register(), i.InputRegister(2),
2292  i.InputInt8(1));
2293  } else {
2294  __ Pinsrd(i.OutputSimd128Register(), i.InputOperand(2), i.InputInt8(1));
2295  }
2296  break;
2297  }
2298  case kX64I32x4SConvertF32x4: {
2299  DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2300  XMMRegister dst = i.OutputSimd128Register();
2301  // NAN->0
2302  __ movaps(kScratchDoubleReg, dst);
2303  __ cmpeqps(kScratchDoubleReg, kScratchDoubleReg);
2304  __ pand(dst, kScratchDoubleReg);
2305  // Set top bit if >= 0 (but not -0.0!)
2306  __ pxor(kScratchDoubleReg, dst);
2307  // Convert
2308  __ cvttps2dq(dst, dst);
2309  // Set top bit if >=0 is now < 0
2310  __ pand(kScratchDoubleReg, dst);
2311  __ psrad(kScratchDoubleReg, 31);
2312  // Set positive overflow lanes to 0x7FFFFFFF
2313  __ pxor(dst, kScratchDoubleReg);
2314  break;
2315  }
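  // cvttps2dq returns 0x80000000 (INT32_MIN) both for NaN and for values
  // outside the int32 range, so the fix-ups above are needed for a saturating
  // conversion: cmpeqps/pand zeroes NaN lanes first, the pxor records which
  // lanes held a non-negative input, and any such lane that still comes back
  // from the conversion as 0x80000000 must have overflowed upwards, so the
  // final psrad/pxor rewrites it to 0x7FFFFFFF (INT32_MAX). For example
  // 3.0e9f (> INT32_MAX) converts to 0x80000000, the recorded mask becomes
  // all ones, and the xor produces 0x7FFFFFFF.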
2316  case kX64I32x4SConvertI16x8Low: {
2317  CpuFeatureScope sse_scope(tasm(), SSE4_1);
2318  __ pmovsxwd(i.OutputSimd128Register(), i.InputSimd128Register(0));
2319  break;
2320  }
2321  case kX64I32x4SConvertI16x8High: {
2322  CpuFeatureScope sse_scope(tasm(), SSE4_1);
2323  XMMRegister dst = i.OutputSimd128Register();
2324  __ palignr(dst, i.InputSimd128Register(0), 8);
2325  __ pmovsxwd(dst, dst);
2326  break;
2327  }
2328  case kX64I32x4Neg: {
2329  CpuFeatureScope sse_scope(tasm(), SSSE3);
2330  XMMRegister dst = i.OutputSimd128Register();
2331  XMMRegister src = i.InputSimd128Register(0);
2332  if (dst == src) {
2333  __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
2334  __ psignd(dst, kScratchDoubleReg);
2335  } else {
2336  __ pxor(dst, dst);
2337  __ psubd(dst, src);
2338  }
2339  break;
2340  }
2341  case kX64I32x4Shl: {
2342  __ pslld(i.OutputSimd128Register(), i.InputInt8(1));
2343  break;
2344  }
2345  case kX64I32x4ShrS: {
2346  __ psrad(i.OutputSimd128Register(), i.InputInt8(1));
2347  break;
2348  }
2349  case kX64I32x4Add: {
2350  __ paddd(i.OutputSimd128Register(), i.InputSimd128Register(1));
2351  break;
2352  }
2353  case kX64I32x4AddHoriz: {
2354  CpuFeatureScope sse_scope(tasm(), SSSE3);
2355  __ phaddd(i.OutputSimd128Register(), i.InputSimd128Register(1));
2356  break;
2357  }
2358  case kX64I32x4Sub: {
2359  __ psubd(i.OutputSimd128Register(), i.InputSimd128Register(1));
2360  break;
2361  }
2362  case kX64I32x4Mul: {
2363  CpuFeatureScope sse_scope(tasm(), SSE4_1);
2364  __ pmulld(i.OutputSimd128Register(), i.InputSimd128Register(1));
2365  break;
2366  }
2367  case kX64I32x4MinS: {
2368  CpuFeatureScope sse_scope(tasm(), SSE4_1);
2369  __ pminsd(i.OutputSimd128Register(), i.InputSimd128Register(1));
2370  break;
2371  }
2372  case kX64I32x4MaxS: {
2373  CpuFeatureScope sse_scope(tasm(), SSE4_1);
2374  __ pmaxsd(i.OutputSimd128Register(), i.InputSimd128Register(1));
2375  break;
2376  }
2377  case kX64I32x4Eq: {
2378  __ pcmpeqd(i.OutputSimd128Register(), i.InputSimd128Register(1));
2379  break;
2380  }
2381  case kX64I32x4Ne: {
2382  __ pcmpeqd(i.OutputSimd128Register(), i.InputSimd128Register(1));
2383  __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
2384  __ pxor(i.OutputSimd128Register(), kScratchDoubleReg);
2385  break;
2386  }
2387  case kX64I32x4GtS: {
2388  __ pcmpgtd(i.OutputSimd128Register(), i.InputSimd128Register(1));
2389  break;
2390  }
2391  case kX64I32x4GeS: {
2392  CpuFeatureScope sse_scope(tasm(), SSE4_1);
2393  XMMRegister dst = i.OutputSimd128Register();
2394  XMMRegister src = i.InputSimd128Register(1);
2395  __ pminsd(dst, src);
2396  __ pcmpeqd(dst, src);
2397  break;
2398  }
2399  case kX64I32x4UConvertF32x4: {
2400  DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2401  CpuFeatureScope sse_scope(tasm(), SSE4_1);
2402  XMMRegister dst = i.OutputSimd128Register();
2403  XMMRegister tmp = i.ToSimd128Register(instr->TempAt(0));
2404  // NAN->0, negative->0
2405  __ pxor(kScratchDoubleReg, kScratchDoubleReg);
2406  __ maxps(dst, kScratchDoubleReg);
2407  // scratch: float representation of max_signed
2408  __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
2409  __ psrld(kScratchDoubleReg, 1); // 0x7fffffff
2410  __ cvtdq2ps(kScratchDoubleReg, kScratchDoubleReg); // 0x4f000000
2411  // tmp: convert (src-max_signed).
2412  // Positive overflow lanes -> 0x7FFFFFFF
2413  // Negative lanes -> 0
2414  __ movaps(tmp, dst);
2415  __ subps(tmp, kScratchDoubleReg);
2416  __ cmpleps(kScratchDoubleReg, tmp);
2417  __ cvttps2dq(tmp, tmp);
2418  __ pxor(tmp, kScratchDoubleReg);
2419  __ pxor(kScratchDoubleReg, kScratchDoubleReg);
2420  __ pmaxsd(tmp, kScratchDoubleReg);
2421  // convert. Overflow lanes above max_signed will be 0x80000000
2422  __ cvttps2dq(dst, dst);
2423  // Add (src-max_signed) for overflow lanes.
2424  __ paddd(dst, tmp);
2425  break;
2426  }
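  // Sketch of the sequence above: NaN and negative lanes are first clamped to
  // 0.0 by maxps against zero. kScratchDoubleReg is then loaded with the
  // float 2147483648.0 (0x4f000000) -- cvtdq2ps of 0x7fffffff rounds to
  // exactly 2^31. Lanes below 2^31 convert directly via the final cvttps2dq
  // (their tmp correction is forced to 0 by pmaxsd with zero); lanes at or
  // above 2^31 convert to 0x80000000, and adding tmp = int(x - 2^31) (patched
  // to 0x7fffffff by the cmpleps/pxor pair when x >= 2^32) yields the unsigned
  // value, saturating to 0xffffffff on overflow.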
2427  case kX64I32x4UConvertI16x8Low: {
2428  CpuFeatureScope sse_scope(tasm(), SSE4_1);
2429  __ pmovzxwd(i.OutputSimd128Register(), i.InputSimd128Register(0));
2430  break;
2431  }
2432  case kX64I32x4UConvertI16x8High: {
2433  CpuFeatureScope sse_scope(tasm(), SSE4_1);
2434  XMMRegister dst = i.OutputSimd128Register();
2435  __ palignr(dst, i.InputSimd128Register(0), 8);
2436  __ pmovzxwd(dst, dst);
2437  break;
2438  }
2439  case kX64I32x4ShrU: {
2440  __ psrld(i.OutputSimd128Register(), i.InputInt8(1));
2441  break;
2442  }
2443  case kX64I32x4MinU: {
2444  CpuFeatureScope sse_scope(tasm(), SSE4_1);
2445  __ pminud(i.OutputSimd128Register(), i.InputSimd128Register(1));
2446  break;
2447  }
2448  case kX64I32x4MaxU: {
2449  CpuFeatureScope sse_scope(tasm(), SSE4_1);
2450  __ pmaxud(i.OutputSimd128Register(), i.InputSimd128Register(1));
2451  break;
2452  }
2453  case kX64I32x4GtU: {
2454  CpuFeatureScope sse_scope(tasm(), SSE4_1);
2455  XMMRegister dst = i.OutputSimd128Register();
2456  XMMRegister src = i.InputSimd128Register(1);
2457  __ pmaxud(dst, src);
2458  __ pcmpeqd(dst, src);
2459  __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
2460  __ pxor(dst, kScratchDoubleReg);
2461  break;
2462  }
2463  case kX64I32x4GeU: {
2464  CpuFeatureScope sse_scope(tasm(), SSE4_1);
2465  XMMRegister dst = i.OutputSimd128Register();
2466  XMMRegister src = i.InputSimd128Register(1);
2467  __ pminud(dst, src);
2468  __ pcmpeqd(dst, src);
2469  break;
2470  }
2471  case kX64S128Zero: {
2472  XMMRegister dst = i.OutputSimd128Register();
2473  __ xorps(dst, dst);
2474  break;
2475  }
2476  case kX64I16x8Splat: {
2477  XMMRegister dst = i.OutputSimd128Register();
2478  __ movd(dst, i.InputRegister(0));
2479  __ pshuflw(dst, dst, 0x0);
2480  __ pshufd(dst, dst, 0x0);
2481  break;
2482  }
2483  case kX64I16x8ExtractLane: {
2484  CpuFeatureScope sse_scope(tasm(), SSE4_1);
2485  Register dst = i.OutputRegister();
2486  __ pextrw(dst, i.InputSimd128Register(0), i.InputInt8(1));
2487  __ movsxwl(dst, dst);
2488  break;
2489  }
2490  case kX64I16x8ReplaceLane: {
2491  CpuFeatureScope sse_scope(tasm(), SSE4_1);
2492  if (instr->InputAt(2)->IsRegister()) {
2493  __ pinsrw(i.OutputSimd128Register(), i.InputRegister(2),
2494  i.InputInt8(1));
2495  } else {
2496  __ pinsrw(i.OutputSimd128Register(), i.InputOperand(2), i.InputInt8(1));
2497  }
2498  break;
2499  }
2500  case kX64I16x8SConvertI8x16Low: {
2501  CpuFeatureScope sse_scope(tasm(), SSE4_1);
2502  __ pmovsxbw(i.OutputSimd128Register(), i.InputSimd128Register(0));
2503  break;
2504  }
2505  case kX64I16x8SConvertI8x16High: {
2506  CpuFeatureScope sse_scope(tasm(), SSE4_1);
2507  XMMRegister dst = i.OutputSimd128Register();
2508  __ palignr(dst, i.InputSimd128Register(0), 8);
2509  __ pmovsxbw(dst, dst);
2510  break;
2511  }
2512  case kX64I16x8Neg: {
2513  CpuFeatureScope sse_scope(tasm(), SSSE3);
2514  XMMRegister dst = i.OutputSimd128Register();
2515  XMMRegister src = i.InputSimd128Register(0);
2516  if (dst == src) {
2517  __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
2518  __ psignw(dst, kScratchDoubleReg);
2519  } else {
2520  __ pxor(dst, dst);
2521  __ psubw(dst, src);
2522  }
2523  break;
2524  }
2525  case kX64I16x8Shl: {
2526  __ psllw(i.OutputSimd128Register(), i.InputInt8(1));
2527  break;
2528  }
2529  case kX64I16x8ShrS: {
2530  __ psraw(i.OutputSimd128Register(), i.InputInt8(1));
2531  break;
2532  }
2533  case kX64I16x8SConvertI32x4: {
2534  DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2535  __ packssdw(i.OutputSimd128Register(), i.InputSimd128Register(1));
2536  break;
2537  }
2538  case kX64I16x8Add: {
2539  __ paddw(i.OutputSimd128Register(), i.InputSimd128Register(1));
2540  break;
2541  }
2542  case kX64I16x8AddSaturateS: {
2543  __ paddsw(i.OutputSimd128Register(), i.InputSimd128Register(1));
2544  break;
2545  }
2546  case kX64I16x8AddHoriz: {
2547  CpuFeatureScope sse_scope(tasm(), SSSE3);
2548  __ phaddw(i.OutputSimd128Register(), i.InputSimd128Register(1));
2549  break;
2550  }
2551  case kX64I16x8Sub: {
2552  __ psubw(i.OutputSimd128Register(), i.InputSimd128Register(1));
2553  break;
2554  }
2555  case kX64I16x8SubSaturateS: {
2556  __ psubsw(i.OutputSimd128Register(), i.InputSimd128Register(1));
2557  break;
2558  }
2559  case kX64I16x8Mul: {
2560  CpuFeatureScope sse_scope(tasm(), SSE4_1);
2561  __ pmullw(i.OutputSimd128Register(), i.InputSimd128Register(1));
2562  break;
2563  }
2564  case kX64I16x8MinS: {
2565  CpuFeatureScope sse_scope(tasm(), SSE4_1);
2566  __ pminsw(i.OutputSimd128Register(), i.InputSimd128Register(1));
2567  break;
2568  }
2569  case kX64I16x8MaxS: {
2570  CpuFeatureScope sse_scope(tasm(), SSE4_1);
2571  __ pmaxsw(i.OutputSimd128Register(), i.InputSimd128Register(1));
2572  break;
2573  }
2574  case kX64I16x8Eq: {
2575  __ pcmpeqw(i.OutputSimd128Register(), i.InputSimd128Register(1));
2576  break;
2577  }
2578  case kX64I16x8Ne: {
2579  __ pcmpeqw(i.OutputSimd128Register(), i.InputSimd128Register(1));
2580  __ pcmpeqw(kScratchDoubleReg, kScratchDoubleReg);
2581  __ pxor(i.OutputSimd128Register(), kScratchDoubleReg);
2582  break;
2583  }
2584  case kX64I16x8GtS: {
2585  __ pcmpgtw(i.OutputSimd128Register(), i.InputSimd128Register(1));
2586  break;
2587  }
2588  case kX64I16x8GeS: {
2589  CpuFeatureScope sse_scope(tasm(), SSE4_1);
2590  XMMRegister dst = i.OutputSimd128Register();
2591  XMMRegister src = i.InputSimd128Register(1);
2592  __ pminsw(dst, src);
2593  __ pcmpeqw(dst, src);
2594  break;
2595  }
2596  case kX64I16x8UConvertI8x16Low: {
2597  CpuFeatureScope sse_scope(tasm(), SSE4_1);
2598  __ pmovzxbw(i.OutputSimd128Register(), i.InputSimd128Register(0));
2599  break;
2600  }
2601  case kX64I16x8UConvertI8x16High: {
2602  CpuFeatureScope sse_scope(tasm(), SSE4_1);
2603  XMMRegister dst = i.OutputSimd128Register();
2604  __ palignr(dst, i.InputSimd128Register(0), 8);
2605  __ pmovzxbw(dst, dst);
2606  break;
2607  }
2608  case kX64I16x8ShrU: {
2609  __ psrlw(i.OutputSimd128Register(), i.InputInt8(1));
2610  break;
2611  }
2612  case kX64I16x8UConvertI32x4: {
2613  DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2614  CpuFeatureScope sse_scope(tasm(), SSE4_1);
2615  XMMRegister dst = i.OutputSimd128Register();
2616  // Change negative lanes to 0x7FFFFFFF
2617  __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
2618  __ psrld(kScratchDoubleReg, 1);
2619  __ pminud(dst, kScratchDoubleReg);
2620  __ pminud(kScratchDoubleReg, i.InputSimd128Register(1));
2621  __ packusdw(dst, kScratchDoubleReg);
2622  break;
2623  }
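  // packusdw saturates each *signed* 32-bit lane into [0, 0xffff]. The two
  // pminud instructions (unsigned minimum against 0x7fffffff) ensure a lane
  // with its top bit set -- which packusdw would treat as negative and squash
  // to 0 -- is clamped to 0x7fffffff first and therefore saturates to 0xffff
  // instead.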
2624  case kX64I16x8AddSaturateU: {
2625  __ paddusw(i.OutputSimd128Register(), i.InputSimd128Register(1));
2626  break;
2627  }
2628  case kX64I16x8SubSaturateU: {
2629  __ psubusw(i.OutputSimd128Register(), i.InputSimd128Register(1));
2630  break;
2631  }
2632  case kX64I16x8MinU: {
2633  CpuFeatureScope sse_scope(tasm(), SSE4_1);
2634  __ pminuw(i.OutputSimd128Register(), i.InputSimd128Register(1));
2635  break;
2636  }
2637  case kX64I16x8MaxU: {
2638  CpuFeatureScope sse_scope(tasm(), SSE4_1);
2639  __ pmaxuw(i.OutputSimd128Register(), i.InputSimd128Register(1));
2640  break;
2641  }
2642  case kX64I16x8GtU: {
2643  CpuFeatureScope sse_scope(tasm(), SSE4_1);
2644  XMMRegister dst = i.OutputSimd128Register();
2645  XMMRegister src = i.InputSimd128Register(1);
2646  __ pmaxuw(dst, src);
2647  __ pcmpeqw(dst, src);
2648  __ pcmpeqw(kScratchDoubleReg, kScratchDoubleReg);
2649  __ pxor(dst, kScratchDoubleReg);
2650  break;
2651  }
2652  case kX64I16x8GeU: {
2653  CpuFeatureScope sse_scope(tasm(), SSE4_1);
2654  XMMRegister dst = i.OutputSimd128Register();
2655  XMMRegister src = i.InputSimd128Register(1);
2656  __ pminuw(dst, src);
2657  __ pcmpeqw(dst, src);
2658  break;
2659  }
2660  case kX64I8x16Splat: {
2661  CpuFeatureScope sse_scope(tasm(), SSSE3);
2662  XMMRegister dst = i.OutputSimd128Register();
2663  __ movd(dst, i.InputRegister(0));
2664  __ xorps(kScratchDoubleReg, kScratchDoubleReg);
2665  __ pshufb(dst, kScratchDoubleReg);
2666  break;
2667  }
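  // pshufb with an all-zero shuffle mask (kScratchDoubleReg is cleared by the
  // xorps) selects byte 0 of dst for every destination byte, i.e. it
  // broadcasts the byte placed there by movd across all 16 lanes.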
2668  case kX64I8x16ExtractLane: {
2669  CpuFeatureScope sse_scope(tasm(), SSE4_1);
2670  Register dst = i.OutputRegister();
2671  __ pextrb(dst, i.InputSimd128Register(0), i.InputInt8(1));
2672  __ movsxbl(dst, dst);
2673  break;
2674  }
2675  case kX64I8x16ReplaceLane: {
2676  CpuFeatureScope sse_scope(tasm(), SSE4_1);
2677  if (instr->InputAt(2)->IsRegister()) {
2678  __ pinsrb(i.OutputSimd128Register(), i.InputRegister(2),
2679  i.InputInt8(1));
2680  } else {
2681  __ pinsrb(i.OutputSimd128Register(), i.InputOperand(2), i.InputInt8(1));
2682  }
2683  break;
2684  }
2685  case kX64I8x16SConvertI16x8: {
2686  DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2687  __ packsswb(i.OutputSimd128Register(), i.InputSimd128Register(1));
2688  break;
2689  }
2690  case kX64I8x16Neg: {
2691  CpuFeatureScope sse_scope(tasm(), SSSE3);
2692  XMMRegister dst = i.OutputSimd128Register();
2693  XMMRegister src = i.InputSimd128Register(0);
2694  if (dst == src) {
2695  __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
2696  __ psignb(dst, kScratchDoubleReg);
2697  } else {
2698  __ pxor(dst, dst);
2699  __ psubb(dst, src);
2700  }
2701  break;
2702  }
2703  case kX64I8x16Shl: {
2704  XMMRegister dst = i.OutputSimd128Register();
2705  DCHECK_EQ(dst, i.InputSimd128Register(0));
2706  int8_t shift = i.InputInt8(1) & 0x7;
2707  if (shift < 4) {
2708  // For small shifts, doubling is faster.
2709  for (int i = 0; i < shift; ++i) {
2710  __ paddb(dst, dst);
2711  }
2712  } else {
2713  // Mask off the unwanted bits before word-shifting.
2714  __ pcmpeqw(kScratchDoubleReg, kScratchDoubleReg);
2715  __ psrlw(kScratchDoubleReg, 8 + shift);
2716  __ packuswb(kScratchDoubleReg, kScratchDoubleReg);
2717  __ pand(dst, kScratchDoubleReg);
2718  __ psllw(dst, shift);
2719  }
2720  break;
2721  }
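  // There is no byte-granularity shift in SSE. Shifting left by n is the same
  // as multiplying by 2^n, so for n < 4 the value is simply doubled n times
  // with paddb. For larger shifts a word shift is used instead: the mask built
  // in kScratchDoubleReg is (0xff >> n) replicated into every byte, and pand
  // clears the top n bits of each byte so that psllw cannot carry bits into
  // the neighbouring byte.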
2722  case kX64I8x16ShrS: {
2723  XMMRegister dst = i.OutputSimd128Register();
2724  XMMRegister src = i.InputSimd128Register(0);
2725  int8_t shift = i.InputInt8(1) & 0x7;
2726  // Unpack the bytes into words, do arithmetic shifts, and repack.
2727  __ punpckhbw(kScratchDoubleReg, src);
2728  __ punpcklbw(dst, src);
2729  __ psraw(kScratchDoubleReg, 8 + shift);
2730  __ psraw(dst, 8 + shift);
2731  __ packsswb(dst, kScratchDoubleReg);
2732  break;
2733  }
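  // The "8 + shift" accounts for the unpack: punpck{l,h}bw places each source
  // byte in the high half of a 16-bit word (the low half holds a duplicate, or
  // whatever was left in kScratchDoubleReg, which is harmless because a shift
  // of at least 8 discards it). psraw then sign-extends and shifts, and
  // packsswb narrows the words back to bytes.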
2734  case kX64I8x16Add: {
2735  __ paddb(i.OutputSimd128Register(), i.InputSimd128Register(1));
2736  break;
2737  }
2738  case kX64I8x16AddSaturateS: {
2739  __ paddsb(i.OutputSimd128Register(), i.InputSimd128Register(1));
2740  break;
2741  }
2742  case kX64I8x16Sub: {
2743  __ psubb(i.OutputSimd128Register(), i.InputSimd128Register(1));
2744  break;
2745  }
2746  case kX64I8x16SubSaturateS: {
2747  __ psubsb(i.OutputSimd128Register(), i.InputSimd128Register(1));
2748  break;
2749  }
2750  case kX64I8x16Mul: {
2751  XMMRegister dst = i.OutputSimd128Register();
2752  DCHECK_EQ(dst, i.InputSimd128Register(0));
2753  XMMRegister right = i.InputSimd128Register(1);
2754  XMMRegister tmp = i.ToSimd128Register(instr->TempAt(0));
2755  // I16x8 view of I8x16
2756  // left = AAaa AAaa ... AAaa AAaa
2757  // right= BBbb BBbb ... BBbb BBbb
2758  // t = 00AA 00AA ... 00AA 00AA
2759  // s = 00BB 00BB ... 00BB 00BB
2760  __ movaps(tmp, dst);
2761  __ movaps(kScratchDoubleReg, right);
2762  __ psrlw(tmp, 8);
2763  __ psrlw(kScratchDoubleReg, 8);
2764  // dst = left * 256
2765  __ psllw(dst, 8);
2766  // t = I16x8Mul(t, s)
2767  // => __PP __PP ... __PP __PP
2768  __ pmullw(tmp, kScratchDoubleReg);
2769  // dst = I16x8Mul(left * 256, right)
2770  // => pp__ pp__ ... pp__ pp__
2771  __ pmullw(dst, right);
2772  // t = I16x8Shl(t, 8)
2773  // => PP00 PP00 ... PP00 PP00
2774  __ psllw(tmp, 8);
2775  // dst = I16x8Shr(dst, 8)
2776  // => 00pp 00pp ... 00pp 00pp
2777  __ psrlw(dst, 8);
2778  // dst = I16x8Or(dst, t)
2779  // => PPpp PPpp ... PPpp PPpp
2780  __ por(dst, tmp);
2781  break;
2782  }
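  // SSE has no byte multiply, so the even and odd bytes are multiplied as
  // 16-bit lanes and recombined; each result byte is the low 8 bits of its
  // product, e.g. for one byte pair 200 * 3 = 600 = 0x258, the lane ends up as
  // 0x58 (88). The odd bytes (AA/BB) are isolated with psrlw 8 and their
  // product is shifted back into the high byte; the even bytes (aa/bb) are
  // handled by pre-shifting the left operand so their product lands in the
  // high byte, then shifting it back down.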
2783  case kX64I8x16MinS: {
2784  CpuFeatureScope sse_scope(tasm(), SSE4_1);
2785  __ pminsb(i.OutputSimd128Register(), i.InputSimd128Register(1));
2786  break;
2787  }
2788  case kX64I8x16MaxS: {
2789  CpuFeatureScope sse_scope(tasm(), SSE4_1);
2790  __ pmaxsb(i.OutputSimd128Register(), i.InputSimd128Register(1));
2791  break;
2792  }
2793  case kX64I8x16Eq: {
2794  __ pcmpeqb(i.OutputSimd128Register(), i.InputSimd128Register(1));
2795  break;
2796  }
2797  case kX64I8x16Ne: {
2798  __ pcmpeqb(i.OutputSimd128Register(), i.InputSimd128Register(1));
2799  __ pcmpeqb(kScratchDoubleReg, kScratchDoubleReg);
2800  __ pxor(i.OutputSimd128Register(), kScratchDoubleReg);
2801  break;
2802  }
2803  case kX64I8x16GtS: {
2804  __ pcmpgtb(i.OutputSimd128Register(), i.InputSimd128Register(1));
2805  break;
2806  }
2807  case kX64I8x16GeS: {
2808  CpuFeatureScope sse_scope(tasm(), SSE4_1);
2809  XMMRegister dst = i.OutputSimd128Register();
2810  XMMRegister src = i.InputSimd128Register(1);
2811  __ pminsb(dst, src);
2812  __ pcmpeqb(dst, src);
2813  break;
2814  }
2815  case kX64I8x16UConvertI16x8: {
2816  DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2817  CpuFeatureScope sse_scope(tasm(), SSE4_1);
2818  XMMRegister dst = i.OutputSimd128Register();
2819  // Change negative lanes to 0x7FFF
2820  __ pcmpeqw(kScratchDoubleReg, kScratchDoubleReg);
2821  __ psrlw(kScratchDoubleReg, 1);
2822  __ pminuw(dst, kScratchDoubleReg);
2823  __ pminuw(kScratchDoubleReg, i.InputSimd128Register(1));
2824  __ packuswb(dst, kScratchDoubleReg);
2825  break;
2826  }
2827  case kX64I8x16ShrU: {
2828  XMMRegister dst = i.OutputSimd128Register();
2829  XMMRegister src = i.InputSimd128Register(0);
2830  int8_t shift = i.InputInt8(1) & 0x7;
2831  // Unpack the bytes into words, do logical shifts, and repack.
2832  __ punpckhbw(kScratchDoubleReg, src);
2833  __ punpcklbw(dst, src);
2834  __ psrlw(kScratchDoubleReg, 8 + shift);
2835  __ psrlw(dst, 8 + shift);
2836  __ packuswb(dst, kScratchDoubleReg);
2837  break;
2838  }
2839  case kX64I8x16AddSaturateU: {
2840  __ paddusb(i.OutputSimd128Register(), i.InputSimd128Register(1));
2841  break;
2842  }
2843  case kX64I8x16SubSaturateU: {
2844  __ psubusb(i.OutputSimd128Register(), i.InputSimd128Register(1));
2845  break;
2846  }
2847  case kX64I8x16MinU: {
2848  CpuFeatureScope sse_scope(tasm(), SSE4_1);
2849  __ pminub(i.OutputSimd128Register(), i.InputSimd128Register(1));
2850  break;
2851  }
2852  case kX64I8x16MaxU: {
2853  CpuFeatureScope sse_scope(tasm(), SSE4_1);
2854  __ pmaxub(i.OutputSimd128Register(), i.InputSimd128Register(1));
2855  break;
2856  }
2857  case kX64I8x16GtU: {
2858  CpuFeatureScope sse_scope(tasm(), SSE4_1);
2859  XMMRegister dst = i.OutputSimd128Register();
2860  XMMRegister src = i.InputSimd128Register(1);
2861  __ pmaxub(dst, src);
2862  __ pcmpeqb(dst, src);
2863  __ pcmpeqb(kScratchDoubleReg, kScratchDoubleReg);
2864  __ pxor(dst, kScratchDoubleReg);
2865  break;
2866  }
2867  case kX64I8x16GeU: {
2868  CpuFeatureScope sse_scope(tasm(), SSE4_1);
2869  XMMRegister dst = i.OutputSimd128Register();
2870  XMMRegister src = i.InputSimd128Register(1);
2871  __ pminub(dst, src);
2872  __ pcmpeqb(dst, src);
2873  break;
2874  }
2875  case kX64S128And: {
2876  __ pand(i.OutputSimd128Register(), i.InputSimd128Register(1));
2877  break;
2878  }
2879  case kX64S128Or: {
2880  __ por(i.OutputSimd128Register(), i.InputSimd128Register(1));
2881  break;
2882  }
2883  case kX64S128Xor: {
2884  __ pxor(i.OutputSimd128Register(), i.InputSimd128Register(1));
2885  break;
2886  }
2887  case kX64S128Not: {
2888  XMMRegister dst = i.OutputSimd128Register();
2889  XMMRegister src = i.InputSimd128Register(0);
2890  if (dst == src) {
2891  __ movaps(kScratchDoubleReg, dst);
2892  __ pcmpeqd(dst, dst);
2893  __ pxor(dst, kScratchDoubleReg);
2894  } else {
2895  __ pcmpeqd(dst, dst);
2896  __ pxor(dst, src);
2897  }
2898 
2899  break;
2900  }
2901  case kX64S128Select: {
2902  // Mask used here is stored in dst.
2903  XMMRegister dst = i.OutputSimd128Register();
2904  __ movaps(kScratchDoubleReg, i.InputSimd128Register(1));
2905  __ xorps(kScratchDoubleReg, i.InputSimd128Register(2));
2906  __ andps(dst, kScratchDoubleReg);
2907  __ xorps(dst, i.InputSimd128Register(2));
2908  break;
2909  }
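  // This is the branch-free bitwise select (v1 & mask) | (v2 & ~mask),
  // computed in three ops without a NOT as ((v1 ^ v2) & mask) ^ v2: where a
  // mask bit is 1 the xor pair cancels back to v1, where it is 0 the result is
  // just v2. The mask (input 0) is already in dst, which is why only inputs 1
  // and 2 are read explicitly.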
2910  case kX64S1x4AnyTrue:
2911  case kX64S1x8AnyTrue:
2912  case kX64S1x16AnyTrue: {
2913  CpuFeatureScope sse_scope(tasm(), SSE4_1);
2914  Register dst = i.OutputRegister();
2915  XMMRegister src = i.InputSimd128Register(0);
2916  Register tmp = i.TempRegister(0);
2917  __ xorq(tmp, tmp);
2918  __ movq(dst, Immediate(-1));
2919  __ ptest(src, src);
2920  __ cmovq(zero, dst, tmp);
2921  break;
2922  }
2923  case kX64S1x4AllTrue:
2924  case kX64S1x8AllTrue:
2925  case kX64S1x16AllTrue: {
2926  CpuFeatureScope sse_scope(tasm(), SSE4_1);
2927  Register dst = i.OutputRegister();
2928  XMMRegister src = i.InputSimd128Register(0);
2929  Register tmp = i.TempRegister(0);
2930  __ movq(tmp, Immediate(-1));
2931  __ xorq(dst, dst);
2932  // Compare all src lanes to false.
2933  __ pxor(kScratchDoubleReg, kScratchDoubleReg);
2934  if (arch_opcode == kX64S1x4AllTrue) {
2935  __ pcmpeqd(kScratchDoubleReg, src);
2936  } else if (arch_opcode == kX64S1x8AllTrue) {
2937  __ pcmpeqw(kScratchDoubleReg, src);
2938  } else {
2939  __ pcmpeqb(kScratchDoubleReg, src);
2940  }
2941  // If kScratchDoubleReg is all zero, none of src lanes are false.
2942  __ ptest(kScratchDoubleReg, kScratchDoubleReg);
2943  __ cmovq(zero, dst, tmp);
2944  break;
2945  }
2946  case kX64StackCheck:
2947  __ CompareRoot(rsp, RootIndex::kStackLimit);
2948  break;
2949  case kWord32AtomicExchangeInt8: {
2950  __ xchgb(i.InputRegister(0), i.MemoryOperand(1));
2951  __ movsxbl(i.InputRegister(0), i.InputRegister(0));
2952  break;
2953  }
2954  case kWord32AtomicExchangeUint8: {
2955  __ xchgb(i.InputRegister(0), i.MemoryOperand(1));
2956  __ movzxbl(i.InputRegister(0), i.InputRegister(0));
2957  break;
2958  }
2959  case kWord32AtomicExchangeInt16: {
2960  __ xchgw(i.InputRegister(0), i.MemoryOperand(1));
2961  __ movsxwl(i.InputRegister(0), i.InputRegister(0));
2962  break;
2963  }
2964  case kWord32AtomicExchangeUint16: {
2965  __ xchgw(i.InputRegister(0), i.MemoryOperand(1));
2966  __ movzxwl(i.InputRegister(0), i.InputRegister(0));
2967  break;
2968  }
2969  case kWord32AtomicExchangeWord32: {
2970  __ xchgl(i.InputRegister(0), i.MemoryOperand(1));
2971  break;
2972  }
2973  case kWord32AtomicCompareExchangeInt8: {
2974  __ lock();
2975  __ cmpxchgb(i.MemoryOperand(2), i.InputRegister(1));
2976  __ movsxbl(rax, rax);
2977  break;
2978  }
2979  case kWord32AtomicCompareExchangeUint8: {
2980  __ lock();
2981  __ cmpxchgb(i.MemoryOperand(2), i.InputRegister(1));
2982  __ movzxbl(rax, rax);
2983  break;
2984  }
2985  case kWord32AtomicCompareExchangeInt16: {
2986  __ lock();
2987  __ cmpxchgw(i.MemoryOperand(2), i.InputRegister(1));
2988  __ movsxwl(rax, rax);
2989  break;
2990  }
2991  case kWord32AtomicCompareExchangeUint16: {
2992  __ lock();
2993  __ cmpxchgw(i.MemoryOperand(2), i.InputRegister(1));
2994  __ movzxwl(rax, rax);
2995  break;
2996  }
2997  case kWord32AtomicCompareExchangeWord32: {
2998  __ lock();
2999  __ cmpxchgl(i.MemoryOperand(2), i.InputRegister(1));
3000  break;
3001  }
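  // cmpxchg implicitly compares against and writes back through al/ax/eax/rax,
  // which is why these cases (and the ATOMIC_BINOP_CASE expansions below) end
  // by sign- or zero-extending rax: after the instruction the accumulator
  // holds the old memory value, which is the result of the compare-exchange.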
3002 #define ATOMIC_BINOP_CASE(op, inst) \
3003  case kWord32Atomic##op##Int8: \
3004  ASSEMBLE_ATOMIC_BINOP(inst, movb, cmpxchgb); \
3005  __ movsxbl(rax, rax); \
3006  break; \
3007  case kWord32Atomic##op##Uint8: \
3008  ASSEMBLE_ATOMIC_BINOP(inst, movb, cmpxchgb); \
3009  __ movzxbl(rax, rax); \
3010  break; \
3011  case kWord32Atomic##op##Int16: \
3012  ASSEMBLE_ATOMIC_BINOP(inst, movw, cmpxchgw); \
3013  __ movsxwl(rax, rax); \
3014  break; \
3015  case kWord32Atomic##op##Uint16: \
3016  ASSEMBLE_ATOMIC_BINOP(inst, movw, cmpxchgw); \
3017  __ movzxwl(rax, rax); \
3018  break; \
3019  case kWord32Atomic##op##Word32: \
3020  ASSEMBLE_ATOMIC_BINOP(inst, movl, cmpxchgl); \
3021  break;
3022  ATOMIC_BINOP_CASE(Add, addl)
3023  ATOMIC_BINOP_CASE(Sub, subl)
3024  ATOMIC_BINOP_CASE(And, andl)
3025  ATOMIC_BINOP_CASE(Or, orl)
3026  ATOMIC_BINOP_CASE(Xor, xorl)
3027 #undef ATOMIC_BINOP_CASE
3028  case kX64Word64AtomicExchangeUint8: {
3029  __ xchgb(i.InputRegister(0), i.MemoryOperand(1));
3030  __ movzxbq(i.InputRegister(0), i.InputRegister(0));
3031  break;
3032  }
3033  case kX64Word64AtomicExchangeUint16: {
3034  __ xchgw(i.InputRegister(0), i.MemoryOperand(1));
3035  __ movzxwq(i.InputRegister(0), i.InputRegister(0));
3036  break;
3037  }
3038  case kX64Word64AtomicExchangeUint32: {
3039  __ xchgl(i.InputRegister(0), i.MemoryOperand(1));
3040  break;
3041  }
3042  case kX64Word64AtomicExchangeUint64: {
3043  __ xchgq(i.InputRegister(0), i.MemoryOperand(1));
3044  break;
3045  }
3046  case kX64Word64AtomicCompareExchangeUint8: {
3047  __ lock();
3048  __ cmpxchgb(i.MemoryOperand(2), i.InputRegister(1));
3049  __ movzxbq(rax, rax);
3050  break;
3051  }
3052  case kX64Word64AtomicCompareExchangeUint16: {
3053  __ lock();
3054  __ cmpxchgw(i.MemoryOperand(2), i.InputRegister(1));
3055  __ movzxwq(rax, rax);
3056  break;
3057  }
3058  case kX64Word64AtomicCompareExchangeUint32: {
3059  __ lock();
3060  __ cmpxchgl(i.MemoryOperand(2), i.InputRegister(1));
3061  break;
3062  }
3063  case kX64Word64AtomicCompareExchangeUint64: {
3064  __ lock();
3065  __ cmpxchgq(i.MemoryOperand(2), i.InputRegister(1));
3066  break;
3067  }
3068 #define ATOMIC64_BINOP_CASE(op, inst) \
3069  case kX64Word64Atomic##op##Uint8: \
3070  ASSEMBLE_ATOMIC64_BINOP(inst, movb, cmpxchgb); \
3071  __ movzxbq(rax, rax); \
3072  break; \
3073  case kX64Word64Atomic##op##Uint16: \
3074  ASSEMBLE_ATOMIC64_BINOP(inst, movw, cmpxchgw); \
3075  __ movzxwq(rax, rax); \
3076  break; \
3077  case kX64Word64Atomic##op##Uint32: \
3078  ASSEMBLE_ATOMIC64_BINOP(inst, movl, cmpxchgl); \
3079  break; \
3080  case kX64Word64Atomic##op##Uint64: \
3081  ASSEMBLE_ATOMIC64_BINOP(inst, movq, cmpxchgq); \
3082  break;
3083  ATOMIC64_BINOP_CASE(Add, addq)
3084  ATOMIC64_BINOP_CASE(Sub, subq)
3085  ATOMIC64_BINOP_CASE(And, andq)
3086  ATOMIC64_BINOP_CASE(Or, orq)
3087  ATOMIC64_BINOP_CASE(Xor, xorq)
3088 #undef ATOMIC64_BINOP_CASE
3089  case kWord32AtomicLoadInt8:
3090  case kWord32AtomicLoadUint8:
3091  case kWord32AtomicLoadInt16:
3092  case kWord32AtomicLoadUint16:
3093  case kWord32AtomicLoadWord32:
3094  case kWord32AtomicStoreWord8:
3095  case kWord32AtomicStoreWord16:
3096  case kWord32AtomicStoreWord32:
3097  case kX64Word64AtomicLoadUint8:
3098  case kX64Word64AtomicLoadUint16:
3099  case kX64Word64AtomicLoadUint32:
3100  case kX64Word64AtomicLoadUint64:
3101  case kX64Word64AtomicStoreWord8:
3102  case kX64Word64AtomicStoreWord16:
3103  case kX64Word64AtomicStoreWord32:
3104  case kX64Word64AtomicStoreWord64:
3105  UNREACHABLE(); // Won't be generated by instruction selector.
3106  break;
3107  }
3108  return kSuccess;
3109 }  // NOLINT(readability/fn_size)
3110 
3111 #undef ASSEMBLE_UNOP
3112 #undef ASSEMBLE_BINOP
3113 #undef ASSEMBLE_COMPARE
3114 #undef ASSEMBLE_MULT
3115 #undef ASSEMBLE_SHIFT
3116 #undef ASSEMBLE_MOVX
3117 #undef ASSEMBLE_SSE_BINOP
3118 #undef ASSEMBLE_SSE_UNOP
3119 #undef ASSEMBLE_AVX_BINOP
3120 #undef ASSEMBLE_IEEE754_BINOP
3121 #undef ASSEMBLE_IEEE754_UNOP
3122 #undef ASSEMBLE_ATOMIC_BINOP
3123 #undef ASSEMBLE_ATOMIC64_BINOP
3124 
3125 namespace {
3126 
3127 Condition FlagsConditionToCondition(FlagsCondition condition) {
3128  switch (condition) {
3129  case kUnorderedEqual:
3130  case kEqual:
3131  return equal;
3132  case kUnorderedNotEqual:
3133  case kNotEqual:
3134  return not_equal;
3135  case kSignedLessThan:
3136  return less;
3137  case kSignedGreaterThanOrEqual:
3138  return greater_equal;
3139  case kSignedLessThanOrEqual:
3140  return less_equal;
3141  case kSignedGreaterThan:
3142  return greater;
3143  case kUnsignedLessThan:
3144  return below;
3145  case kUnsignedGreaterThanOrEqual:
3146  return above_equal;
3147  case kUnsignedLessThanOrEqual:
3148  return below_equal;
3149  case kUnsignedGreaterThan:
3150  return above;
3151  case kOverflow:
3152  return overflow;
3153  case kNotOverflow:
3154  return no_overflow;
3155  default:
3156  break;
3157  }
3158  UNREACHABLE();
3159 }
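// Note that kUnorderedEqual/kUnorderedNotEqual map onto the plain equal/
// not_equal condition codes; the unordered (NaN) case is handled separately by
// the callers below, which test the parity flag (parity_even / parity_odd)
// before using the returned condition.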
3160 
3161 } // namespace
3162 
3163 // Assembles branches after this instruction.
3164 void CodeGenerator::AssembleArchBranch(Instruction* instr, BranchInfo* branch) {
3165  Label::Distance flabel_distance =
3166  branch->fallthru ? Label::kNear : Label::kFar;
3167  Label* tlabel = branch->true_label;
3168  Label* flabel = branch->false_label;
3169  if (branch->condition == kUnorderedEqual) {
3170  __ j(parity_even, flabel, flabel_distance);
3171  } else if (branch->condition == kUnorderedNotEqual) {
3172  __ j(parity_even, tlabel);
3173  }
3174  __ j(FlagsConditionToCondition(branch->condition), tlabel);
3175 
3176  if (!branch->fallthru) __ jmp(flabel, flabel_distance);
3177 }
3178 
3179 void CodeGenerator::AssembleBranchPoisoning(FlagsCondition condition,
3180  Instruction* instr) {
3181  // TODO(jarin) Handle float comparisons (kUnordered[Not]Equal).
3182  if (condition == kUnorderedEqual || condition == kUnorderedNotEqual) {
3183  return;
3184  }
3185 
3186  condition = NegateFlagsCondition(condition);
3187  __ movl(kScratchRegister, Immediate(0));
3188  __ cmovq(FlagsConditionToCondition(condition), kSpeculationPoisonRegister,
3189  kScratchRegister);
3190 }
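// Rough idea of the poisoning above: kSpeculationPoisonRegister normally holds
// all ones, and poisoned loads are masked with it (see the andq with
// kSpeculationPoisonRegister in the kX64Movsd case). Here the condition is
// negated and a conditional move clears the poison register to zero whenever
// the flags indicate this arm of the branch should not have been reached, so
// speculatively executed loads on the wrong arm read as zero.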
3191 
3192 void CodeGenerator::AssembleArchDeoptBranch(Instruction* instr,
3193  BranchInfo* branch) {
3194  Label::Distance flabel_distance =
3195  branch->fallthru ? Label::kNear : Label::kFar;
3196  Label* tlabel = branch->true_label;
3197  Label* flabel = branch->false_label;
3198  Label nodeopt;
3199  if (branch->condition == kUnorderedEqual) {
3200  __ j(parity_even, flabel, flabel_distance);
3201  } else if (branch->condition == kUnorderedNotEqual) {
3202  __ j(parity_even, tlabel);
3203  }
3204  __ j(FlagsConditionToCondition(branch->condition), tlabel);
3205 
3206  if (FLAG_deopt_every_n_times > 0) {
3207  ExternalReference counter =
3208  ExternalReference::stress_deopt_count(isolate());
3209 
3210  __ pushfq();
3211  __ pushq(rax);
3212  __ load_rax(counter);
3213  __ decl(rax);
3214  __ j(not_zero, &nodeopt);
3215 
3216  __ Set(rax, FLAG_deopt_every_n_times);
3217  __ store_rax(counter);
3218  __ popq(rax);
3219  __ popfq();
3220  __ jmp(tlabel);
3221 
3222  __ bind(&nodeopt);
3223  __ store_rax(counter);
3224  __ popq(rax);
3225  __ popfq();
3226  }
3227 
3228  if (!branch->fallthru) {
3229  __ jmp(flabel, flabel_distance);
3230  }
3231 }
3232 
3233 void CodeGenerator::AssembleArchJump(RpoNumber target) {
3234  if (!IsNextInAssemblyOrder(target)) __ jmp(GetLabel(target));
3235 }
3236 
3237 void CodeGenerator::AssembleArchTrap(Instruction* instr,
3238  FlagsCondition condition) {
3239  auto ool = new (zone()) WasmOutOfLineTrap(this, instr);
3240  Label* tlabel = ool->entry();
3241  Label end;
3242  if (condition == kUnorderedEqual) {
3243  __ j(parity_even, &end);
3244  } else if (condition == kUnorderedNotEqual) {
3245  __ j(parity_even, tlabel);
3246  }
3247  __ j(FlagsConditionToCondition(condition), tlabel);
3248  __ bind(&end);
3249 }
3250 
3251 // Assembles boolean materializations after this instruction.
3252 void CodeGenerator::AssembleArchBoolean(Instruction* instr,
3253  FlagsCondition condition) {
3254  X64OperandConverter i(this, instr);
3255  Label done;
3256 
3257  // Materialize a full 64-bit 1 or 0 value. The result register is always the
3258  // last output of the instruction.
3259  Label check;
3260  DCHECK_NE(0u, instr->OutputCount());
3261  Register reg = i.OutputRegister(instr->OutputCount() - 1);
3262  if (condition == kUnorderedEqual) {
3263  __ j(parity_odd, &check, Label::kNear);
3264  __ movl(reg, Immediate(0));
3265  __ jmp(&done, Label::kNear);
3266  } else if (condition == kUnorderedNotEqual) {
3267  __ j(parity_odd, &check, Label::kNear);
3268  __ movl(reg, Immediate(1));
3269  __ jmp(&done, Label::kNear);
3270  }
3271  __ bind(&check);
3272  __ setcc(FlagsConditionToCondition(condition), reg);
3273  __ movzxbl(reg, reg);
3274  __ bind(&done);
3275 }
3276 
3277 void CodeGenerator::AssembleArchBinarySearchSwitch(Instruction* instr) {
3278  X64OperandConverter i(this, instr);
3279  Register input = i.InputRegister(0);
3280  std::vector<std::pair<int32_t, Label*>> cases;
3281  for (size_t index = 2; index < instr->InputCount(); index += 2) {
3282  cases.push_back({i.InputInt32(index + 0), GetLabel(i.InputRpo(index + 1))});
3283  }
3284  AssembleArchBinarySearchSwitchRange(input, i.InputRpo(1), cases.data(),
3285  cases.data() + cases.size());
3286 }
3287 
3288 void CodeGenerator::AssembleArchLookupSwitch(Instruction* instr) {
3289  X64OperandConverter i(this, instr);
3290  Register input = i.InputRegister(0);
3291  for (size_t index = 2; index < instr->InputCount(); index += 2) {
3292  __ cmpl(input, Immediate(i.InputInt32(index + 0)));
3293  __ j(equal, GetLabel(i.InputRpo(index + 1)));
3294  }
3295  AssembleArchJump(i.InputRpo(1));
3296 }
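// The lookup switch is a straight chain of cmpl/je pairs with the default
// block (input 1) as the final fallthrough; the binary-search variant above
// collects the same (value, label) pairs and lets
// AssembleArchBinarySearchSwitchRange emit a branch tree over them instead.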
3297 
3298 void CodeGenerator::AssembleArchTableSwitch(Instruction* instr) {
3299  X64OperandConverter i(this, instr);
3300  Register input = i.InputRegister(0);
3301  int32_t const case_count = static_cast<int32_t>(instr->InputCount() - 2);
3302  Label** cases = zone()->NewArray<Label*>(case_count);
3303  for (int32_t index = 0; index < case_count; ++index) {
3304  cases[index] = GetLabel(i.InputRpo(index + 2));
3305  }
3306  Label* const table = AddJumpTable(cases, case_count);
3307  __ cmpl(input, Immediate(case_count));
3308  __ j(above_equal, GetLabel(i.InputRpo(1)));
3309  __ leaq(kScratchRegister, Operand(table));
3310  __ jmp(Operand(kScratchRegister, input, times_8, 0));
3311 }
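// The table switch bounds-checks the input against case_count first; because
// the above_equal test is unsigned it also sends negative inputs to the
// default block. The indirect jmp then indexes the table of 8-byte label
// addresses emitted by AssembleJumpTable at the end of this file.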
3312 
3313 namespace {
3314 
3315 static const int kQuadWordSize = 16;
3316 
3317 } // namespace
3318 
3319 void CodeGenerator::FinishFrame(Frame* frame) {
3320  auto call_descriptor = linkage()->GetIncomingDescriptor();
3321 
3322  const RegList saves_fp = call_descriptor->CalleeSavedFPRegisters();
3323  if (saves_fp != 0) {
3324  frame->AlignSavedCalleeRegisterSlots();
3325  if (saves_fp != 0) { // Save callee-saved XMM registers.
3326  const uint32_t saves_fp_count = base::bits::CountPopulation(saves_fp);
3327  frame->AllocateSavedCalleeRegisterSlots(saves_fp_count *
3328  (kQuadWordSize / kPointerSize));
3329  }
3330  }
3331  const RegList saves = call_descriptor->CalleeSavedRegisters();
3332  if (saves != 0) { // Save callee-saved registers.
3333  int count = 0;
3334  for (int i = Register::kNumRegisters - 1; i >= 0; i--) {
3335  if (((1 << i) & saves)) {
3336  ++count;
3337  }
3338  }
3339  frame->AllocateSavedCalleeRegisterSlots(count);
3340  }
3341 }
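// Frame slots are pointer-sized (8 bytes on x64), so every callee-saved XMM
// register reserves kQuadWordSize / kPointerSize = 2 slots, while each
// general-purpose register in `saves` takes a single slot.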
3342 
3343 void CodeGenerator::AssembleConstructFrame() {
3344  auto call_descriptor = linkage()->GetIncomingDescriptor();
3345  if (frame_access_state()->has_frame()) {
3346  int pc_base = __ pc_offset();
3347 
3348  if (call_descriptor->IsCFunctionCall()) {
3349  __ pushq(rbp);
3350  __ movq(rbp, rsp);
3351  } else if (call_descriptor->IsJSFunctionCall()) {
3352  __ Prologue();
3353  if (call_descriptor->PushArgumentCount()) {
3354  __ pushq(kJavaScriptCallArgCountRegister);
3355  }
3356  } else {
3357  __ StubPrologue(info()->GetOutputStackFrameType());
3358  if (call_descriptor->IsWasmFunctionCall()) {
3359  __ pushq(kWasmInstanceRegister);
3360  } else if (call_descriptor->IsWasmImportWrapper()) {
3361  // WASM import wrappers are passed a tuple in the place of the instance.
3362  // Unpack the tuple into the instance and the target callable.
3363  // This must be done here in the codegen because it cannot be expressed
3364  // properly in the graph.
3365  __ movq(kJSFunctionRegister,
3366  Operand(kWasmInstanceRegister,
3367  Tuple2::kValue2Offset - kHeapObjectTag));
3368  __ movq(kWasmInstanceRegister,
3369  Operand(kWasmInstanceRegister,
3370  Tuple2::kValue1Offset - kHeapObjectTag));
3371  __ pushq(kWasmInstanceRegister);
3372  }
3373  }
3374 
3375  unwinding_info_writer_.MarkFrameConstructed(pc_base);
3376  }
3377  int shrink_slots = frame()->GetTotalFrameSlotCount() -
3378  call_descriptor->CalculateFixedFrameSize();
3379 
3380  if (info()->is_osr()) {
3381  // TurboFan OSR-compiled functions cannot be entered directly.
3382  __ Abort(AbortReason::kShouldNotDirectlyEnterOsrFunction);
3383 
3384  // Unoptimized code jumps directly to this entrypoint while the unoptimized
3385  // frame is still on the stack. Optimized code uses OSR values directly from
3386  // the unoptimized frame. Thus, all that needs to be done is to allocate the
3387  // remaining stack slots.
3388  if (FLAG_code_comments) __ RecordComment("-- OSR entrypoint --");
3389  osr_pc_offset_ = __ pc_offset();
3390  shrink_slots -= static_cast<int>(osr_helper()->UnoptimizedFrameSlots());
3391  ResetSpeculationPoison();
3392  }
3393 
3394  const RegList saves = call_descriptor->CalleeSavedRegisters();
3395  const RegList saves_fp = call_descriptor->CalleeSavedFPRegisters();
3396 
3397  if (shrink_slots > 0) {
3398  DCHECK(frame_access_state()->has_frame());
3399  if (info()->IsWasm() && shrink_slots > 128) {
3400  // For WebAssembly functions with big frames we have to do the stack
3401  // overflow check before we construct the frame. Otherwise we may not
3402  // have enough space on the stack to call the runtime for the stack
3403  // overflow.
3404  Label done;
3405 
3406  // If the frame is bigger than the stack, we throw the stack overflow
3407  // exception unconditionally. Thereby we can avoid the integer overflow
3408  // check in the condition code.
3409  if (shrink_slots * kPointerSize < FLAG_stack_size * 1024) {
3410  __ movq(kScratchRegister,
3411  FieldOperand(kWasmInstanceRegister,
3412  WasmInstanceObject::kRealStackLimitAddressOffset));
3413  __ movq(kScratchRegister, Operand(kScratchRegister, 0));
3414  __ addq(kScratchRegister, Immediate(shrink_slots * kPointerSize));
3415  __ cmpq(rsp, kScratchRegister);
3416  __ j(above_equal, &done);
3417  }
3418  __ movp(rcx, FieldOperand(kWasmInstanceRegister,
3419  WasmInstanceObject::kCEntryStubOffset));
3420  __ Move(rsi, Smi::zero());
3421  __ CallRuntimeWithCEntry(Runtime::kThrowWasmStackOverflow, rcx);
3422  ReferenceMap* reference_map = new (zone()) ReferenceMap(zone());
3423  RecordSafepoint(reference_map, Safepoint::kSimple, 0,
3424  Safepoint::kNoLazyDeopt);
3425  __ AssertUnreachable(AbortReason::kUnexpectedReturnFromWasmTrap);
3426  __ bind(&done);
3427  }
3428 
3429  // Skip callee-saved and return slots, which are created below.
3430  shrink_slots -= base::bits::CountPopulation(saves);
3431  shrink_slots -=
3432  base::bits::CountPopulation(saves_fp) * (kQuadWordSize / kPointerSize);
3433  shrink_slots -= frame()->GetReturnSlotCount();
3434  if (shrink_slots > 0) {
3435  __ subq(rsp, Immediate(shrink_slots * kPointerSize));
3436  }
3437  }
3438 
3439  if (saves_fp != 0) { // Save callee-saved XMM registers.
3440  const uint32_t saves_fp_count = base::bits::CountPopulation(saves_fp);
3441  const int stack_size = saves_fp_count * kQuadWordSize;
3442  // Adjust the stack pointer.
3443  __ subp(rsp, Immediate(stack_size));
3444  // Store the registers on the stack.
3445  int slot_idx = 0;
3446  for (int i = 0; i < XMMRegister::kNumRegisters; i++) {
3447  if (!((1 << i) & saves_fp)) continue;
3448  __ movdqu(Operand(rsp, kQuadWordSize * slot_idx),
3449  XMMRegister::from_code(i));
3450  slot_idx++;
3451  }
3452  }
3453 
3454  if (saves != 0) { // Save callee-saved registers.
3455  for (int i = Register::kNumRegisters - 1; i >= 0; i--) {
3456  if (!((1 << i) & saves)) continue;
3457  __ pushq(Register::from_code(i));
3458  }
3459  }
3460 
3461  // Allocate return slots (located after callee-saved).
3462  if (frame()->GetReturnSlotCount() > 0) {
3463  __ subq(rsp, Immediate(frame()->GetReturnSlotCount() * kPointerSize));
3464  }
3465 }
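// The frame is laid out in the order it is built above: the prologue (pushq
// rbp / Prologue / StubPrologue, plus the wasm instance or argument count
// where applicable), the spill area sized by shrink_slots, the callee-saved
// XMM registers, the callee-saved general registers, and finally the return
// slots; AssembleReturn below tears this down in reverse order.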
3466 
3467 void CodeGenerator::AssembleReturn(InstructionOperand* pop) {
3468  auto call_descriptor = linkage()->GetIncomingDescriptor();
3469 
3470  // Restore registers.
3471  const RegList saves = call_descriptor->CalleeSavedRegisters();
3472  if (saves != 0) {
3473  const int returns = frame()->GetReturnSlotCount();
3474  if (returns != 0) {
3475  __ addq(rsp, Immediate(returns * kPointerSize));
3476  }
3477  for (int i = 0; i < Register::kNumRegisters; i++) {
3478  if (!((1 << i) & saves)) continue;
3479  __ popq(Register::from_code(i));
3480  }
3481  }
3482  const RegList saves_fp = call_descriptor->CalleeSavedFPRegisters();
3483  if (saves_fp != 0) {
3484  const uint32_t saves_fp_count = base::bits::CountPopulation(saves_fp);
3485  const int stack_size = saves_fp_count * kQuadWordSize;
3486  // Load the registers from the stack.
3487  int slot_idx = 0;
3488  for (int i = 0; i < XMMRegister::kNumRegisters; i++) {
3489  if (!((1 << i) & saves_fp)) continue;
3490  __ movdqu(XMMRegister::from_code(i),
3491  Operand(rsp, kQuadWordSize * slot_idx));
3492  slot_idx++;
3493  }
3494  // Adjust the stack pointer.
3495  __ addp(rsp, Immediate(stack_size));
3496  }
3497 
3498  unwinding_info_writer_.MarkBlockWillExit();
3499 
3500  // Might need rcx for scratch if pop_size is too big or if there is a variable
3501  // pop count.
3502  DCHECK_EQ(0u, call_descriptor->CalleeSavedRegisters() & rcx.bit());
3503  DCHECK_EQ(0u, call_descriptor->CalleeSavedRegisters() & rdx.bit());
3504  size_t pop_size = call_descriptor->StackParameterCount() * kPointerSize;
3505  X64OperandConverter g(this, nullptr);
3506  if (call_descriptor->IsCFunctionCall()) {
3507  AssembleDeconstructFrame();
3508  } else if (frame_access_state()->has_frame()) {
3509  if (pop->IsImmediate() && g.ToConstant(pop).ToInt32() == 0) {
3510  // Canonicalize JSFunction return sites for now.
3511  if (return_label_.is_bound()) {
3512  __ jmp(&return_label_);
3513  return;
3514  } else {
3515  __ bind(&return_label_);
3516  AssembleDeconstructFrame();
3517  }
3518  } else {
3519  AssembleDeconstructFrame();
3520  }
3521  }
3522 
3523  if (pop->IsImmediate()) {
3524  pop_size += g.ToConstant(pop).ToInt32() * kPointerSize;
3525  CHECK_LT(pop_size, static_cast<size_t>(std::numeric_limits<int>::max()));
3526  __ Ret(static_cast<int>(pop_size), rcx);
3527  } else {
3528  Register pop_reg = g.ToRegister(pop);
3529  Register scratch_reg = pop_reg == rcx ? rdx : rcx;
3530  __ popq(scratch_reg);
3531  __ leaq(rsp, Operand(rsp, pop_reg, times_8, static_cast<int>(pop_size)));
3532  __ jmp(scratch_reg);
3533  }
3534 }
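// For a register-valued pop count the return address is first popped into a
// scratch register (rcx, or rdx when the pop count itself is in rcx), rsp is
// advanced past pop_reg * 8 + pop_size bytes of arguments with leaq, and
// control returns through an indirect jmp; the DCHECKs above guarantee that
// neither scratch register is callee-saved.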
3535 
3536 void CodeGenerator::FinishCode() { tasm()->PatchConstPool(); }
3537 
3538 void CodeGenerator::AssembleMove(InstructionOperand* source,
3539  InstructionOperand* destination) {
3540  X64OperandConverter g(this, nullptr);
3541  // Helper function to write the given constant to the dst register.
3542  auto MoveConstantToRegister = [&](Register dst, Constant src) {
3543  switch (src.type()) {
3544  case Constant::kInt32: {
3545  if (RelocInfo::IsWasmReference(src.rmode())) {
3546  __ movq(dst, src.ToInt64(), src.rmode());
3547  } else {
3548  int32_t value = src.ToInt32();
3549  if (value == 0) {
3550  __ xorl(dst, dst);
3551  } else {
3552  __ movl(dst, Immediate(value));
3553  }
3554  }
3555  break;
3556  }
3557  case Constant::kInt64:
3558  if (RelocInfo::IsWasmReference(src.rmode())) {
3559  __ movq(dst, src.ToInt64(), src.rmode());
3560  } else {
3561  __ Set(dst, src.ToInt64());
3562  }
3563  break;
3564  case Constant::kFloat32:
3565  __ MoveNumber(dst, src.ToFloat32());
3566  break;
3567  case Constant::kFloat64:
3568  __ MoveNumber(dst, src.ToFloat64().value());
3569  break;
3570  case Constant::kExternalReference:
3571  __ Move(dst, src.ToExternalReference());
3572  break;
3573  case Constant::kHeapObject: {
3574  Handle<HeapObject> src_object = src.ToHeapObject();
3575  RootIndex index;
3576  if (IsMaterializableFromRoot(src_object, &index)) {
3577  __ LoadRoot(dst, index);
3578  } else {
3579  __ Move(dst, src_object);
3580  }
3581  break;
3582  }
3583  case Constant::kDelayedStringConstant: {
3584  const StringConstantBase* src_constant = src.ToDelayedStringConstant();
3585  __ MoveStringConstant(dst, src_constant);
3586  break;
3587  }
3588  case Constant::kRpoNumber:
3589  UNREACHABLE(); // TODO(dcarney): load of labels on x64.
3590  break;
3591  }
3592  };
3593  // Helper function to write the given constant to the stack.
3594  auto MoveConstantToSlot = [&](Operand dst, Constant src) {
3595  if (!RelocInfo::IsWasmReference(src.rmode())) {
3596  switch (src.type()) {
3597  case Constant::kInt32:
3598  __ movq(dst, Immediate(src.ToInt32()));
3599  return;
3600  case Constant::kInt64:
3601  __ Set(dst, src.ToInt64());
3602  return;
3603  default:
3604  break;
3605  }
3606  }
3607  MoveConstantToRegister(kScratchRegister, src);
3608  __ movq(dst, kScratchRegister);
3609  };
3610  // Dispatch on the source and destination operand kinds.
3611  switch (MoveType::InferMove(source, destination)) {
3612  case MoveType::kRegisterToRegister:
3613  if (source->IsRegister()) {
3614  __ movq(g.ToRegister(destination), g.ToRegister(source));
3615  } else {
3616  DCHECK(source->IsFPRegister());
3617  __ Movapd(g.ToDoubleRegister(destination), g.ToDoubleRegister(source));
3618  }
3619  return;
3620  case MoveType::kRegisterToStack: {
3621  Operand dst = g.ToOperand(destination);
3622  if (source->IsRegister()) {
3623  __ movq(dst, g.ToRegister(source));
3624  } else {
3625  DCHECK(source->IsFPRegister());
3626  XMMRegister src = g.ToDoubleRegister(source);
3627  MachineRepresentation rep =
3628  LocationOperand::cast(source)->representation();
3629  if (rep != MachineRepresentation::kSimd128) {
3630  __ Movsd(dst, src);
3631  } else {
3632  __ Movups(dst, src);
3633  }
3634  }
3635  return;
3636  }
3637  case MoveType::kStackToRegister: {
3638  Operand src = g.ToOperand(source);
3639  if (source->IsStackSlot()) {
3640  __ movq(g.ToRegister(destination), src);
3641  } else {
3642  DCHECK(source->IsFPStackSlot());
3643  XMMRegister dst = g.ToDoubleRegister(destination);
3644  MachineRepresentation rep =
3645  LocationOperand::cast(source)->representation();
3646  if (rep != MachineRepresentation::kSimd128) {
3647  __ Movsd(dst, src);
3648  } else {
3649  __ Movups(dst, src);
3650  }
3651  }
3652  return;
3653  }
3654  case MoveType::kStackToStack: {
3655  Operand src = g.ToOperand(source);
3656  Operand dst = g.ToOperand(destination);
3657  if (source->IsStackSlot()) {
3658  // Spill on demand to use a temporary register for memory-to-memory
3659  // moves.
3660  __ movq(kScratchRegister, src);
3661  __ movq(dst, kScratchRegister);
3662  } else {
3663  MachineRepresentation rep =
3664  LocationOperand::cast(source)->representation();
3665  if (rep != MachineRepresentation::kSimd128) {
3666  __ Movsd(kScratchDoubleReg, src);
3667  __ Movsd(dst, kScratchDoubleReg);
3668  } else {
3669  DCHECK(source->IsSimd128StackSlot());
3670  __ Movups(kScratchDoubleReg, src);
3671  __ Movups(dst, kScratchDoubleReg);
3672  }
3673  }
3674  return;
3675  }
3676  case MoveType::kConstantToRegister: {
3677  Constant src = g.ToConstant(source);
3678  if (destination->IsRegister()) {
3679  MoveConstantToRegister(g.ToRegister(destination), src);
3680  } else {
3681  DCHECK(destination->IsFPRegister());
3682  XMMRegister dst = g.ToDoubleRegister(destination);
3683  if (src.type() == Constant::kFloat32) {
3684  // TODO(turbofan): Can we do better here?
3685  __ Move(dst, bit_cast<uint32_t>(src.ToFloat32()));
3686  } else {
3687  DCHECK_EQ(src.type(), Constant::kFloat64);
3688  __ Move(dst, src.ToFloat64().AsUint64());
3689  }
3690  }
3691  return;
3692  }
3693  case MoveType::kConstantToStack: {
3694  Constant src = g.ToConstant(source);
3695  Operand dst = g.ToOperand(destination);
3696  if (destination->IsStackSlot()) {
3697  MoveConstantToSlot(dst, src);
3698  } else {
3699  DCHECK(destination->IsFPStackSlot());
3700  if (src.type() == Constant::kFloat32) {
3701  __ movl(dst, Immediate(bit_cast<uint32_t>(src.ToFloat32())));
3702  } else {
3703  DCHECK_EQ(src.type(), Constant::kFloat64);
3704  __ movq(kScratchRegister, src.ToFloat64().AsUint64());
3705  __ movq(dst, kScratchRegister);
3706  }
3707  }
3708  return;
3709  }
3710  }
3711  UNREACHABLE();
3712 }
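// MoveType::InferMove classifies the (source, destination) pair; because x64
// has no memory-to-memory mov, stack-to-stack moves bounce through
// kScratchRegister (kScratchDoubleReg for FP and SIMD data), and a zero int32
// constant is materialized with the shorter xorl dst, dst.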
3713 
3714 void CodeGenerator::AssembleSwap(InstructionOperand* source,
3715  InstructionOperand* destination) {
3716  X64OperandConverter g(this, nullptr);
3717  // Dispatch on the source and destination operand kinds. Not all
3718  // combinations are possible.
3719  switch (MoveType::InferSwap(source, destination)) {
3720  case MoveType::kRegisterToRegister: {
3721  if (source->IsRegister()) {
3722  Register src = g.ToRegister(source);
3723  Register dst = g.ToRegister(destination);
3724  __ movq(kScratchRegister, src);
3725  __ movq(src, dst);
3726  __ movq(dst, kScratchRegister);
3727  } else {
3728  DCHECK(source->IsFPRegister());
3729  XMMRegister src = g.ToDoubleRegister(source);
3730  XMMRegister dst = g.ToDoubleRegister(destination);
3731  __ Movapd(kScratchDoubleReg, src);
3732  __ Movapd(src, dst);
3733  __ Movapd(dst, kScratchDoubleReg);
3734  }
3735  return;
3736  }
3737  case MoveType::kRegisterToStack: {
3738  if (source->IsRegister()) {
3739  Register src = g.ToRegister(source);
3740  __ pushq(src);
3741  frame_access_state()->IncreaseSPDelta(1);
3742  unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
3743  kPointerSize);
3744  __ movq(src, g.ToOperand(destination));
3745  frame_access_state()->IncreaseSPDelta(-1);
3746  __ popq(g.ToOperand(destination));
3747  unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
3748  -kPointerSize);
3749  } else {
3750  DCHECK(source->IsFPRegister());
3751  XMMRegister src = g.ToDoubleRegister(source);
3752  Operand dst = g.ToOperand(destination);
3753  MachineRepresentation rep =
3754  LocationOperand::cast(source)->representation();
3755  if (rep != MachineRepresentation::kSimd128) {
3756  __ Movsd(kScratchDoubleReg, src);
3757  __ Movsd(src, dst);
3758  __ Movsd(dst, kScratchDoubleReg);
3759  } else {
3760  __ Movups(kScratchDoubleReg, src);
3761  __ Movups(src, dst);
3762  __ Movups(dst, kScratchDoubleReg);
3763  }
3764  }
3765  return;
3766  }
3767  case MoveType::kStackToStack: {
3768  Operand src = g.ToOperand(source);
3769  Operand dst = g.ToOperand(destination);
3770  MachineRepresentation rep =
3771  LocationOperand::cast(source)->representation();
3772  if (rep != MachineRepresentation::kSimd128) {
3773  Register tmp = kScratchRegister;
3774  __ movq(tmp, dst);
3775  __ pushq(src); // Then use stack to copy src to destination.
3776  unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
3777  kPointerSize);
3778  __ popq(dst);
3779  unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
3780  -kPointerSize);
3781  __ movq(src, tmp);
3782  } else {
3783  // Without AVX, misaligned reads and writes will trap. Move using the
3784  // stack, in two parts.
3785  __ movups(kScratchDoubleReg, dst); // Save dst in scratch register.
3786  __ pushq(src); // Then use stack to copy src to destination.
3787  unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
3788  kPointerSize);
3789  __ popq(dst);
3790  unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
3791  -kPointerSize);
3792  __ pushq(g.ToOperand(source, kPointerSize));
3793  unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
3794  kPointerSize);
3795  __ popq(g.ToOperand(destination, kPointerSize));
3796  unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
3797  -kPointerSize);
3798  __ movups(src, kScratchDoubleReg);
3799  }
3800  return;
3801  }
3802  default:
3803  UNREACHABLE();
3804  break;
3805  }
3806 }
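// Only one general-purpose scratch register is available, so the
// stack-to-stack swap keeps one value in kScratchRegister and copies the
// other through an 8-byte pushq/popq; the 16-byte SIMD case parks the
// destination value in kScratchDoubleReg and moves the other operand in two
// 8-byte halves the same way.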
3807 
3808 void CodeGenerator::AssembleJumpTable(Label** targets, size_t target_count) {
3809  for (size_t index = 0; index < target_count; ++index) {
3810  __ dq(targets[index]);
3811  }
3812 }
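// dq emits one 8-byte absolute entry per target label; this is the table that
// AssembleArchTableSwitch indexes with
// Operand(kScratchRegister, input, times_8, 0).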
3813 
3814 #undef __
3815 
3816 } // namespace compiler
3817 } // namespace internal
3818 } // namespace v8