// Source listing extracted from: V8 API Reference, 7.2.502.16 (for Deno 0.2.4)
// File: code-generator-ia32.cc
1 // Copyright 2013 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "src/compiler/backend/code-generator.h"
6 
7 #include "src/assembler-inl.h"
8 #include "src/callable.h"
9 #include "src/compiler/backend/code-generator-impl.h"
10 #include "src/compiler/backend/gap-resolver.h"
11 #include "src/compiler/node-matchers.h"
12 #include "src/compiler/osr.h"
13 #include "src/frame-constants.h"
14 #include "src/frames.h"
15 #include "src/heap/heap-inl.h" // crbug.com/v8/8499
16 #include "src/ia32/assembler-ia32.h"
17 #include "src/macro-assembler.h"
18 #include "src/objects/smi.h"
19 #include "src/optimized-compilation-info.h"
20 #include "src/wasm/wasm-code-manager.h"
21 #include "src/wasm/wasm-objects.h"
22 
23 namespace v8 {
24 namespace internal {
25 namespace compiler {
26 
27 #define __ tasm()->
28 
29 #define kScratchDoubleReg xmm0
30 
31 // Adds IA-32 specific methods for decoding operands.
33  public:
35  : InstructionOperandConverter(gen, instr) {}
36 
37  Operand InputOperand(size_t index, int extra = 0) {
38  return ToOperand(instr_->InputAt(index), extra);
39  }
40 
41  Immediate InputImmediate(size_t index) {
42  return ToImmediate(instr_->InputAt(index));
43  }
44 
45  Operand OutputOperand() { return ToOperand(instr_->Output()); }
46 
47  Operand ToOperand(InstructionOperand* op, int extra = 0) {
48  if (op->IsRegister()) {
49  DCHECK_EQ(0, extra);
50  return Operand(ToRegister(op));
51  } else if (op->IsFPRegister()) {
52  DCHECK_EQ(0, extra);
53  return Operand(ToDoubleRegister(op));
54  }
55  DCHECK(op->IsStackSlot() || op->IsFPStackSlot());
56  return SlotToOperand(AllocatedOperand::cast(op)->index(), extra);
57  }
58 
59  Operand SlotToOperand(int slot, int extra = 0) {
60  FrameOffset offset = frame_access_state()->GetFrameOffset(slot);
61  return Operand(offset.from_stack_pointer() ? esp : ebp,
62  offset.offset() + extra);
63  }
64 
65  Immediate ToImmediate(InstructionOperand* operand) {
66  Constant constant = ToConstant(operand);
67  if (constant.type() == Constant::kInt32 &&
68  RelocInfo::IsWasmReference(constant.rmode())) {
69  return Immediate(static_cast<Address>(constant.ToInt32()),
70  constant.rmode());
71  }
72  switch (constant.type()) {
73  case Constant::kInt32:
74  return Immediate(constant.ToInt32());
75  case Constant::kFloat32:
76  return Immediate::EmbeddedNumber(constant.ToFloat32());
77  case Constant::kFloat64:
78  return Immediate::EmbeddedNumber(constant.ToFloat64().value());
79  case Constant::kExternalReference:
80  return Immediate(constant.ToExternalReference());
81  case Constant::kHeapObject:
82  return Immediate(constant.ToHeapObject());
83  case Constant::kDelayedStringConstant:
84  return Immediate::EmbeddedStringConstant(
85  constant.ToDelayedStringConstant());
86  case Constant::kInt64:
87  break;
88  case Constant::kRpoNumber:
89  return Immediate::CodeRelativeOffset(ToLabel(operand));
90  }
91  UNREACHABLE();
92  }
93 
94  static size_t NextOffset(size_t* offset) {
95  size_t i = *offset;
96  (*offset)++;
97  return i;
98  }
99 
100  static ScaleFactor ScaleFor(AddressingMode one, AddressingMode mode) {
101  STATIC_ASSERT(0 == static_cast<int>(times_1));
102  STATIC_ASSERT(1 == static_cast<int>(times_2));
103  STATIC_ASSERT(2 == static_cast<int>(times_4));
104  STATIC_ASSERT(3 == static_cast<int>(times_8));
105  int scale = static_cast<int>(mode - one);
106  DCHECK(scale >= 0 && scale < 4);
107  return static_cast<ScaleFactor>(scale);
108  }
109 
110  Operand MemoryOperand(size_t* offset) {
111  AddressingMode mode = AddressingModeField::decode(instr_->opcode());
112  switch (mode) {
113  case kMode_MR: {
114  Register base = InputRegister(NextOffset(offset));
115  int32_t disp = 0;
116  return Operand(base, disp);
117  }
118  case kMode_MRI: {
119  Register base = InputRegister(NextOffset(offset));
120  Constant ctant = ToConstant(instr_->InputAt(NextOffset(offset)));
121  return Operand(base, ctant.ToInt32(), ctant.rmode());
122  }
123  case kMode_MR1:
124  case kMode_MR2:
125  case kMode_MR4:
126  case kMode_MR8: {
127  Register base = InputRegister(NextOffset(offset));
128  Register index = InputRegister(NextOffset(offset));
129  ScaleFactor scale = ScaleFor(kMode_MR1, mode);
130  int32_t disp = 0;
131  return Operand(base, index, scale, disp);
132  }
133  case kMode_MR1I:
134  case kMode_MR2I:
135  case kMode_MR4I:
136  case kMode_MR8I: {
137  Register base = InputRegister(NextOffset(offset));
138  Register index = InputRegister(NextOffset(offset));
139  ScaleFactor scale = ScaleFor(kMode_MR1I, mode);
140  Constant ctant = ToConstant(instr_->InputAt(NextOffset(offset)));
141  return Operand(base, index, scale, ctant.ToInt32(), ctant.rmode());
142  }
143  case kMode_M1:
144  case kMode_M2:
145  case kMode_M4:
146  case kMode_M8: {
147  Register index = InputRegister(NextOffset(offset));
148  ScaleFactor scale = ScaleFor(kMode_M1, mode);
149  int32_t disp = 0;
150  return Operand(index, scale, disp);
151  }
152  case kMode_M1I:
153  case kMode_M2I:
154  case kMode_M4I:
155  case kMode_M8I: {
156  Register index = InputRegister(NextOffset(offset));
157  ScaleFactor scale = ScaleFor(kMode_M1I, mode);
158  Constant ctant = ToConstant(instr_->InputAt(NextOffset(offset)));
159  return Operand(index, scale, ctant.ToInt32(), ctant.rmode());
160  }
161  case kMode_MI: {
162  Constant ctant = ToConstant(instr_->InputAt(NextOffset(offset)));
163  return Operand(ctant.ToInt32(), ctant.rmode());
164  }
165  case kMode_None:
166  UNREACHABLE();
167  }
168  UNREACHABLE();
169  }
170 
171  Operand MemoryOperand(size_t first_input = 0) {
172  return MemoryOperand(&first_input);
173  }
174 
175  Operand NextMemoryOperand(size_t offset = 0) {
176  AddressingMode mode = AddressingModeField::decode(instr_->opcode());
177  Register base = InputRegister(NextOffset(&offset));
178  const int32_t disp = 4;
179  if (mode == kMode_MR1) {
180  Register index = InputRegister(NextOffset(&offset));
181  ScaleFactor scale = ScaleFor(kMode_MR1, kMode_MR1);
182  return Operand(base, index, scale, disp);
183  } else if (mode == kMode_MRI) {
184  Constant ctant = ToConstant(instr_->InputAt(NextOffset(&offset)));
185  return Operand(base, ctant.ToInt32() + disp, ctant.rmode());
186  } else {
187  UNREACHABLE();
188  }
189  }
190 
191  void MoveInstructionOperandToRegister(Register destination,
192  InstructionOperand* op) {
193  if (op->IsImmediate() || op->IsConstant()) {
194  gen_->tasm()->mov(destination, ToImmediate(op));
195  } else if (op->IsRegister()) {
196  gen_->tasm()->Move(destination, ToRegister(op));
197  } else {
198  gen_->tasm()->mov(destination, ToOperand(op));
199  }
200  }
201 };
202 
203 namespace {
204 
205 bool HasImmediateInput(Instruction* instr, size_t index) {
206  return instr->InputAt(index)->IsImmediate();
207 }
208 
// Out-of-line code that materializes a float32 quiet NaN in |result|.
class OutOfLineLoadFloat32NaN final : public OutOfLineCode {
 public:
  OutOfLineLoadFloat32NaN(CodeGenerator* gen, XMMRegister result)
      : OutOfLineCode(gen), result_(result) {}

  void Generate() final {
    // Zero the register, then compute 0.0f / 0.0f, which produces NaN.
    __ xorps(result_, result_);
    __ divss(result_, result_);
  }

 private:
  XMMRegister const result_;
};
222 
// Out-of-line code that materializes a float64 quiet NaN in |result|.
class OutOfLineLoadFloat64NaN final : public OutOfLineCode {
 public:
  OutOfLineLoadFloat64NaN(CodeGenerator* gen, XMMRegister result)
      : OutOfLineCode(gen), result_(result) {}

  void Generate() final {
    // Zero the register, then compute 0.0 / 0.0, which produces NaN.
    __ xorpd(result_, result_);
    __ divsd(result_, result_);
  }

 private:
  XMMRegister const result_;
};
236 
// Out-of-line slow path for kArchTruncateDoubleToI: spills the double input
// to a stack slot, calls the DoubleToI builtin (or the wasm runtime stub when
// compiling wasm code), and loads the 32-bit result back into |result_|.
class OutOfLineTruncateDoubleToI final : public OutOfLineCode {
 public:
  OutOfLineTruncateDoubleToI(CodeGenerator* gen, Register result,
                             XMMRegister input, StubCallMode stub_mode)
      : OutOfLineCode(gen),
        result_(result),
        input_(input),
        stub_mode_(stub_mode),
        isolate_(gen->isolate()),
        zone_(gen->zone()) {}

  void Generate() final {
    // The stub communicates both input and result through the stack slot.
    __ sub(esp, Immediate(kDoubleSize));
    __ movsd(MemOperand(esp, 0), input_);
    if (stub_mode_ == StubCallMode::kCallWasmRuntimeStub) {
      // A direct call to a wasm runtime stub defined in this module.
      // Just encode the stub index. This will be patched when the code
      // is added to the native module and copied into wasm code space.
      __ wasm_call(wasm::WasmCode::kDoubleToI, RelocInfo::WASM_STUB_CALL);
    } else {
      __ Call(BUILTIN_CODE(isolate_, DoubleToI), RelocInfo::CODE_TARGET);
    }
    __ mov(result_, MemOperand(esp, 0));
    __ add(esp, Immediate(kDoubleSize));
  }

 private:
  Register const result_;
  XMMRegister const input_;
  StubCallMode stub_mode_;
  Isolate* isolate_;
  Zone* zone_;
};
270 
// Out-of-line slow path of the write barrier: invoked after a store of
// |value| into |operand| (a field of |object|), it filters out cases that
// need no barrier and otherwise calls the RecordWrite stub.
class OutOfLineRecordWrite final : public OutOfLineCode {
 public:
  OutOfLineRecordWrite(CodeGenerator* gen, Register object, Operand operand,
                       Register value, Register scratch0, Register scratch1,
                       RecordWriteMode mode, StubCallMode stub_mode)
      : OutOfLineCode(gen),
        object_(object),
        operand_(operand),
        value_(value),
        scratch0_(scratch0),
        scratch1_(scratch1),
        mode_(mode),
        stub_mode_(stub_mode),
        zone_(gen->zone()) {}

  void Generate() final {
    // Smis are not heap pointers; skip the barrier entirely.
    if (mode_ > RecordWriteMode::kValueIsPointer) {
      __ JumpIfSmi(value_, exit());
    }
    // Skip the stub call when the page of the stored value is not marked
    // as interesting for incoming pointers.
    __ CheckPageFlag(value_, scratch0_,
                     MemoryChunk::kPointersToHereAreInterestingMask, zero,
                     exit());
    // The stub expects the address of the slot, not its contents.
    __ lea(scratch1_, operand_);
    RememberedSetAction const remembered_set_action =
        mode_ > RecordWriteMode::kValueIsMap ? EMIT_REMEMBERED_SET
                                             : OMIT_REMEMBERED_SET;
    SaveFPRegsMode const save_fp_mode =
        frame()->DidAllocateDoubleRegisters() ? kSaveFPRegs : kDontSaveFPRegs;
    if (stub_mode_ == StubCallMode::kCallWasmRuntimeStub) {
      // A direct call to a wasm runtime stub defined in this module.
      // Just encode the stub index. This will be patched when the code
      // is added to the native module and copied into wasm code space.
      __ CallRecordWriteStub(object_, scratch1_, remembered_set_action,
                             save_fp_mode, wasm::WasmCode::kWasmRecordWrite);
    } else {
      __ CallRecordWriteStub(object_, scratch1_, remembered_set_action,
                             save_fp_mode);
    }
  }

 private:
  Register const object_;
  Operand const operand_;
  Register const value_;
  Register const scratch0_;
  Register const scratch1_;
  RecordWriteMode const mode_;
  StubCallMode const stub_mode_;
  Zone* zone_;
};
321 
322 } // namespace
323 
// Emits a compare/test instruction. When the instruction encodes an
// addressing mode, the left operand is a memory operand and the right one is
// the following input (immediate or register). Otherwise both operands come
// from inputs 0 and 1, choosing the register/operand/immediate form that
// matches what the instruction actually supplies.
#define ASSEMBLE_COMPARE(asm_instr)                                   \
  do {                                                                \
    if (AddressingModeField::decode(instr->opcode()) != kMode_None) { \
      size_t index = 0;                                               \
      Operand left = i.MemoryOperand(&index);                         \
      if (HasImmediateInput(instr, index)) {                          \
        __ asm_instr(left, i.InputImmediate(index));                  \
      } else {                                                        \
        __ asm_instr(left, i.InputRegister(index));                   \
      }                                                               \
    } else {                                                          \
      if (HasImmediateInput(instr, 1)) {                              \
        if (instr->InputAt(0)->IsRegister()) {                        \
          __ asm_instr(i.InputRegister(0), i.InputImmediate(1));      \
        } else {                                                      \
          __ asm_instr(i.InputOperand(0), i.InputImmediate(1));       \
        }                                                             \
      } else {                                                        \
        if (instr->InputAt(1)->IsRegister()) {                        \
          __ asm_instr(i.InputRegister(0), i.InputRegister(1));       \
        } else {                                                      \
          __ asm_instr(i.InputRegister(0), i.InputOperand(1));        \
        }                                                             \
      }                                                               \
    }                                                                 \
  } while (0)
350 
// Calls the C implementation of the two-argument ieee754 function |name|,
// passing both doubles on the stack and moving the x87 st(0) result into the
// output XMM register via a stack slot.
#define ASSEMBLE_IEEE754_BINOP(name)                                     \
  do {                                                                   \
    /* Pass two doubles as arguments on the stack. */                    \
    __ PrepareCallCFunction(4, eax);                                     \
    __ movsd(Operand(esp, 0 * kDoubleSize), i.InputDoubleRegister(0));   \
    __ movsd(Operand(esp, 1 * kDoubleSize), i.InputDoubleRegister(1));   \
    __ CallCFunction(ExternalReference::ieee754_##name##_function(), 4); \
    /* Return value is in st(0) on ia32. */                              \
    /* Store it into the result register. */                             \
    __ sub(esp, Immediate(kDoubleSize));                                 \
    __ fstp_d(Operand(esp, 0));                                          \
    __ movsd(i.OutputDoubleRegister(), Operand(esp, 0));                 \
    __ add(esp, Immediate(kDoubleSize));                                 \
  } while (false)
365 
// Calls the C implementation of the one-argument ieee754 function |name|,
// passing the double on the stack and moving the x87 st(0) result into the
// output XMM register via a stack slot.
#define ASSEMBLE_IEEE754_UNOP(name)                                      \
  do {                                                                   \
    /* Pass one double as argument on the stack. */                      \
    __ PrepareCallCFunction(2, eax);                                     \
    __ movsd(Operand(esp, 0 * kDoubleSize), i.InputDoubleRegister(0));   \
    __ CallCFunction(ExternalReference::ieee754_##name##_function(), 2); \
    /* Return value is in st(0) on ia32. */                              \
    /* Store it into the result register. */                             \
    __ sub(esp, Immediate(kDoubleSize));                                 \
    __ fstp_d(Operand(esp, 0));                                          \
    __ movsd(i.OutputDoubleRegister(), Operand(esp, 0));                 \
    __ add(esp, Immediate(kDoubleSize));                                 \
  } while (false)
379 
// Emits a two-operand ALU instruction: right-hand side is a memory operand
// when an addressing mode is encoded, otherwise input 1 (immediate or
// register/stack operand).
#define ASSEMBLE_BINOP(asm_instr)                                     \
  do {                                                                \
    if (AddressingModeField::decode(instr->opcode()) != kMode_None) { \
      size_t index = 1;                                               \
      Operand right = i.MemoryOperand(&index);                        \
      __ asm_instr(i.InputRegister(0), right);                        \
    } else {                                                          \
      if (HasImmediateInput(instr, 1)) {                              \
        __ asm_instr(i.InputOperand(0), i.InputImmediate(1));         \
      } else {                                                        \
        __ asm_instr(i.InputRegister(0), i.InputOperand(1));          \
      }                                                               \
    }                                                                 \
  } while (0)
394 
// Emits an atomic read-modify-write loop: load the old value into eax,
// compute old OP input into a temp register, then lock cmpxchg; retries
// until the memory location was not modified concurrently (cmpxchg sets ZF
// on success).
#define ASSEMBLE_ATOMIC_BINOP(bin_inst, mov_inst, cmpxchg_inst) \
  do {                                                          \
    Label binop;                                                \
    __ bind(&binop);                                            \
    __ mov_inst(eax, i.MemoryOperand(1));                       \
    __ Move(i.TempRegister(0), eax);                            \
    __ bin_inst(i.TempRegister(0), i.InputRegister(0));         \
    __ lock();                                                  \
    __ cmpxchg_inst(i.MemoryOperand(1), i.TempRegister(0));     \
    __ j(not_equal, &binop);                                    \
  } while (false)
406 
// Emits a 64-bit atomic read-modify-write loop on ia32: loads the current
// value into edx:eax, computes the new value with |instr1|/|instr2| (low/high
// word ops, e.g. add/adc) into ebx and input register 1, then retries via
// lock cmpxchg8b until no other thread intervened. ebx (a root/reserved
// register here) and input 1 are saved and restored around the attempt.
// NOTE(review): input 1 is presumably constrained to ecx by the instruction
// selector, since cmpxchg8b takes the new value in ecx:ebx — confirm.
// NOTE(review): unlike the other ASSEMBLE_* macros, this one keeps a
// trailing semicolon after `while (false)`; call sites may rely on it, so it
// is left as-is — confirm before normalizing.
#define ASSEMBLE_I64ATOMIC_BINOP(instr1, instr2)                \
  do {                                                          \
    Label binop;                                                \
    __ bind(&binop);                                            \
    __ mov(eax, i.MemoryOperand(2));                            \
    __ mov(edx, i.NextMemoryOperand(2));                        \
    __ push(ebx);                                               \
    frame_access_state()->IncreaseSPDelta(1);                   \
    i.MoveInstructionOperandToRegister(ebx, instr->InputAt(0)); \
    __ push(i.InputRegister(1));                                \
    __ instr1(ebx, eax);                                        \
    __ instr2(i.InputRegister(1), edx);                         \
    __ lock();                                                  \
    __ cmpxchg8b(i.MemoryOperand(2));                           \
    __ pop(i.InputRegister(1));                                 \
    __ pop(ebx);                                                \
    frame_access_state()->IncreaseSPDelta(-1);                  \
    __ j(not_equal, &binop);                                    \
  } while (false);
426 
// Emits an extending move (e.g. movsx/movzx) into the output register, from
// a memory operand when an addressing mode is encoded, otherwise from input
// 0 as a register or stack operand.
#define ASSEMBLE_MOVX(mov_instr)                            \
  do {                                                      \
    if (instr->addressing_mode() != kMode_None) {           \
      __ mov_instr(i.OutputRegister(), i.MemoryOperand());  \
    } else if (instr->InputAt(0)->IsRegister()) {           \
      __ mov_instr(i.OutputRegister(), i.InputRegister(0)); \
    } else {                                                \
      __ mov_instr(i.OutputRegister(), i.InputOperand(0));  \
    }                                                       \
  } while (0)
437 
// Emits a punpck*-style SIMD shuffle: uses the non-destructive three-operand
// AVX form when available, otherwise the destructive SSE form, which
// requires the output to alias input 0. With a single input, src1 aliases
// src0 (interleave with itself).
#define ASSEMBLE_SIMD_PUNPCK_SHUFFLE(opcode)                         \
  do {                                                               \
    XMMRegister src0 = i.InputSimd128Register(0);                    \
    Operand src1 = i.InputOperand(instr->InputCount() == 2 ? 1 : 0); \
    if (CpuFeatures::IsSupported(AVX)) {                             \
      CpuFeatureScope avx_scope(tasm(), AVX);                        \
      __ v##opcode(i.OutputSimd128Register(), src0, src1);           \
    } else {                                                         \
      DCHECK_EQ(i.OutputSimd128Register(), src0);                    \
      __ opcode(i.OutputSimd128Register(), src1);                    \
    }                                                                \
  } while (false)
450 
// Emits a SIMD shuffle that takes an immediate control byte, using the AVX
// three-operand form when available and otherwise the destructive SSE form
// at the given feature level (output must alias input 0).
// NOTE(review): this macro is a bare if/else, not wrapped in do/while(0), so
// its expansion is not a single statement; call sites may or may not supply
// their own semicolon — confirm before wrapping.
#define ASSEMBLE_SIMD_IMM_SHUFFLE(opcode, SSELevel, imm)               \
  if (CpuFeatures::IsSupported(AVX)) {                                 \
    CpuFeatureScope avx_scope(tasm(), AVX);                            \
    __ v##opcode(i.OutputSimd128Register(), i.InputSimd128Register(0), \
                 i.InputOperand(1), imm);                              \
  } else {                                                             \
    CpuFeatureScope sse_scope(tasm(), SSELevel);                       \
    DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));   \
    __ opcode(i.OutputSimd128Register(), i.InputOperand(1), imm);      \
  }
461 
// Tears down the current frame: restores esp from ebp and pops the saved
// frame pointer (the mov/pop pair is equivalent to `leave`).
void CodeGenerator::AssembleDeconstructFrame() {
  __ mov(esp, ebp);
  __ pop(ebp);
}
466 
// Prepares frame state for a tail call: if a frame was built, reload the
// caller's frame pointer (stored at [ebp]), then switch slot addressing to
// be SP-relative.
void CodeGenerator::AssemblePrepareTailCall() {
  if (frame_access_state()->has_frame()) {
    __ mov(ebp, MemOperand(ebp, 0));
  }
  frame_access_state()->SetFrameAccessToSP();
}
473 
// If the current frame is an arguments adaptor frame, drops it before a tail
// call so the callee sees the real caller frame. |args_reg| holds the
// callee's argument count; the three unnamed Register parameters exist only
// to match the architecture-independent signature.
void CodeGenerator::AssemblePopArgumentsAdaptorFrame(Register args_reg,
                                                     Register, Register,
                                                     Register) {
  // There are not enough temp registers left on ia32 for a call instruction
  // so we pick some scratch registers and save/restore them manually here.
  int scratch_count = 3;
  Register scratch1 = esi;
  Register scratch2 = ecx;
  Register scratch3 = edx;
  DCHECK(!AreAliased(args_reg, scratch1, scratch2, scratch3));
  Label done;

  // Check if current frame is an arguments adaptor frame.
  __ cmp(Operand(ebp, StandardFrameConstants::kContextOffset),
         Immediate(StackFrame::TypeToMarker(StackFrame::ARGUMENTS_ADAPTOR)));
  __ j(not_equal, &done, Label::kNear);

  __ push(scratch1);
  __ push(scratch2);
  __ push(scratch3);

  // Load arguments count from current arguments adaptor frame (note, it
  // does not include receiver).
  Register caller_args_count_reg = scratch1;
  __ mov(caller_args_count_reg,
         Operand(ebp, ArgumentsAdaptorFrameConstants::kLengthOffset));
  __ SmiUntag(caller_args_count_reg);

  // Remove the adaptor frame and its pushed arguments from the stack.
  ParameterCount callee_args_count(args_reg);
  __ PrepareForTailCall(callee_args_count, caller_args_count_reg, scratch2,
                        scratch3, scratch_count);
  __ pop(scratch3);
  __ pop(scratch2);
  __ pop(scratch1);

  __ bind(&done);
}
511 
512 namespace {
513 
// Adjusts esp so that the next free slot above SP becomes
// |new_slot_above_sp|, growing the stack as needed and shrinking it only
// when |allow_shrinkage| is set. The frame access state's SP delta is kept
// in sync with the emitted adjustment.
void AdjustStackPointerForTailCall(TurboAssembler* tasm,
                                   FrameAccessState* state,
                                   int new_slot_above_sp,
                                   bool allow_shrinkage = true) {
  int current_sp_offset = state->GetSPToFPSlotCount() +
                          StandardFrameConstants::kFixedSlotCountAboveFp;
  int stack_slot_delta = new_slot_above_sp - current_sp_offset;
  if (stack_slot_delta > 0) {
    tasm->sub(esp, Immediate(stack_slot_delta * kPointerSize));
    state->IncreaseSPDelta(stack_slot_delta);
  } else if (allow_shrinkage && stack_slot_delta < 0) {
    tasm->add(esp, Immediate(-stack_slot_delta * kPointerSize));
    state->IncreaseSPDelta(stack_slot_delta);
  }
}
529 
530 #ifdef DEBUG
531 bool VerifyOutputOfAtomicPairInstr(IA32OperandConverter* converter,
532  const Instruction* instr) {
533  if (instr->OutputCount() > 0) {
534  if (converter->OutputRegister(0) != eax) return false;
535  if (instr->OutputCount() == 2 && converter->OutputRegister(1) != edx)
536  return false;
537  }
538  return true;
539 }
540 #endif
541 
542 } // namespace
543 
// Before the gap moves of a tail call, turns "push-compatible" moves into
// actual push instructions so argument-passing and the stack adjustment are
// folded together instead of being materialized by the gap resolver.
void CodeGenerator::AssembleTailCallBeforeGap(Instruction* instr,
                                              int first_unused_stack_slot) {
  CodeGenerator::PushTypeFlags flags(kImmediatePush | kScalarPush);
  ZoneVector<MoveOperands*> pushes(zone());
  GetPushCompatibleMoves(instr, flags, &pushes);

  // Only emit pushes when the moves end exactly at the first unused stack
  // slot; otherwise fall through to a plain SP adjustment below.
  if (!pushes.empty() &&
      (LocationOperand::cast(pushes.back()->destination()).index() + 1 ==
       first_unused_stack_slot)) {
    IA32OperandConverter g(this, instr);
    for (auto move : pushes) {
      LocationOperand destination_location(
          LocationOperand::cast(move->destination()));
      InstructionOperand source(move->source());
      // Bring SP to just above the destination slot, then push the value.
      AdjustStackPointerForTailCall(tasm(), frame_access_state(),
                                    destination_location.index());
      if (source.IsStackSlot()) {
        LocationOperand source_location(LocationOperand::cast(source));
        __ push(g.SlotToOperand(source_location.index()));
      } else if (source.IsRegister()) {
        LocationOperand source_location(LocationOperand::cast(source));
        __ push(source_location.GetRegister());
      } else if (source.IsImmediate()) {
        __ Push(Immediate(ImmediateOperand::cast(source).inline_value()));
      } else {
        // Pushes of non-scalar data types is not supported.
        UNIMPLEMENTED();
      }
      frame_access_state()->IncreaseSPDelta(1);
      // The move has been performed as a push; the gap resolver must not
      // execute it again.
      move->Eliminate();
    }
  }
  AdjustStackPointerForTailCall(tasm(), frame_access_state(),
                                first_unused_stack_slot, false);
}
579 
// After the gap moves of a tail call: final SP adjustment, this time
// allowing the stack to shrink.
void CodeGenerator::AssembleTailCallAfterGap(Instruction* instr,
                                             int first_unused_stack_slot) {
  AdjustStackPointerForTailCall(tasm(), frame_access_state(),
                                first_unused_stack_slot);
}
585 
// Check that {kJavaScriptCallCodeStartRegister} is correct.
void CodeGenerator::AssembleCodeStartRegisterCheck() {
  __ push(eax);  // Push eax so we can use it as a scratch register.
  __ ComputeCodeStartAddress(eax);
  __ cmp(eax, kJavaScriptCallCodeStartRegister);
  // Aborts in debug code when the computed and expected addresses differ.
  __ Assert(equal, AbortReason::kWrongFunctionCodeStart);
  __ pop(eax);  // Restore eax.
}
594 
// Check if the code object is marked for deoptimization. If it is, then it
// jumps to the CompileLazyDeoptimizedCode builtin. In order to do this we need
// to:
// 1. read from memory the word that contains that bit, which can be found in
// the flags in the referenced {CodeDataContainer} object;
// 2. test kMarkedForDeoptimizationBit in those flags; and
// 3. if it is not zero then it jumps to the builtin.
void CodeGenerator::BailoutIfDeoptimized() {
  int offset = Code::kCodeDataContainerOffset - Code::kHeaderSize;
  __ push(eax);  // Push eax so we can use it as a scratch register.
  __ mov(eax, Operand(kJavaScriptCallCodeStartRegister, offset));
  __ test(FieldOperand(eax, CodeDataContainer::kKindSpecificFlagsOffset),
          Immediate(1 << Code::kMarkedForDeoptimizationBit));
  // pop does not modify flags, so ZF from the test above survives.
  __ pop(eax);  // Restore eax.
  // Ensure we're not serializing (otherwise we'd need to use an indirection to
  // access the builtin below).
  DCHECK(!isolate()->ShouldLoadConstantsFromRootList());
  Handle<Code> code = isolate()->builtins()->builtin_handle(
      Builtins::kCompileLazyDeoptimizedCode);
  __ j(not_zero, code, RelocInfo::CODE_TARGET);
}
616 
// Speculation poisoning is not implemented on ia32; this must never be
// reached.
void CodeGenerator::GenerateSpeculationPoisonFromCodeStartRegister() {
  // TODO(860429): Remove remaining poisoning infrastructure on ia32.
  UNREACHABLE();
}
621 
// Speculation poisoning is not implemented on ia32; this must never be
// reached.
void CodeGenerator::AssembleRegisterArgumentPoisoning() {
  // TODO(860429): Remove remaining poisoning infrastructure on ia32.
  UNREACHABLE();
}
626 
627 // Assembles an instruction after register allocation, producing machine code.
628 CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
629  Instruction* instr) {
630  IA32OperandConverter i(this, instr);
631  InstructionCode opcode = instr->opcode();
632  ArchOpcode arch_opcode = ArchOpcodeField::decode(opcode);
633  switch (arch_opcode) {
634  case kArchCallCodeObject: {
635  InstructionOperand* op = instr->InputAt(0);
636  if (op->IsImmediate()) {
637  Handle<Code> code = i.InputCode(0);
638  __ Call(code, RelocInfo::CODE_TARGET);
639  } else if (op->IsRegister()) {
640  Register reg = i.InputRegister(0);
641  DCHECK_IMPLIES(
642  HasCallDescriptorFlag(instr, CallDescriptor::kFixedTargetRegister),
643  reg == kJavaScriptCallCodeStartRegister);
644  __ add(reg, Immediate(Code::kHeaderSize - kHeapObjectTag));
645  if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) {
646  __ RetpolineCall(reg);
647  } else {
648  __ call(reg);
649  }
650  } else {
651  CHECK(tasm()->root_array_available());
652  // This is used to allow calls to the arguments adaptor trampoline from
653  // code that only has 5 gp registers available and cannot call through
654  // an immediate. This happens when the arguments adaptor trampoline is
655  // not an embedded builtin.
656  // TODO(v8:6666): Remove once only embedded builtins are supported.
657  __ push(eax);
658  frame_access_state()->IncreaseSPDelta(1);
659  Operand virtual_call_target_register(
660  kRootRegister, IsolateData::virtual_call_target_register_offset());
661  __ mov(eax, i.InputOperand(0));
662  __ add(eax, Immediate(Code::kHeaderSize - kHeapObjectTag));
663  __ mov(virtual_call_target_register, eax);
664  __ pop(eax);
665  frame_access_state()->IncreaseSPDelta(-1);
666  __ call(virtual_call_target_register);
667  }
668  RecordCallPosition(instr);
669  frame_access_state()->ClearSPDelta();
670  break;
671  }
672  case kArchCallWasmFunction: {
673  if (HasImmediateInput(instr, 0)) {
674  Constant constant = i.ToConstant(instr->InputAt(0));
675  Address wasm_code = static_cast<Address>(constant.ToInt32());
676  if (DetermineStubCallMode() == StubCallMode::kCallWasmRuntimeStub) {
677  __ wasm_call(wasm_code, constant.rmode());
678  } else {
679  if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) {
680  __ RetpolineCall(wasm_code, constant.rmode());
681  } else {
682  __ call(wasm_code, constant.rmode());
683  }
684  }
685  } else {
686  Register reg = i.InputRegister(0);
687  if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) {
688  __ RetpolineCall(reg);
689  } else {
690  __ call(reg);
691  }
692  }
693  RecordCallPosition(instr);
694  frame_access_state()->ClearSPDelta();
695  break;
696  }
697  case kArchTailCallCodeObjectFromJSFunction:
698  case kArchTailCallCodeObject: {
699  if (arch_opcode == kArchTailCallCodeObjectFromJSFunction) {
700  AssemblePopArgumentsAdaptorFrame(kJavaScriptCallArgCountRegister,
701  no_reg, no_reg, no_reg);
702  }
703  if (HasImmediateInput(instr, 0)) {
704  Handle<Code> code = i.InputCode(0);
705  __ Jump(code, RelocInfo::CODE_TARGET);
706  } else {
707  Register reg = i.InputRegister(0);
708  DCHECK_IMPLIES(
709  HasCallDescriptorFlag(instr, CallDescriptor::kFixedTargetRegister),
710  reg == kJavaScriptCallCodeStartRegister);
711  __ add(reg, Immediate(Code::kHeaderSize - kHeapObjectTag));
712  if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) {
713  __ RetpolineJump(reg);
714  } else {
715  __ jmp(reg);
716  }
717  }
718  frame_access_state()->ClearSPDelta();
719  frame_access_state()->SetFrameAccessToDefault();
720  break;
721  }
722  case kArchTailCallWasm: {
723  if (HasImmediateInput(instr, 0)) {
724  Constant constant = i.ToConstant(instr->InputAt(0));
725  Address wasm_code = static_cast<Address>(constant.ToInt32());
726  __ jmp(wasm_code, constant.rmode());
727  } else {
728  Register reg = i.InputRegister(0);
729  if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) {
730  __ RetpolineJump(reg);
731  } else {
732  __ jmp(reg);
733  }
734  }
735  frame_access_state()->ClearSPDelta();
736  frame_access_state()->SetFrameAccessToDefault();
737  break;
738  }
739  case kArchTailCallAddress: {
740  CHECK(!HasImmediateInput(instr, 0));
741  Register reg = i.InputRegister(0);
742  DCHECK_IMPLIES(
743  HasCallDescriptorFlag(instr, CallDescriptor::kFixedTargetRegister),
744  reg == kJavaScriptCallCodeStartRegister);
745  if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) {
746  __ RetpolineJump(reg);
747  } else {
748  __ jmp(reg);
749  }
750  frame_access_state()->ClearSPDelta();
751  frame_access_state()->SetFrameAccessToDefault();
752  break;
753  }
754  case kArchCallJSFunction: {
755  Register func = i.InputRegister(0);
756  if (FLAG_debug_code) {
757  // Check the function's context matches the context argument.
758  __ cmp(esi, FieldOperand(func, JSFunction::kContextOffset));
759  __ Assert(equal, AbortReason::kWrongFunctionContext);
760  }
761  static_assert(kJavaScriptCallCodeStartRegister == ecx, "ABI mismatch");
762  __ mov(ecx, FieldOperand(func, JSFunction::kCodeOffset));
763  __ add(ecx, Immediate(Code::kHeaderSize - kHeapObjectTag));
764  __ call(ecx);
765  RecordCallPosition(instr);
766  frame_access_state()->ClearSPDelta();
767  break;
768  }
769  case kArchPrepareCallCFunction: {
770  // Frame alignment requires using FP-relative frame addressing.
771  frame_access_state()->SetFrameAccessToFP();
772  int const num_parameters = MiscField::decode(instr->opcode());
773  __ PrepareCallCFunction(num_parameters, i.TempRegister(0));
774  break;
775  }
776  case kArchSaveCallerRegisters: {
777  fp_mode_ =
778  static_cast<SaveFPRegsMode>(MiscField::decode(instr->opcode()));
779  DCHECK(fp_mode_ == kDontSaveFPRegs || fp_mode_ == kSaveFPRegs);
780  // kReturnRegister0 should have been saved before entering the stub.
781  int bytes = __ PushCallerSaved(fp_mode_, kReturnRegister0);
782  DCHECK_EQ(0, bytes % kPointerSize);
783  DCHECK_EQ(0, frame_access_state()->sp_delta());
784  frame_access_state()->IncreaseSPDelta(bytes / kPointerSize);
785  DCHECK(!caller_registers_saved_);
786  caller_registers_saved_ = true;
787  break;
788  }
789  case kArchRestoreCallerRegisters: {
790  DCHECK(fp_mode_ ==
791  static_cast<SaveFPRegsMode>(MiscField::decode(instr->opcode())));
792  DCHECK(fp_mode_ == kDontSaveFPRegs || fp_mode_ == kSaveFPRegs);
793  // Don't overwrite the returned value.
794  int bytes = __ PopCallerSaved(fp_mode_, kReturnRegister0);
795  frame_access_state()->IncreaseSPDelta(-(bytes / kPointerSize));
796  DCHECK_EQ(0, frame_access_state()->sp_delta());
797  DCHECK(caller_registers_saved_);
798  caller_registers_saved_ = false;
799  break;
800  }
801  case kArchPrepareTailCall:
802  AssemblePrepareTailCall();
803  break;
804  case kArchCallCFunction: {
805  int const num_parameters = MiscField::decode(instr->opcode());
806  if (HasImmediateInput(instr, 0)) {
807  ExternalReference ref = i.InputExternalReference(0);
808  __ CallCFunction(ref, num_parameters);
809  } else {
810  Register func = i.InputRegister(0);
811  __ CallCFunction(func, num_parameters);
812  }
813  frame_access_state()->SetFrameAccessToDefault();
814  // Ideally, we should decrement SP delta to match the change of stack
815  // pointer in CallCFunction. However, for certain architectures (e.g.
816  // ARM), there may be more strict alignment requirement, causing old SP
817  // to be saved on the stack. In those cases, we can not calculate the SP
818  // delta statically.
819  frame_access_state()->ClearSPDelta();
820  if (caller_registers_saved_) {
821  // Need to re-sync SP delta introduced in kArchSaveCallerRegisters.
822  // Here, we assume the sequence to be:
823  // kArchSaveCallerRegisters;
824  // kArchCallCFunction;
825  // kArchRestoreCallerRegisters;
826  int bytes =
827  __ RequiredStackSizeForCallerSaved(fp_mode_, kReturnRegister0);
828  frame_access_state()->IncreaseSPDelta(bytes / kPointerSize);
829  }
830  break;
831  }
832  case kArchJmp:
833  AssembleArchJump(i.InputRpo(0));
834  break;
835  case kArchBinarySearchSwitch:
836  AssembleArchBinarySearchSwitch(instr);
837  break;
838  case kArchLookupSwitch:
839  AssembleArchLookupSwitch(instr);
840  break;
841  case kArchTableSwitch:
842  AssembleArchTableSwitch(instr);
843  break;
844  case kArchComment:
845  __ RecordComment(reinterpret_cast<const char*>(i.InputInt32(0)));
846  break;
847  case kArchDebugAbort:
848  DCHECK(i.InputRegister(0) == edx);
849  if (!frame_access_state()->has_frame()) {
850  // We don't actually want to generate a pile of code for this, so just
851  // claim there is a stack frame, without generating one.
852  FrameScope scope(tasm(), StackFrame::NONE);
853  __ Call(isolate()->builtins()->builtin_handle(Builtins::kAbortJS),
854  RelocInfo::CODE_TARGET);
855  } else {
856  __ Call(isolate()->builtins()->builtin_handle(Builtins::kAbortJS),
857  RelocInfo::CODE_TARGET);
858  }
859  __ int3();
860  break;
861  case kArchDebugBreak:
862  __ int3();
863  break;
864  case kArchNop:
865  case kArchThrowTerminator:
866  // don't emit code for nops.
867  break;
868  case kArchDeoptimize: {
869  int deopt_state_id =
870  BuildTranslation(instr, -1, 0, OutputFrameStateCombine::Ignore());
871  CodeGenResult result =
872  AssembleDeoptimizerCall(deopt_state_id, current_source_position_);
873  if (result != kSuccess) return result;
874  break;
875  }
876  case kArchRet:
877  AssembleReturn(instr->InputAt(0));
878  break;
879  case kArchStackPointer:
880  __ mov(i.OutputRegister(), esp);
881  break;
882  case kArchFramePointer:
883  __ mov(i.OutputRegister(), ebp);
884  break;
885  case kArchParentFramePointer:
886  if (frame_access_state()->has_frame()) {
887  __ mov(i.OutputRegister(), Operand(ebp, 0));
888  } else {
889  __ mov(i.OutputRegister(), ebp);
890  }
891  break;
892  case kArchTruncateDoubleToI: {
893  auto result = i.OutputRegister();
894  auto input = i.InputDoubleRegister(0);
895  auto ool = new (zone()) OutOfLineTruncateDoubleToI(
896  this, result, input, DetermineStubCallMode());
897  __ cvttsd2si(result, Operand(input));
898  __ cmp(result, 1);
899  __ j(overflow, ool->entry());
900  __ bind(ool->exit());
901  break;
902  }
903  case kArchStoreWithWriteBarrier: {
904  RecordWriteMode mode =
905  static_cast<RecordWriteMode>(MiscField::decode(instr->opcode()));
906  Register object = i.InputRegister(0);
907  size_t index = 0;
908  Operand operand = i.MemoryOperand(&index);
909  Register value = i.InputRegister(index);
910  Register scratch0 = i.TempRegister(0);
911  Register scratch1 = i.TempRegister(1);
912  auto ool = new (zone())
913  OutOfLineRecordWrite(this, object, operand, value, scratch0, scratch1,
914  mode, DetermineStubCallMode());
915  __ mov(operand, value);
916  __ CheckPageFlag(object, scratch0,
917  MemoryChunk::kPointersFromHereAreInterestingMask,
918  not_zero, ool->entry());
919  __ bind(ool->exit());
920  break;
921  }
922  case kArchStackSlot: {
923  FrameOffset offset =
924  frame_access_state()->GetFrameOffset(i.InputInt32(0));
925  Register base = offset.from_stack_pointer() ? esp : ebp;
926  __ lea(i.OutputRegister(), Operand(base, offset.offset()));
927  break;
928  }
929  case kIeee754Float64Acos:
930  ASSEMBLE_IEEE754_UNOP(acos);
931  break;
932  case kIeee754Float64Acosh:
933  ASSEMBLE_IEEE754_UNOP(acosh);
934  break;
935  case kIeee754Float64Asin:
936  ASSEMBLE_IEEE754_UNOP(asin);
937  break;
938  case kIeee754Float64Asinh:
939  ASSEMBLE_IEEE754_UNOP(asinh);
940  break;
941  case kIeee754Float64Atan:
942  ASSEMBLE_IEEE754_UNOP(atan);
943  break;
944  case kIeee754Float64Atanh:
945  ASSEMBLE_IEEE754_UNOP(atanh);
946  break;
947  case kIeee754Float64Atan2:
948  ASSEMBLE_IEEE754_BINOP(atan2);
949  break;
950  case kIeee754Float64Cbrt:
951  ASSEMBLE_IEEE754_UNOP(cbrt);
952  break;
953  case kIeee754Float64Cos:
954  ASSEMBLE_IEEE754_UNOP(cos);
955  break;
956  case kIeee754Float64Cosh:
957  ASSEMBLE_IEEE754_UNOP(cosh);
958  break;
959  case kIeee754Float64Expm1:
960  ASSEMBLE_IEEE754_UNOP(expm1);
961  break;
962  case kIeee754Float64Exp:
963  ASSEMBLE_IEEE754_UNOP(exp);
964  break;
965  case kIeee754Float64Log:
966  ASSEMBLE_IEEE754_UNOP(log);
967  break;
968  case kIeee754Float64Log1p:
969  ASSEMBLE_IEEE754_UNOP(log1p);
970  break;
971  case kIeee754Float64Log2:
972  ASSEMBLE_IEEE754_UNOP(log2);
973  break;
974  case kIeee754Float64Log10:
975  ASSEMBLE_IEEE754_UNOP(log10);
976  break;
977  case kIeee754Float64Pow: {
978  // TODO(bmeurer): Improve integration of the stub.
979  if (i.InputDoubleRegister(1) != xmm2) {
980  __ movaps(xmm2, i.InputDoubleRegister(0));
981  __ movaps(xmm1, i.InputDoubleRegister(1));
982  } else {
983  __ movaps(xmm0, i.InputDoubleRegister(0));
984  __ movaps(xmm1, xmm2);
985  __ movaps(xmm2, xmm0);
986  }
987  __ Call(BUILTIN_CODE(isolate(), MathPowInternal), RelocInfo::CODE_TARGET);
988  __ movaps(i.OutputDoubleRegister(), xmm3);
989  break;
990  }
991  case kIeee754Float64Sin:
992  ASSEMBLE_IEEE754_UNOP(sin);
993  break;
994  case kIeee754Float64Sinh:
995  ASSEMBLE_IEEE754_UNOP(sinh);
996  break;
997  case kIeee754Float64Tan:
998  ASSEMBLE_IEEE754_UNOP(tan);
999  break;
1000  case kIeee754Float64Tanh:
1001  ASSEMBLE_IEEE754_UNOP(tanh);
1002  break;
1003  case kIA32Add:
1004  ASSEMBLE_BINOP(add);
1005  break;
1006  case kIA32And:
1007  ASSEMBLE_BINOP(and_);
1008  break;
1009  case kIA32Cmp:
1010  ASSEMBLE_COMPARE(cmp);
1011  break;
1012  case kIA32Cmp16:
1013  ASSEMBLE_COMPARE(cmpw);
1014  break;
1015  case kIA32Cmp8:
1016  ASSEMBLE_COMPARE(cmpb);
1017  break;
1018  case kIA32Test:
1019  ASSEMBLE_COMPARE(test);
1020  break;
1021  case kIA32Test16:
1022  ASSEMBLE_COMPARE(test_w);
1023  break;
1024  case kIA32Test8:
1025  ASSEMBLE_COMPARE(test_b);
1026  break;
1027  case kIA32Imul:
1028  if (HasImmediateInput(instr, 1)) {
1029  __ imul(i.OutputRegister(), i.InputOperand(0), i.InputInt32(1));
1030  } else {
1031  __ imul(i.OutputRegister(), i.InputOperand(1));
1032  }
1033  break;
1034  case kIA32ImulHigh:
1035  __ imul(i.InputRegister(1));
1036  break;
1037  case kIA32UmulHigh:
1038  __ mul(i.InputRegister(1));
1039  break;
1040  case kIA32Idiv:
1041  __ cdq();
1042  __ idiv(i.InputOperand(1));
1043  break;
1044  case kIA32Udiv:
1045  __ Move(edx, Immediate(0));
1046  __ div(i.InputOperand(1));
1047  break;
1048  case kIA32Not:
1049  __ not_(i.OutputOperand());
1050  break;
1051  case kIA32Neg:
1052  __ neg(i.OutputOperand());
1053  break;
1054  case kIA32Or:
1055  ASSEMBLE_BINOP(or_);
1056  break;
1057  case kIA32Xor:
1058  ASSEMBLE_BINOP(xor_);
1059  break;
1060  case kIA32Sub:
1061  ASSEMBLE_BINOP(sub);
1062  break;
1063  case kIA32Shl:
1064  if (HasImmediateInput(instr, 1)) {
1065  __ shl(i.OutputOperand(), i.InputInt5(1));
1066  } else {
1067  __ shl_cl(i.OutputOperand());
1068  }
1069  break;
1070  case kIA32Shr:
1071  if (HasImmediateInput(instr, 1)) {
1072  __ shr(i.OutputOperand(), i.InputInt5(1));
1073  } else {
1074  __ shr_cl(i.OutputOperand());
1075  }
1076  break;
1077  case kIA32Sar:
1078  if (HasImmediateInput(instr, 1)) {
1079  __ sar(i.OutputOperand(), i.InputInt5(1));
1080  } else {
1081  __ sar_cl(i.OutputOperand());
1082  }
1083  break;
1084  case kIA32AddPair: {
1085  // i.OutputRegister(0) == i.InputRegister(0) ... left low word.
1086  // i.InputRegister(1) ... left high word.
1087  // i.InputRegister(2) ... right low word.
1088  // i.InputRegister(3) ... right high word.
1089  bool use_temp = false;
1090  if ((instr->InputAt(1)->IsRegister() &&
1091  i.OutputRegister(0).code() == i.InputRegister(1).code()) ||
1092  i.OutputRegister(0).code() == i.InputRegister(3).code()) {
1093  // We cannot write to the output register directly, because it would
1094  // overwrite an input for adc. We have to use the temp register.
1095  use_temp = true;
1096  __ Move(i.TempRegister(0), i.InputRegister(0));
1097  __ add(i.TempRegister(0), i.InputRegister(2));
1098  } else {
1099  __ add(i.OutputRegister(0), i.InputRegister(2));
1100  }
1101  i.MoveInstructionOperandToRegister(i.OutputRegister(1),
1102  instr->InputAt(1));
1103  __ adc(i.OutputRegister(1), Operand(i.InputRegister(3)));
1104  if (use_temp) {
1105  __ Move(i.OutputRegister(0), i.TempRegister(0));
1106  }
1107  break;
1108  }
1109  case kIA32SubPair: {
1110  // i.OutputRegister(0) == i.InputRegister(0) ... left low word.
1111  // i.InputRegister(1) ... left high word.
1112  // i.InputRegister(2) ... right low word.
1113  // i.InputRegister(3) ... right high word.
1114  bool use_temp = false;
1115  if ((instr->InputAt(1)->IsRegister() &&
1116  i.OutputRegister(0).code() == i.InputRegister(1).code()) ||
1117  i.OutputRegister(0).code() == i.InputRegister(3).code()) {
1118  // We cannot write to the output register directly, because it would
1119  // overwrite an input for sbb. We have to use the temp register.
1120  use_temp = true;
1121  __ Move(i.TempRegister(0), i.InputRegister(0));
1122  __ sub(i.TempRegister(0), i.InputRegister(2));
1123  } else {
1124  __ sub(i.OutputRegister(0), i.InputRegister(2));
1125  }
1126  i.MoveInstructionOperandToRegister(i.OutputRegister(1),
1127  instr->InputAt(1));
1128  __ sbb(i.OutputRegister(1), Operand(i.InputRegister(3)));
1129  if (use_temp) {
1130  __ Move(i.OutputRegister(0), i.TempRegister(0));
1131  }
1132  break;
1133  }
1134  case kIA32MulPair: {
1135  __ imul(i.OutputRegister(1), i.InputOperand(0));
1136  i.MoveInstructionOperandToRegister(i.TempRegister(0), instr->InputAt(1));
1137  __ imul(i.TempRegister(0), i.InputOperand(2));
1138  __ add(i.OutputRegister(1), i.TempRegister(0));
1139  __ mov(i.OutputRegister(0), i.InputOperand(0));
1140  // Multiplies the low words and stores them in eax and edx.
1141  __ mul(i.InputRegister(2));
1142  __ add(i.OutputRegister(1), i.TempRegister(0));
1143 
1144  break;
1145  }
1146  case kIA32ShlPair:
1147  if (HasImmediateInput(instr, 2)) {
1148  __ ShlPair(i.InputRegister(1), i.InputRegister(0), i.InputInt6(2));
1149  } else {
1150  // Shift has been loaded into CL by the register allocator.
1151  __ ShlPair_cl(i.InputRegister(1), i.InputRegister(0));
1152  }
1153  break;
1154  case kIA32ShrPair:
1155  if (HasImmediateInput(instr, 2)) {
1156  __ ShrPair(i.InputRegister(1), i.InputRegister(0), i.InputInt6(2));
1157  } else {
1158  // Shift has been loaded into CL by the register allocator.
1159  __ ShrPair_cl(i.InputRegister(1), i.InputRegister(0));
1160  }
1161  break;
1162  case kIA32SarPair:
1163  if (HasImmediateInput(instr, 2)) {
1164  __ SarPair(i.InputRegister(1), i.InputRegister(0), i.InputInt6(2));
1165  } else {
1166  // Shift has been loaded into CL by the register allocator.
1167  __ SarPair_cl(i.InputRegister(1), i.InputRegister(0));
1168  }
1169  break;
1170  case kIA32Ror:
1171  if (HasImmediateInput(instr, 1)) {
1172  __ ror(i.OutputOperand(), i.InputInt5(1));
1173  } else {
1174  __ ror_cl(i.OutputOperand());
1175  }
1176  break;
1177  case kIA32Lzcnt:
1178  __ Lzcnt(i.OutputRegister(), i.InputOperand(0));
1179  break;
1180  case kIA32Tzcnt:
1181  __ Tzcnt(i.OutputRegister(), i.InputOperand(0));
1182  break;
1183  case kIA32Popcnt:
1184  __ Popcnt(i.OutputRegister(), i.InputOperand(0));
1185  break;
1186  case kIA32Bswap:
1187  __ bswap(i.OutputRegister());
1188  break;
1189  case kArchWordPoisonOnSpeculation:
1190  // TODO(860429): Remove remaining poisoning infrastructure on ia32.
1191  UNREACHABLE();
1192  break;
1193  case kLFence:
1194  __ lfence();
1195  break;
1196  case kSSEFloat32Cmp:
1197  __ ucomiss(i.InputDoubleRegister(0), i.InputOperand(1));
1198  break;
1199  case kSSEFloat32Add:
1200  __ addss(i.InputDoubleRegister(0), i.InputOperand(1));
1201  break;
1202  case kSSEFloat32Sub:
1203  __ subss(i.InputDoubleRegister(0), i.InputOperand(1));
1204  break;
1205  case kSSEFloat32Mul:
1206  __ mulss(i.InputDoubleRegister(0), i.InputOperand(1));
1207  break;
1208  case kSSEFloat32Div:
1209  __ divss(i.InputDoubleRegister(0), i.InputOperand(1));
1210  // Don't delete this mov. It may improve performance on some CPUs,
1211  // when there is a (v)mulss depending on the result.
1212  __ movaps(i.OutputDoubleRegister(), i.OutputDoubleRegister());
1213  break;
1214  case kSSEFloat32Sqrt:
1215  __ sqrtss(i.OutputDoubleRegister(), i.InputOperand(0));
1216  break;
1217  case kSSEFloat32Abs: {
1218  // TODO(bmeurer): Use 128-bit constants.
1219  __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
1220  __ psrlq(kScratchDoubleReg, 33);
1221  __ andps(i.OutputDoubleRegister(), kScratchDoubleReg);
1222  break;
1223  }
1224  case kSSEFloat32Neg: {
1225  // TODO(bmeurer): Use 128-bit constants.
1226  __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
1227  __ psllq(kScratchDoubleReg, 31);
1228  __ xorps(i.OutputDoubleRegister(), kScratchDoubleReg);
1229  break;
1230  }
1231  case kSSEFloat32Round: {
1232  CpuFeatureScope sse_scope(tasm(), SSE4_1);
1233  RoundingMode const mode =
1234  static_cast<RoundingMode>(MiscField::decode(instr->opcode()));
1235  __ roundss(i.OutputDoubleRegister(), i.InputDoubleRegister(0), mode);
1236  break;
1237  }
1238  case kSSEFloat64Cmp:
1239  __ ucomisd(i.InputDoubleRegister(0), i.InputOperand(1));
1240  break;
1241  case kSSEFloat64Add:
1242  __ addsd(i.InputDoubleRegister(0), i.InputOperand(1));
1243  break;
1244  case kSSEFloat64Sub:
1245  __ subsd(i.InputDoubleRegister(0), i.InputOperand(1));
1246  break;
1247  case kSSEFloat64Mul:
1248  __ mulsd(i.InputDoubleRegister(0), i.InputOperand(1));
1249  break;
1250  case kSSEFloat64Div:
1251  __ divsd(i.InputDoubleRegister(0), i.InputOperand(1));
1252  // Don't delete this mov. It may improve performance on some CPUs,
1253  // when there is a (v)mulsd depending on the result.
1254  __ movaps(i.OutputDoubleRegister(), i.OutputDoubleRegister());
1255  break;
1256  case kSSEFloat32Max: {
1257  Label compare_nan, compare_swap, done_compare;
1258  if (instr->InputAt(1)->IsFPRegister()) {
1259  __ ucomiss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1260  } else {
1261  __ ucomiss(i.InputDoubleRegister(0), i.InputOperand(1));
1262  }
1263  auto ool =
1264  new (zone()) OutOfLineLoadFloat32NaN(this, i.OutputDoubleRegister());
1265  __ j(parity_even, ool->entry());
1266  __ j(above, &done_compare, Label::kNear);
1267  __ j(below, &compare_swap, Label::kNear);
1268  __ movmskps(i.TempRegister(0), i.InputDoubleRegister(0));
1269  __ test(i.TempRegister(0), Immediate(1));
1270  __ j(zero, &done_compare, Label::kNear);
1271  __ bind(&compare_swap);
1272  if (instr->InputAt(1)->IsFPRegister()) {
1273  __ movss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1274  } else {
1275  __ movss(i.InputDoubleRegister(0), i.InputOperand(1));
1276  }
1277  __ bind(&done_compare);
1278  __ bind(ool->exit());
1279  break;
1280  }
1281 
1282  case kSSEFloat64Max: {
1283  Label compare_nan, compare_swap, done_compare;
1284  if (instr->InputAt(1)->IsFPRegister()) {
1285  __ ucomisd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1286  } else {
1287  __ ucomisd(i.InputDoubleRegister(0), i.InputOperand(1));
1288  }
1289  auto ool =
1290  new (zone()) OutOfLineLoadFloat64NaN(this, i.OutputDoubleRegister());
1291  __ j(parity_even, ool->entry());
1292  __ j(above, &done_compare, Label::kNear);
1293  __ j(below, &compare_swap, Label::kNear);
1294  __ movmskpd(i.TempRegister(0), i.InputDoubleRegister(0));
1295  __ test(i.TempRegister(0), Immediate(1));
1296  __ j(zero, &done_compare, Label::kNear);
1297  __ bind(&compare_swap);
1298  if (instr->InputAt(1)->IsFPRegister()) {
1299  __ movsd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1300  } else {
1301  __ movsd(i.InputDoubleRegister(0), i.InputOperand(1));
1302  }
1303  __ bind(&done_compare);
1304  __ bind(ool->exit());
1305  break;
1306  }
1307  case kSSEFloat32Min: {
1308  Label compare_swap, done_compare;
1309  if (instr->InputAt(1)->IsFPRegister()) {
1310  __ ucomiss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1311  } else {
1312  __ ucomiss(i.InputDoubleRegister(0), i.InputOperand(1));
1313  }
1314  auto ool =
1315  new (zone()) OutOfLineLoadFloat32NaN(this, i.OutputDoubleRegister());
1316  __ j(parity_even, ool->entry());
1317  __ j(below, &done_compare, Label::kNear);
1318  __ j(above, &compare_swap, Label::kNear);
1319  if (instr->InputAt(1)->IsFPRegister()) {
1320  __ movmskps(i.TempRegister(0), i.InputDoubleRegister(1));
1321  } else {
1322  __ movss(kScratchDoubleReg, i.InputOperand(1));
1323  __ movmskps(i.TempRegister(0), kScratchDoubleReg);
1324  }
1325  __ test(i.TempRegister(0), Immediate(1));
1326  __ j(zero, &done_compare, Label::kNear);
1327  __ bind(&compare_swap);
1328  if (instr->InputAt(1)->IsFPRegister()) {
1329  __ movss(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1330  } else {
1331  __ movss(i.InputDoubleRegister(0), i.InputOperand(1));
1332  }
1333  __ bind(&done_compare);
1334  __ bind(ool->exit());
1335  break;
1336  }
1337  case kSSEFloat64Min: {
1338  Label compare_swap, done_compare;
1339  if (instr->InputAt(1)->IsFPRegister()) {
1340  __ ucomisd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1341  } else {
1342  __ ucomisd(i.InputDoubleRegister(0), i.InputOperand(1));
1343  }
1344  auto ool =
1345  new (zone()) OutOfLineLoadFloat64NaN(this, i.OutputDoubleRegister());
1346  __ j(parity_even, ool->entry());
1347  __ j(below, &done_compare, Label::kNear);
1348  __ j(above, &compare_swap, Label::kNear);
1349  if (instr->InputAt(1)->IsFPRegister()) {
1350  __ movmskpd(i.TempRegister(0), i.InputDoubleRegister(1));
1351  } else {
1352  __ movsd(kScratchDoubleReg, i.InputOperand(1));
1353  __ movmskpd(i.TempRegister(0), kScratchDoubleReg);
1354  }
1355  __ test(i.TempRegister(0), Immediate(1));
1356  __ j(zero, &done_compare, Label::kNear);
1357  __ bind(&compare_swap);
1358  if (instr->InputAt(1)->IsFPRegister()) {
1359  __ movsd(i.InputDoubleRegister(0), i.InputDoubleRegister(1));
1360  } else {
1361  __ movsd(i.InputDoubleRegister(0), i.InputOperand(1));
1362  }
1363  __ bind(&done_compare);
1364  __ bind(ool->exit());
1365  break;
1366  }
1367  case kSSEFloat64Mod: {
1368  Register tmp = i.TempRegister(1);
1369  __ mov(tmp, esp);
1370  __ sub(esp, Immediate(kDoubleSize));
1371  __ and_(esp, -8); // align to 8 byte boundary.
1372  // Move values to st(0) and st(1).
1373  __ movsd(Operand(esp, 0), i.InputDoubleRegister(1));
1374  __ fld_d(Operand(esp, 0));
1375  __ movsd(Operand(esp, 0), i.InputDoubleRegister(0));
1376  __ fld_d(Operand(esp, 0));
1377  // Loop while fprem isn't done.
1378  Label mod_loop;
1379  __ bind(&mod_loop);
1380  // This instruction traps on all kinds of inputs, but we are assuming the
1381  // floating point control word is set to ignore them all.
1382  __ fprem();
1383  // fnstsw_ax clobbers eax.
1384  DCHECK_EQ(eax, i.TempRegister(0));
1385  __ fnstsw_ax();
1386  __ sahf();
1387  __ j(parity_even, &mod_loop);
1388  // Move output to stack and clean up.
1389  __ fstp(1);
1390  __ fstp_d(Operand(esp, 0));
1391  __ movsd(i.OutputDoubleRegister(), Operand(esp, 0));
1392  __ mov(esp, tmp);
1393  break;
1394  }
1395  case kSSEFloat64Abs: {
1396  // TODO(bmeurer): Use 128-bit constants.
1397  __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
1398  __ psrlq(kScratchDoubleReg, 1);
1399  __ andpd(i.OutputDoubleRegister(), kScratchDoubleReg);
1400  break;
1401  }
1402  case kSSEFloat64Neg: {
1403  // TODO(bmeurer): Use 128-bit constants.
1404  __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
1405  __ psllq(kScratchDoubleReg, 63);
1406  __ xorpd(i.OutputDoubleRegister(), kScratchDoubleReg);
1407  break;
1408  }
1409  case kSSEFloat64Sqrt:
1410  __ sqrtsd(i.OutputDoubleRegister(), i.InputOperand(0));
1411  break;
1412  case kSSEFloat64Round: {
1413  CpuFeatureScope sse_scope(tasm(), SSE4_1);
1414  RoundingMode const mode =
1415  static_cast<RoundingMode>(MiscField::decode(instr->opcode()));
1416  __ roundsd(i.OutputDoubleRegister(), i.InputDoubleRegister(0), mode);
1417  break;
1418  }
1419  case kSSEFloat32ToFloat64:
1420  __ cvtss2sd(i.OutputDoubleRegister(), i.InputOperand(0));
1421  break;
1422  case kSSEFloat64ToFloat32:
1423  __ cvtsd2ss(i.OutputDoubleRegister(), i.InputOperand(0));
1424  break;
1425  case kSSEFloat32ToInt32:
1426  __ cvttss2si(i.OutputRegister(), i.InputOperand(0));
1427  break;
1428  case kSSEFloat32ToUint32:
1429  __ Cvttss2ui(i.OutputRegister(), i.InputOperand(0), kScratchDoubleReg);
1430  break;
1431  case kSSEFloat64ToInt32:
1432  __ cvttsd2si(i.OutputRegister(), i.InputOperand(0));
1433  break;
1434  case kSSEFloat64ToUint32:
1435  __ Cvttsd2ui(i.OutputRegister(), i.InputOperand(0), kScratchDoubleReg);
1436  break;
1437  case kSSEInt32ToFloat32:
1438  __ cvtsi2ss(i.OutputDoubleRegister(), i.InputOperand(0));
1439  break;
1440  case kSSEUint32ToFloat32:
1441  __ Cvtui2ss(i.OutputDoubleRegister(), i.InputOperand(0),
1442  i.TempRegister(0));
1443  break;
1444  case kSSEInt32ToFloat64:
1445  __ cvtsi2sd(i.OutputDoubleRegister(), i.InputOperand(0));
1446  break;
1447  case kSSEUint32ToFloat64:
1448  __ Cvtui2sd(i.OutputDoubleRegister(), i.InputOperand(0),
1449  i.TempRegister(0));
1450  break;
1451  case kSSEFloat64ExtractLowWord32:
1452  if (instr->InputAt(0)->IsFPStackSlot()) {
1453  __ mov(i.OutputRegister(), i.InputOperand(0));
1454  } else {
1455  __ movd(i.OutputRegister(), i.InputDoubleRegister(0));
1456  }
1457  break;
1458  case kSSEFloat64ExtractHighWord32:
1459  if (instr->InputAt(0)->IsFPStackSlot()) {
1460  __ mov(i.OutputRegister(), i.InputOperand(0, kDoubleSize / 2));
1461  } else {
1462  __ Pextrd(i.OutputRegister(), i.InputDoubleRegister(0), 1);
1463  }
1464  break;
1465  case kSSEFloat64InsertLowWord32:
1466  __ Pinsrd(i.OutputDoubleRegister(), i.InputOperand(1), 0);
1467  break;
1468  case kSSEFloat64InsertHighWord32:
1469  __ Pinsrd(i.OutputDoubleRegister(), i.InputOperand(1), 1);
1470  break;
1471  case kSSEFloat64LoadLowWord32:
1472  __ movd(i.OutputDoubleRegister(), i.InputOperand(0));
1473  break;
1474  case kAVXFloat32Add: {
1475  CpuFeatureScope avx_scope(tasm(), AVX);
1476  __ vaddss(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
1477  i.InputOperand(1));
1478  break;
1479  }
1480  case kAVXFloat32Sub: {
1481  CpuFeatureScope avx_scope(tasm(), AVX);
1482  __ vsubss(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
1483  i.InputOperand(1));
1484  break;
1485  }
1486  case kAVXFloat32Mul: {
1487  CpuFeatureScope avx_scope(tasm(), AVX);
1488  __ vmulss(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
1489  i.InputOperand(1));
1490  break;
1491  }
1492  case kAVXFloat32Div: {
1493  CpuFeatureScope avx_scope(tasm(), AVX);
1494  __ vdivss(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
1495  i.InputOperand(1));
1496  // Don't delete this mov. It may improve performance on some CPUs,
1497  // when there is a (v)mulss depending on the result.
1498  __ movaps(i.OutputDoubleRegister(), i.OutputDoubleRegister());
1499  break;
1500  }
1501  case kAVXFloat64Add: {
1502  CpuFeatureScope avx_scope(tasm(), AVX);
1503  __ vaddsd(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
1504  i.InputOperand(1));
1505  break;
1506  }
1507  case kAVXFloat64Sub: {
1508  CpuFeatureScope avx_scope(tasm(), AVX);
1509  __ vsubsd(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
1510  i.InputOperand(1));
1511  break;
1512  }
1513  case kAVXFloat64Mul: {
1514  CpuFeatureScope avx_scope(tasm(), AVX);
1515  __ vmulsd(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
1516  i.InputOperand(1));
1517  break;
1518  }
1519  case kAVXFloat64Div: {
1520  CpuFeatureScope avx_scope(tasm(), AVX);
1521  __ vdivsd(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
1522  i.InputOperand(1));
1523  // Don't delete this mov. It may improve performance on some CPUs,
1524  // when there is a (v)mulsd depending on the result.
1525  __ movaps(i.OutputDoubleRegister(), i.OutputDoubleRegister());
1526  break;
1527  }
1528  case kAVXFloat32Abs: {
1529  // TODO(bmeurer): Use RIP relative 128-bit constants.
1530  __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
1531  __ psrlq(kScratchDoubleReg, 33);
1532  CpuFeatureScope avx_scope(tasm(), AVX);
1533  __ vandps(i.OutputDoubleRegister(), kScratchDoubleReg, i.InputOperand(0));
1534  break;
1535  }
1536  case kAVXFloat32Neg: {
1537  // TODO(bmeurer): Use RIP relative 128-bit constants.
1538  __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
1539  __ psllq(kScratchDoubleReg, 31);
1540  CpuFeatureScope avx_scope(tasm(), AVX);
1541  __ vxorps(i.OutputDoubleRegister(), kScratchDoubleReg, i.InputOperand(0));
1542  break;
1543  }
1544  case kAVXFloat64Abs: {
1545  // TODO(bmeurer): Use RIP relative 128-bit constants.
1546  __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
1547  __ psrlq(kScratchDoubleReg, 1);
1548  CpuFeatureScope avx_scope(tasm(), AVX);
1549  __ vandpd(i.OutputDoubleRegister(), kScratchDoubleReg, i.InputOperand(0));
1550  break;
1551  }
1552  case kAVXFloat64Neg: {
1553  // TODO(bmeurer): Use RIP relative 128-bit constants.
1554  __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
1555  __ psllq(kScratchDoubleReg, 63);
1556  CpuFeatureScope avx_scope(tasm(), AVX);
1557  __ vxorpd(i.OutputDoubleRegister(), kScratchDoubleReg, i.InputOperand(0));
1558  break;
1559  }
1560  case kSSEFloat64SilenceNaN:
1561  __ xorpd(kScratchDoubleReg, kScratchDoubleReg);
1562  __ subsd(i.InputDoubleRegister(0), kScratchDoubleReg);
1563  break;
1564  case kIA32Movsxbl:
1565  ASSEMBLE_MOVX(movsx_b);
1566  break;
1567  case kIA32Movzxbl:
1568  ASSEMBLE_MOVX(movzx_b);
1569  break;
1570  case kIA32Movb: {
1571  size_t index = 0;
1572  Operand operand = i.MemoryOperand(&index);
1573  if (HasImmediateInput(instr, index)) {
1574  __ mov_b(operand, i.InputInt8(index));
1575  } else {
1576  __ mov_b(operand, i.InputRegister(index));
1577  }
1578  break;
1579  }
1580  case kIA32Movsxwl:
1581  ASSEMBLE_MOVX(movsx_w);
1582  break;
1583  case kIA32Movzxwl:
1584  ASSEMBLE_MOVX(movzx_w);
1585  break;
1586  case kIA32Movw: {
1587  size_t index = 0;
1588  Operand operand = i.MemoryOperand(&index);
1589  if (HasImmediateInput(instr, index)) {
1590  __ mov_w(operand, i.InputInt16(index));
1591  } else {
1592  __ mov_w(operand, i.InputRegister(index));
1593  }
1594  break;
1595  }
1596  case kIA32Movl:
1597  if (instr->HasOutput()) {
1598  __ mov(i.OutputRegister(), i.MemoryOperand());
1599  } else {
1600  size_t index = 0;
1601  Operand operand = i.MemoryOperand(&index);
1602  if (HasImmediateInput(instr, index)) {
1603  __ mov(operand, i.InputImmediate(index));
1604  } else {
1605  __ mov(operand, i.InputRegister(index));
1606  }
1607  }
1608  break;
1609  case kIA32Movsd:
1610  if (instr->HasOutput()) {
1611  __ movsd(i.OutputDoubleRegister(), i.MemoryOperand());
1612  } else {
1613  size_t index = 0;
1614  Operand operand = i.MemoryOperand(&index);
1615  __ movsd(operand, i.InputDoubleRegister(index));
1616  }
1617  break;
1618  case kIA32Movss:
1619  if (instr->HasOutput()) {
1620  __ movss(i.OutputDoubleRegister(), i.MemoryOperand());
1621  } else {
1622  size_t index = 0;
1623  Operand operand = i.MemoryOperand(&index);
1624  __ movss(operand, i.InputDoubleRegister(index));
1625  }
1626  break;
1627  case kIA32Movdqu:
1628  if (instr->HasOutput()) {
1629  __ Movdqu(i.OutputSimd128Register(), i.MemoryOperand());
1630  } else {
1631  size_t index = 0;
1632  Operand operand = i.MemoryOperand(&index);
1633  __ Movdqu(operand, i.InputSimd128Register(index));
1634  }
1635  break;
1636  case kIA32BitcastFI:
1637  if (instr->InputAt(0)->IsFPStackSlot()) {
1638  __ mov(i.OutputRegister(), i.InputOperand(0));
1639  } else {
1640  __ movd(i.OutputRegister(), i.InputDoubleRegister(0));
1641  }
1642  break;
1643  case kIA32BitcastIF:
1644  if (instr->InputAt(0)->IsRegister()) {
1645  __ movd(i.OutputDoubleRegister(), i.InputRegister(0));
1646  } else {
1647  __ movss(i.OutputDoubleRegister(), i.InputOperand(0));
1648  }
1649  break;
1650  case kIA32Lea: {
1651  AddressingMode mode = AddressingModeField::decode(instr->opcode());
1652  // Shorten "leal" to "addl", "subl" or "shll" if the register allocation
1653  // and addressing mode just happens to work out. The "addl"/"subl" forms
1654  // in these cases are faster based on measurements.
1655  if (mode == kMode_MI) {
1656  __ Move(i.OutputRegister(), Immediate(i.InputInt32(0)));
1657  } else if (i.InputRegister(0) == i.OutputRegister()) {
1658  if (mode == kMode_MRI) {
1659  int32_t constant_summand = i.InputInt32(1);
1660  if (constant_summand > 0) {
1661  __ add(i.OutputRegister(), Immediate(constant_summand));
1662  } else if (constant_summand < 0) {
1663  __ sub(i.OutputRegister(), Immediate(-constant_summand));
1664  }
1665  } else if (mode == kMode_MR1) {
1666  if (i.InputRegister(1) == i.OutputRegister()) {
1667  __ shl(i.OutputRegister(), 1);
1668  } else {
1669  __ add(i.OutputRegister(), i.InputRegister(1));
1670  }
1671  } else if (mode == kMode_M2) {
1672  __ shl(i.OutputRegister(), 1);
1673  } else if (mode == kMode_M4) {
1674  __ shl(i.OutputRegister(), 2);
1675  } else if (mode == kMode_M8) {
1676  __ shl(i.OutputRegister(), 3);
1677  } else {
1678  __ lea(i.OutputRegister(), i.MemoryOperand());
1679  }
1680  } else if (mode == kMode_MR1 &&
1681  i.InputRegister(1) == i.OutputRegister()) {
1682  __ add(i.OutputRegister(), i.InputRegister(0));
1683  } else {
1684  __ lea(i.OutputRegister(), i.MemoryOperand());
1685  }
1686  break;
1687  }
1688  case kIA32PushFloat32:
1689  if (instr->InputAt(0)->IsFPRegister()) {
1690  __ sub(esp, Immediate(kFloatSize));
1691  __ movss(Operand(esp, 0), i.InputDoubleRegister(0));
1692  frame_access_state()->IncreaseSPDelta(kFloatSize / kPointerSize);
1693  } else if (HasImmediateInput(instr, 0)) {
1694  __ Move(kScratchDoubleReg, i.InputFloat32(0));
1695  __ sub(esp, Immediate(kFloatSize));
1696  __ movss(Operand(esp, 0), kScratchDoubleReg);
1697  frame_access_state()->IncreaseSPDelta(kFloatSize / kPointerSize);
1698  } else {
1699  __ movss(kScratchDoubleReg, i.InputOperand(0));
1700  __ sub(esp, Immediate(kFloatSize));
1701  __ movss(Operand(esp, 0), kScratchDoubleReg);
1702  frame_access_state()->IncreaseSPDelta(kFloatSize / kPointerSize);
1703  }
1704  break;
1705  case kIA32PushFloat64:
1706  if (instr->InputAt(0)->IsFPRegister()) {
1707  __ sub(esp, Immediate(kDoubleSize));
1708  __ movsd(Operand(esp, 0), i.InputDoubleRegister(0));
1709  frame_access_state()->IncreaseSPDelta(kDoubleSize / kPointerSize);
1710  } else if (HasImmediateInput(instr, 0)) {
1711  __ Move(kScratchDoubleReg, i.InputDouble(0));
1712  __ sub(esp, Immediate(kDoubleSize));
1713  __ movsd(Operand(esp, 0), kScratchDoubleReg);
1714  frame_access_state()->IncreaseSPDelta(kDoubleSize / kPointerSize);
1715  } else {
1716  __ movsd(kScratchDoubleReg, i.InputOperand(0));
1717  __ sub(esp, Immediate(kDoubleSize));
1718  __ movsd(Operand(esp, 0), kScratchDoubleReg);
1719  frame_access_state()->IncreaseSPDelta(kDoubleSize / kPointerSize);
1720  }
1721  break;
1722  case kIA32PushSimd128:
1723  if (instr->InputAt(0)->IsFPRegister()) {
1724  __ sub(esp, Immediate(kSimd128Size));
1725  __ movups(Operand(esp, 0), i.InputSimd128Register(0));
1726  } else {
1727  __ movups(kScratchDoubleReg, i.InputOperand(0));
1728  __ sub(esp, Immediate(kSimd128Size));
1729  __ movups(Operand(esp, 0), kScratchDoubleReg);
1730  }
1731  frame_access_state()->IncreaseSPDelta(kSimd128Size / kPointerSize);
1732  break;
1733  case kIA32Push:
1734  if (AddressingModeField::decode(instr->opcode()) != kMode_None) {
1735  size_t index = 0;
1736  Operand operand = i.MemoryOperand(&index);
1737  __ push(operand);
1738  frame_access_state()->IncreaseSPDelta(kFloatSize / kPointerSize);
1739  } else if (instr->InputAt(0)->IsFPRegister()) {
1740  __ sub(esp, Immediate(kFloatSize));
1741  __ movsd(Operand(esp, 0), i.InputDoubleRegister(0));
1742  frame_access_state()->IncreaseSPDelta(kFloatSize / kPointerSize);
1743  } else if (HasImmediateInput(instr, 0)) {
1744  __ push(i.InputImmediate(0));
1745  frame_access_state()->IncreaseSPDelta(1);
1746  } else {
1747  __ push(i.InputOperand(0));
1748  frame_access_state()->IncreaseSPDelta(1);
1749  }
1750  break;
1751  case kIA32Poke: {
1752  int slot = MiscField::decode(instr->opcode());
1753  if (HasImmediateInput(instr, 0)) {
1754  __ mov(Operand(esp, slot * kPointerSize), i.InputImmediate(0));
1755  } else {
1756  __ mov(Operand(esp, slot * kPointerSize), i.InputRegister(0));
1757  }
1758  break;
1759  }
1760  case kIA32Peek: {
1761  int reverse_slot = i.InputInt32(0) + 1;
1762  int offset =
1763  FrameSlotToFPOffset(frame()->GetTotalFrameSlotCount() - reverse_slot);
1764  if (instr->OutputAt(0)->IsFPRegister()) {
1765  LocationOperand* op = LocationOperand::cast(instr->OutputAt(0));
1766  if (op->representation() == MachineRepresentation::kFloat64) {
1767  __ movsd(i.OutputDoubleRegister(), Operand(ebp, offset));
1768  } else {
1769  DCHECK_EQ(MachineRepresentation::kFloat32, op->representation());
1770  __ movss(i.OutputFloatRegister(), Operand(ebp, offset));
1771  }
1772  } else {
1773  __ mov(i.OutputRegister(), Operand(ebp, offset));
1774  }
1775  break;
1776  }
    case kSSEF32x4Splat: {
      // Broadcast lane 0 to all four lanes. The SSE shufps encoding is
      // destructive, so the output must alias input 0 (checked below).
      DCHECK_EQ(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
      XMMRegister dst = i.OutputSimd128Register();
      __ shufps(dst, dst, 0x0);
      break;
    }
    case kAVXF32x4Splat: {
      // AVX three-operand form: no aliasing constraint on the output.
      CpuFeatureScope avx_scope(tasm(), AVX);
      XMMRegister src = i.InputFloatRegister(0);
      __ vshufps(i.OutputSimd128Register(), src, src, 0x0);
      break;
    }
    case kSSEF32x4ExtractLane: {
      DCHECK_EQ(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
      XMMRegister dst = i.OutputFloatRegister();
      int8_t lane = i.InputInt8(1);
      // Lane 0 is already in place since dst aliases the input; otherwise
      // shuffle the requested lane down into position 0.
      if (lane != 0) {
        DCHECK_LT(lane, 4);
        __ shufps(dst, dst, lane);
      }
      break;
    }
    case kAVXF32x4ExtractLane: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      XMMRegister dst = i.OutputFloatRegister();
      XMMRegister src = i.InputSimd128Register(0);
      int8_t lane = i.InputInt8(1);
      if (lane == 0) {
        // Lane 0: a plain register move suffices (elided when dst == src).
        if (dst != src) __ vmovaps(dst, src);
      } else {
        DCHECK_LT(lane, 4);
        __ vshufps(dst, src, src, lane);
      }
      break;
    }
    case kSSEF32x4ReplaceLane: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      CpuFeatureScope sse_scope(tasm(), SSE4_1);
      // insertps selects the destination lane via imm8 bits [5:4].
      __ insertps(i.OutputSimd128Register(), i.InputOperand(2),
                  i.InputInt8(1) << 4);
      break;
    }
    case kAVXF32x4ReplaceLane: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      __ vinsertps(i.OutputSimd128Register(), i.InputSimd128Register(0),
                   i.InputOperand(2), i.InputInt8(1) << 4);
      break;
    }
    case kIA32F32x4SConvertI32x4: {
      // Signed int32x4 -> float32x4: cvtdq2ps converts directly.
      __ Cvtdq2ps(i.OutputSimd128Register(), i.InputOperand(0));
      break;
    }
    case kSSEF32x4UConvertI32x4: {
      // Unsigned int32x4 -> float32x4. cvtdq2ps only handles signed input,
      // so split each lane: the low 16 bits convert exactly, and the
      // remaining high part is halved (making it non-negative), converted,
      // then doubled before the halves are re-combined.
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      CpuFeatureScope sse_scope(tasm(), SSE4_1);
      XMMRegister dst = i.OutputSimd128Register();
      __ pxor(kScratchDoubleReg, kScratchDoubleReg);      // zeros
      __ pblendw(kScratchDoubleReg, dst, 0x55);           // get lo 16 bits
      __ psubd(dst, kScratchDoubleReg);                   // get hi 16 bits
      __ cvtdq2ps(kScratchDoubleReg, kScratchDoubleReg);  // convert lo exactly
      __ psrld(dst, 1);   // divide by 2 to get in unsigned range
      __ cvtdq2ps(dst, dst);    // convert hi exactly
      __ addps(dst, dst);       // double hi, exactly
      __ addps(dst, kScratchDoubleReg);  // add hi and lo, may round.
      break;
    }
    case kAVXF32x4UConvertI32x4: {
      // Same algorithm as the SSE case above, in non-destructive AVX form.
      CpuFeatureScope avx_scope(tasm(), AVX);
      XMMRegister dst = i.OutputSimd128Register();
      XMMRegister src = i.InputSimd128Register(0);
      __ vpxor(kScratchDoubleReg, kScratchDoubleReg,
               kScratchDoubleReg);  // zeros
      __ vpblendw(kScratchDoubleReg, kScratchDoubleReg, src,
                  0x55);                               // get lo 16 bits
      __ vpsubd(dst, src, kScratchDoubleReg);          // get hi 16 bits
      __ vcvtdq2ps(kScratchDoubleReg, kScratchDoubleReg);  // convert lo exactly
      __ vpsrld(dst, dst, 1);  // divide by 2 to get in unsigned range
      __ vcvtdq2ps(dst, dst);  // convert hi exactly
      __ vaddps(dst, dst, dst);  // double hi, exactly
      __ vaddps(dst, dst, kScratchDoubleReg);  // add hi and lo, may round.
      break;
    }
    case kSSEF32x4Abs: {
      // Clear each lane's sign bit by ANDing with 0x7FFFFFFF. The mask is
      // materialized as all-ones (pcmpeqd reg,reg) shifted right by one.
      XMMRegister dst = i.OutputSimd128Register();
      Operand src = i.InputOperand(0);
      if (src.is_reg(dst)) {
        // In place: build the mask in the scratch register.
        __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
        __ psrld(kScratchDoubleReg, 1);
        __ andps(dst, kScratchDoubleReg);
      } else {
        // dst is free: build the mask there, then AND with the source.
        __ pcmpeqd(dst, dst);
        __ psrld(dst, 1);
        __ andps(dst, src);
      }
      break;
    }
    case kAVXF32x4Abs: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
      __ vpsrld(kScratchDoubleReg, kScratchDoubleReg, 1);
      __ vandps(i.OutputSimd128Register(), kScratchDoubleReg,
                i.InputOperand(0));
      break;
    }
    case kSSEF32x4Neg: {
      // Flip each lane's sign bit by XORing with 0x80000000 (all-ones
      // shifted left by 31). Mirrors the register-allocation split in Abs.
      XMMRegister dst = i.OutputSimd128Register();
      Operand src = i.InputOperand(0);
      if (src.is_reg(dst)) {
        __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
        __ pslld(kScratchDoubleReg, 31);
        __ xorps(dst, kScratchDoubleReg);
      } else {
        __ pcmpeqd(dst, dst);
        __ pslld(dst, 31);
        __ xorps(dst, src);
      }
      break;
    }
    case kAVXF32x4Neg: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
      __ vpslld(kScratchDoubleReg, kScratchDoubleReg, 31);
      __ vxorps(i.OutputSimd128Register(), kScratchDoubleReg,
                i.InputOperand(0));
      break;
    }
    case kIA32F32x4RecipApprox: {
      // Hardware reciprocal approximation (rcpps); inexact by design.
      __ Rcpps(i.OutputSimd128Register(), i.InputOperand(0));
      break;
    }
    case kIA32F32x4RecipSqrtApprox: {
      // Hardware reciprocal-sqrt approximation (rsqrtps); inexact by design.
      __ Rsqrtps(i.OutputSimd128Register(), i.InputOperand(0));
      break;
    }
    // Binary f32x4 arithmetic and comparison ops. The kSSE* variants use
    // the destructive two-operand encodings, so the output register must
    // alias input 0 (enforced by the DCHECKs); the kAVX* variants use the
    // non-destructive three-operand encodings. The cmp*ps comparisons
    // produce per-lane all-ones/all-zeros masks.
    case kSSEF32x4Add: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      __ addps(i.OutputSimd128Register(), i.InputOperand(1));
      break;
    }
    case kAVXF32x4Add: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      __ vaddps(i.OutputSimd128Register(), i.InputSimd128Register(0),
                i.InputOperand(1));
      break;
    }
    case kSSEF32x4AddHoriz: {
      // Horizontal pairwise add (haddps) requires SSE3.
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      CpuFeatureScope sse_scope(tasm(), SSE3);
      __ haddps(i.OutputSimd128Register(), i.InputOperand(1));
      break;
    }
    case kAVXF32x4AddHoriz: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      __ vhaddps(i.OutputSimd128Register(), i.InputSimd128Register(0),
                 i.InputOperand(1));
      break;
    }
    case kSSEF32x4Sub: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      __ subps(i.OutputSimd128Register(), i.InputOperand(1));
      break;
    }
    case kAVXF32x4Sub: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      __ vsubps(i.OutputSimd128Register(), i.InputSimd128Register(0),
                i.InputOperand(1));
      break;
    }
    case kSSEF32x4Mul: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      __ mulps(i.OutputSimd128Register(), i.InputOperand(1));
      break;
    }
    case kAVXF32x4Mul: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      __ vmulps(i.OutputSimd128Register(), i.InputSimd128Register(0),
                i.InputOperand(1));
      break;
    }
    case kSSEF32x4Min: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      __ minps(i.OutputSimd128Register(), i.InputOperand(1));
      break;
    }
    case kAVXF32x4Min: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      __ vminps(i.OutputSimd128Register(), i.InputSimd128Register(0),
                i.InputOperand(1));
      break;
    }
    case kSSEF32x4Max: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      __ maxps(i.OutputSimd128Register(), i.InputOperand(1));
      break;
    }
    case kAVXF32x4Max: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      __ vmaxps(i.OutputSimd128Register(), i.InputSimd128Register(0),
                i.InputOperand(1));
      break;
    }
    case kSSEF32x4Eq: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      __ cmpeqps(i.OutputSimd128Register(), i.InputOperand(1));
      break;
    }
    case kAVXF32x4Eq: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      __ vcmpeqps(i.OutputSimd128Register(), i.InputSimd128Register(0),
                  i.InputOperand(1));
      break;
    }
    case kSSEF32x4Ne: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      __ cmpneqps(i.OutputSimd128Register(), i.InputOperand(1));
      break;
    }
    case kAVXF32x4Ne: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      __ vcmpneqps(i.OutputSimd128Register(), i.InputSimd128Register(0),
                   i.InputOperand(1));
      break;
    }
    case kSSEF32x4Lt: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      __ cmpltps(i.OutputSimd128Register(), i.InputOperand(1));
      break;
    }
    case kAVXF32x4Lt: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      __ vcmpltps(i.OutputSimd128Register(), i.InputSimd128Register(0),
                  i.InputOperand(1));
      break;
    }
    case kSSEF32x4Le: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      __ cmpleps(i.OutputSimd128Register(), i.InputOperand(1));
      break;
    }
    case kAVXF32x4Le: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      __ vcmpleps(i.OutputSimd128Register(), i.InputSimd128Register(0),
                  i.InputOperand(1));
      break;
    }
    case kIA32I32x4Splat: {
      // Move the scalar into lane 0, then broadcast it to all lanes.
      XMMRegister dst = i.OutputSimd128Register();
      __ Movd(dst, i.InputOperand(0));
      __ Pshufd(dst, dst, 0x0);
      break;
    }
    case kIA32I32x4ExtractLane: {
      __ Pextrd(i.OutputRegister(), i.InputSimd128Register(0), i.InputInt8(1));
      break;
    }
    case kSSEI32x4ReplaceLane: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      CpuFeatureScope sse_scope(tasm(), SSE4_1);
      __ pinsrd(i.OutputSimd128Register(), i.InputOperand(2), i.InputInt8(1));
      break;
    }
    case kAVXI32x4ReplaceLane: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      __ vpinsrd(i.OutputSimd128Register(), i.InputSimd128Register(0),
                 i.InputOperand(2), i.InputInt8(1));
      break;
    }
    case kSSEI32x4SConvertF32x4: {
      // float32x4 -> signed int32x4 with NaN mapped to 0 and positive
      // overflow saturated to INT32_MAX (cvttps2dq alone yields 0x80000000
      // for both NaN and out-of-range lanes).
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      XMMRegister dst = i.OutputSimd128Register();
      // NAN->0
      __ movaps(kScratchDoubleReg, dst);
      __ cmpeqps(kScratchDoubleReg, kScratchDoubleReg);
      __ pand(dst, kScratchDoubleReg);
      // Set top bit if >= 0 (but not -0.0!)
      __ pxor(kScratchDoubleReg, dst);
      // Convert
      __ cvttps2dq(dst, dst);
      // Set top bit if >=0 is now < 0
      __ pand(kScratchDoubleReg, dst);
      __ psrad(kScratchDoubleReg, 31);
      // Set positive overflow lanes to 0x7FFFFFFF
      __ pxor(dst, kScratchDoubleReg);
      break;
    }
    case kAVXI32x4SConvertF32x4: {
      // AVX version of the sequence above.
      CpuFeatureScope avx_scope(tasm(), AVX);
      XMMRegister dst = i.OutputSimd128Register();
      XMMRegister src = i.InputSimd128Register(0);
      // NAN->0
      __ vcmpeqps(kScratchDoubleReg, src, src);
      __ vpand(dst, src, kScratchDoubleReg);
      // Set top bit if >= 0 (but not -0.0!)
      __ vpxor(kScratchDoubleReg, kScratchDoubleReg, dst);
      // Convert
      __ vcvttps2dq(dst, dst);
      // Set top bit if >=0 is now < 0
      __ vpand(kScratchDoubleReg, kScratchDoubleReg, dst);
      __ vpsrad(kScratchDoubleReg, kScratchDoubleReg, 31);
      // Set positive overflow lanes to 0x7FFFFFFF
      __ vpxor(dst, dst, kScratchDoubleReg);
      break;
    }
    case kIA32I32x4SConvertI16x8Low: {
      // Sign-extend the low 4 int16 lanes to int32.
      __ Pmovsxwd(i.OutputSimd128Register(), i.InputOperand(0));
      break;
    }
    case kIA32I32x4SConvertI16x8High: {
      // Shift the high 8 bytes down, then sign-extend as above.
      XMMRegister dst = i.OutputSimd128Register();
      __ Palignr(dst, i.InputOperand(0), 8);
      __ Pmovsxwd(dst, dst);
      break;
    }
    case kIA32I32x4Neg: {
      XMMRegister dst = i.OutputSimd128Register();
      Operand src = i.InputOperand(0);
      if (src.is_reg(dst)) {
        // In place: psignd with an all-ones (-1) operand negates each lane.
        __ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
        __ Psignd(dst, kScratchDoubleReg);
      } else {
        // Otherwise compute 0 - src.
        __ Pxor(dst, dst);
        __ Psubd(dst, src);
      }
      break;
    }
    // I32x4 shifts, arithmetic and signed comparisons. As elsewhere, the
    // kSSE* encodings are destructive (output must alias input 0, per the
    // DCHECKs) while the kAVX* encodings are three-operand.
    case kSSEI32x4Shl: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      __ pslld(i.OutputSimd128Register(), i.InputInt8(1));
      break;
    }
    case kAVXI32x4Shl: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      __ vpslld(i.OutputSimd128Register(), i.InputSimd128Register(0),
                i.InputInt8(1));
      break;
    }
    case kSSEI32x4ShrS: {
      // Arithmetic (sign-preserving) right shift.
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      __ psrad(i.OutputSimd128Register(), i.InputInt8(1));
      break;
    }
    case kAVXI32x4ShrS: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      __ vpsrad(i.OutputSimd128Register(), i.InputSimd128Register(0),
                i.InputInt8(1));
      break;
    }
    case kSSEI32x4Add: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      __ paddd(i.OutputSimd128Register(), i.InputOperand(1));
      break;
    }
    case kAVXI32x4Add: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      __ vpaddd(i.OutputSimd128Register(), i.InputSimd128Register(0),
                i.InputOperand(1));
      break;
    }
    case kSSEI32x4AddHoriz: {
      // Horizontal pairwise add (phaddd) requires SSSE3.
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      CpuFeatureScope sse_scope(tasm(), SSSE3);
      __ phaddd(i.OutputSimd128Register(), i.InputOperand(1));
      break;
    }
    case kAVXI32x4AddHoriz: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      __ vphaddd(i.OutputSimd128Register(), i.InputSimd128Register(0),
                 i.InputOperand(1));
      break;
    }
    case kSSEI32x4Sub: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      __ psubd(i.OutputSimd128Register(), i.InputOperand(1));
      break;
    }
    case kAVXI32x4Sub: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      __ vpsubd(i.OutputSimd128Register(), i.InputSimd128Register(0),
                i.InputOperand(1));
      break;
    }
    case kSSEI32x4Mul: {
      // 32-bit lane-wise multiply (pmulld) requires SSE4.1.
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      CpuFeatureScope sse_scope(tasm(), SSE4_1);
      __ pmulld(i.OutputSimd128Register(), i.InputOperand(1));
      break;
    }
    case kAVXI32x4Mul: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      __ vpmulld(i.OutputSimd128Register(), i.InputSimd128Register(0),
                 i.InputOperand(1));
      break;
    }
    case kSSEI32x4MinS: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      CpuFeatureScope sse_scope(tasm(), SSE4_1);
      __ pminsd(i.OutputSimd128Register(), i.InputOperand(1));
      break;
    }
    case kAVXI32x4MinS: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      __ vpminsd(i.OutputSimd128Register(), i.InputSimd128Register(0),
                 i.InputOperand(1));
      break;
    }
    case kSSEI32x4MaxS: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      CpuFeatureScope sse_scope(tasm(), SSE4_1);
      __ pmaxsd(i.OutputSimd128Register(), i.InputOperand(1));
      break;
    }
    case kAVXI32x4MaxS: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      __ vpmaxsd(i.OutputSimd128Register(), i.InputSimd128Register(0),
                 i.InputOperand(1));
      break;
    }
    case kSSEI32x4Eq: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      __ pcmpeqd(i.OutputSimd128Register(), i.InputOperand(1));
      break;
    }
    case kAVXI32x4Eq: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      __ vpcmpeqd(i.OutputSimd128Register(), i.InputSimd128Register(0),
                  i.InputOperand(1));
      break;
    }
    case kSSEI32x4Ne: {
      // There is no pcmpneq: compute equality, then invert the mask by
      // XORing with all-ones.
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      __ pcmpeqd(i.OutputSimd128Register(), i.InputOperand(1));
      __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
      __ pxor(i.OutputSimd128Register(), kScratchDoubleReg);
      break;
    }
    case kAVXI32x4Ne: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      __ vpcmpeqd(i.OutputSimd128Register(), i.InputSimd128Register(0),
                  i.InputOperand(1));
      __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
      __ vpxor(i.OutputSimd128Register(), i.OutputSimd128Register(),
               kScratchDoubleReg);
      break;
    }
    case kSSEI32x4GtS: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      __ pcmpgtd(i.OutputSimd128Register(), i.InputOperand(1));
      break;
    }
    case kAVXI32x4GtS: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      __ vpcmpgtd(i.OutputSimd128Register(), i.InputSimd128Register(0),
                  i.InputOperand(1));
      break;
    }
    case kSSEI32x4GeS: {
      // There is no pcmpge: use a >= b  <=>  min(a, b) == b.
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      CpuFeatureScope sse_scope(tasm(), SSE4_1);
      XMMRegister dst = i.OutputSimd128Register();
      Operand src = i.InputOperand(1);
      __ pminsd(dst, src);
      __ pcmpeqd(dst, src);
      break;
    }
    case kAVXI32x4GeS: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      XMMRegister src1 = i.InputSimd128Register(0);
      Operand src2 = i.InputOperand(1);
      __ vpminsd(kScratchDoubleReg, src1, src2);
      __ vpcmpeqd(i.OutputSimd128Register(), kScratchDoubleReg, src2);
      break;
    }
    case kSSEI32x4UConvertF32x4: {
      // float32x4 -> unsigned int32x4. cvttps2dq is signed-only, so lanes
      // above INT32_MAX are handled by also converting (src - max_signed)
      // and adding it back; NaN and negative lanes are clamped to 0 first.
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      CpuFeatureScope sse_scope(tasm(), SSE4_1);
      XMMRegister dst = i.OutputSimd128Register();
      XMMRegister tmp = i.ToSimd128Register(instr->TempAt(0));
      // NAN->0, negative->0
      __ pxor(kScratchDoubleReg, kScratchDoubleReg);
      __ maxps(dst, kScratchDoubleReg);
      // scratch: float representation of max_signed
      __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
      __ psrld(kScratchDoubleReg, 1);                     // 0x7fffffff
      __ cvtdq2ps(kScratchDoubleReg, kScratchDoubleReg);  // 0x4f000000
      // tmp: convert (src-max_signed).
      // Positive overflow lanes -> 0x7FFFFFFF
      // Negative lanes -> 0
      __ movaps(tmp, dst);
      __ subps(tmp, kScratchDoubleReg);
      __ cmpleps(kScratchDoubleReg, tmp);
      __ cvttps2dq(tmp, tmp);
      __ pxor(tmp, kScratchDoubleReg);
      __ pxor(kScratchDoubleReg, kScratchDoubleReg);
      __ pmaxsd(tmp, kScratchDoubleReg);
      // convert. Overflow lanes above max_signed will be 0x80000000
      __ cvttps2dq(dst, dst);
      // Add (src-max_signed) for overflow lanes.
      __ paddd(dst, tmp);
      break;
    }
    case kAVXI32x4UConvertF32x4: {
      // AVX version of the sequence above; still destructive on dst since
      // the conversion reuses the input register throughout.
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      CpuFeatureScope avx_scope(tasm(), AVX);
      XMMRegister dst = i.OutputSimd128Register();
      XMMRegister tmp = i.ToSimd128Register(instr->TempAt(0));
      // NAN->0, negative->0
      __ vpxor(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
      __ vmaxps(dst, dst, kScratchDoubleReg);
      // scratch: float representation of max_signed
      __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
      __ vpsrld(kScratchDoubleReg, kScratchDoubleReg, 1);  // 0x7fffffff
      __ vcvtdq2ps(kScratchDoubleReg, kScratchDoubleReg);  // 0x4f000000
      // tmp: convert (src-max_signed).
      // Positive overflow lanes -> 0x7FFFFFFF
      // Negative lanes -> 0
      __ vsubps(tmp, dst, kScratchDoubleReg);
      __ vcmpleps(kScratchDoubleReg, kScratchDoubleReg, tmp);
      __ vcvttps2dq(tmp, tmp);
      __ vpxor(tmp, tmp, kScratchDoubleReg);
      __ vpxor(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
      __ vpmaxsd(tmp, tmp, kScratchDoubleReg);
      // convert. Overflow lanes above max_signed will be 0x80000000
      __ vcvttps2dq(dst, dst);
      // Add (src-max_signed) for overflow lanes.
      __ vpaddd(dst, dst, tmp);
      break;
    }
    case kIA32I32x4UConvertI16x8Low: {
      // Zero-extend the low 4 uint16 lanes to uint32.
      __ Pmovzxwd(i.OutputSimd128Register(), i.InputOperand(0));
      break;
    }
    case kIA32I32x4UConvertI16x8High: {
      // Shift the high 8 bytes down, then zero-extend as above.
      XMMRegister dst = i.OutputSimd128Register();
      __ Palignr(dst, i.InputOperand(0), 8);
      __ Pmovzxwd(dst, dst);
      break;
    }
    // Unsigned I32x4 shifts, min/max and comparisons. SSE has no unsigned
    // compare instructions, so GtU/GeU are synthesized from unsigned
    // min/max plus equality (see the per-case notes).
    case kSSEI32x4ShrU: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      __ psrld(i.OutputSimd128Register(), i.InputInt8(1));
      break;
    }
    case kAVXI32x4ShrU: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      __ vpsrld(i.OutputSimd128Register(), i.InputSimd128Register(0),
                i.InputInt8(1));
      break;
    }
    case kSSEI32x4MinU: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      CpuFeatureScope sse_scope(tasm(), SSE4_1);
      __ pminud(i.OutputSimd128Register(), i.InputOperand(1));
      break;
    }
    case kAVXI32x4MinU: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      __ vpminud(i.OutputSimd128Register(), i.InputSimd128Register(0),
                 i.InputOperand(1));
      break;
    }
    case kSSEI32x4MaxU: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      CpuFeatureScope sse_scope(tasm(), SSE4_1);
      __ pmaxud(i.OutputSimd128Register(), i.InputOperand(1));
      break;
    }
    case kAVXI32x4MaxU: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      __ vpmaxud(i.OutputSimd128Register(), i.InputSimd128Register(0),
                 i.InputOperand(1));
      break;
    }
    case kSSEI32x4GtU: {
      // a >u b  <=>  !(max(a, b) == b): compute equality, then invert.
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      CpuFeatureScope sse_scope(tasm(), SSE4_1);
      XMMRegister dst = i.OutputSimd128Register();
      Operand src = i.InputOperand(1);
      __ pmaxud(dst, src);
      __ pcmpeqd(dst, src);
      __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
      __ pxor(dst, kScratchDoubleReg);
      break;
    }
    case kAVXI32x4GtU: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      XMMRegister dst = i.OutputSimd128Register();
      XMMRegister src1 = i.InputSimd128Register(0);
      Operand src2 = i.InputOperand(1);
      __ vpmaxud(kScratchDoubleReg, src1, src2);
      __ vpcmpeqd(dst, kScratchDoubleReg, src2);
      __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
      __ vpxor(dst, dst, kScratchDoubleReg);
      break;
    }
    case kSSEI32x4GeU: {
      // a >=u b  <=>  min(a, b) == b.
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      CpuFeatureScope sse_scope(tasm(), SSE4_1);
      XMMRegister dst = i.OutputSimd128Register();
      Operand src = i.InputOperand(1);
      __ pminud(dst, src);
      __ pcmpeqd(dst, src);
      break;
    }
    case kAVXI32x4GeU: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      XMMRegister src1 = i.InputSimd128Register(0);
      Operand src2 = i.InputOperand(1);
      __ vpminud(kScratchDoubleReg, src1, src2);
      __ vpcmpeqd(i.OutputSimd128Register(), kScratchDoubleReg, src2);
      break;
    }
    case kIA32I16x8Splat: {
      // Move the scalar into lane 0, broadcast it across the low 4 words,
      // then across all 8 words.
      XMMRegister dst = i.OutputSimd128Register();
      __ Movd(dst, i.InputOperand(0));
      __ Pshuflw(dst, dst, 0x0);
      __ Pshufd(dst, dst, 0x0);
      break;
    }
    case kIA32I16x8ExtractLane: {
      // pextrw zero-extends the word; sign-extend it to a full int32.
      Register dst = i.OutputRegister();
      __ Pextrw(dst, i.InputSimd128Register(0), i.InputInt8(1));
      __ movsx_w(dst, dst);
      break;
    }
    case kSSEI16x8ReplaceLane: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      __ pinsrw(i.OutputSimd128Register(), i.InputOperand(2), i.InputInt8(1));
      break;
    }
    case kAVXI16x8ReplaceLane: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      __ vpinsrw(i.OutputSimd128Register(), i.InputSimd128Register(0),
                 i.InputOperand(2), i.InputInt8(1));
      break;
    }
    case kIA32I16x8SConvertI8x16Low: {
      // Sign-extend the low 8 int8 lanes to int16.
      __ Pmovsxbw(i.OutputSimd128Register(), i.InputOperand(0));
      break;
    }
    case kIA32I16x8SConvertI8x16High: {
      // Shift the high 8 bytes down, then sign-extend as above.
      XMMRegister dst = i.OutputSimd128Register();
      __ Palignr(dst, i.InputOperand(0), 8);
      __ Pmovsxbw(dst, dst);
      break;
    }
    case kIA32I16x8Neg: {
      XMMRegister dst = i.OutputSimd128Register();
      Operand src = i.InputOperand(0);
      if (src.is_reg(dst)) {
        // In place: psignw with an all-ones (-1) operand negates each lane.
        __ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
        __ Psignw(dst, kScratchDoubleReg);
      } else {
        // Otherwise compute 0 - src.
        __ Pxor(dst, dst);
        __ Psubw(dst, src);
      }
      break;
    }
    // I16x8 shifts, narrowing pack, and arithmetic. kSSE* encodings are
    // destructive (output aliases input 0, per the DCHECKs); kAVX*
    // encodings are three-operand.
    case kSSEI16x8Shl: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      __ psllw(i.OutputSimd128Register(), i.InputInt8(1));
      break;
    }
    case kAVXI16x8Shl: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      __ vpsllw(i.OutputSimd128Register(), i.InputSimd128Register(0),
                i.InputInt8(1));
      break;
    }
    case kSSEI16x8ShrS: {
      // Arithmetic (sign-preserving) right shift.
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      __ psraw(i.OutputSimd128Register(), i.InputInt8(1));
      break;
    }
    case kAVXI16x8ShrS: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      __ vpsraw(i.OutputSimd128Register(), i.InputSimd128Register(0),
                i.InputInt8(1));
      break;
    }
    case kSSEI16x8SConvertI32x4: {
      // Pack two int32x4 inputs into one int16x8 with signed saturation.
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      __ packssdw(i.OutputSimd128Register(), i.InputOperand(1));
      break;
    }
    case kAVXI16x8SConvertI32x4: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      __ vpackssdw(i.OutputSimd128Register(), i.InputSimd128Register(0),
                   i.InputOperand(1));
      break;
    }
    case kSSEI16x8Add: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      __ paddw(i.OutputSimd128Register(), i.InputOperand(1));
      break;
    }
    case kAVXI16x8Add: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      __ vpaddw(i.OutputSimd128Register(), i.InputSimd128Register(0),
                i.InputOperand(1));
      break;
    }
    case kSSEI16x8AddSaturateS: {
      // Add with signed saturation (paddsw).
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      __ paddsw(i.OutputSimd128Register(), i.InputOperand(1));
      break;
    }
    case kAVXI16x8AddSaturateS: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      __ vpaddsw(i.OutputSimd128Register(), i.InputSimd128Register(0),
                 i.InputOperand(1));
      break;
    }
    case kSSEI16x8AddHoriz: {
      // Horizontal pairwise add (phaddw) requires SSSE3.
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      CpuFeatureScope sse_scope(tasm(), SSSE3);
      __ phaddw(i.OutputSimd128Register(), i.InputOperand(1));
      break;
    }
    case kAVXI16x8AddHoriz: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      __ vphaddw(i.OutputSimd128Register(), i.InputSimd128Register(0),
                 i.InputOperand(1));
      break;
    }
    case kSSEI16x8Sub: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      __ psubw(i.OutputSimd128Register(), i.InputOperand(1));
      break;
    }
    case kAVXI16x8Sub: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      __ vpsubw(i.OutputSimd128Register(), i.InputSimd128Register(0),
                i.InputOperand(1));
      break;
    }
    case kSSEI16x8SubSaturateS: {
      // Subtract with signed saturation (psubsw).
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      __ psubsw(i.OutputSimd128Register(), i.InputOperand(1));
      break;
    }
    case kAVXI16x8SubSaturateS: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      __ vpsubsw(i.OutputSimd128Register(), i.InputSimd128Register(0),
                 i.InputOperand(1));
      break;
    }
    case kSSEI16x8Mul: {
      // Lane-wise multiply keeping the low 16 bits of each product.
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      __ pmullw(i.OutputSimd128Register(), i.InputOperand(1));
      break;
    }
    case kAVXI16x8Mul: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      __ vpmullw(i.OutputSimd128Register(), i.InputSimd128Register(0),
                 i.InputOperand(1));
      break;
    }
    // Signed I16x8 min/max and comparisons. Ne inverts an equality mask;
    // GeS uses a >= b  <=>  min(a, b) == b, as in the I32x4 cases.
    case kSSEI16x8MinS: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      __ pminsw(i.OutputSimd128Register(), i.InputOperand(1));
      break;
    }
    case kAVXI16x8MinS: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      __ vpminsw(i.OutputSimd128Register(), i.InputSimd128Register(0),
                 i.InputOperand(1));
      break;
    }
    case kSSEI16x8MaxS: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      __ pmaxsw(i.OutputSimd128Register(), i.InputOperand(1));
      break;
    }
    case kAVXI16x8MaxS: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      __ vpmaxsw(i.OutputSimd128Register(), i.InputSimd128Register(0),
                 i.InputOperand(1));
      break;
    }
    case kSSEI16x8Eq: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      __ pcmpeqw(i.OutputSimd128Register(), i.InputOperand(1));
      break;
    }
    case kAVXI16x8Eq: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      __ vpcmpeqw(i.OutputSimd128Register(), i.InputSimd128Register(0),
                  i.InputOperand(1));
      break;
    }
    case kSSEI16x8Ne: {
      // Equality, then invert the mask by XORing with all-ones.
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      __ pcmpeqw(i.OutputSimd128Register(), i.InputOperand(1));
      __ pcmpeqw(kScratchDoubleReg, kScratchDoubleReg);
      __ pxor(i.OutputSimd128Register(), kScratchDoubleReg);
      break;
    }
    case kAVXI16x8Ne: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      __ vpcmpeqw(i.OutputSimd128Register(), i.InputSimd128Register(0),
                  i.InputOperand(1));
      __ vpcmpeqw(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
      __ vpxor(i.OutputSimd128Register(), i.OutputSimd128Register(),
               kScratchDoubleReg);
      break;
    }
    case kSSEI16x8GtS: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      __ pcmpgtw(i.OutputSimd128Register(), i.InputOperand(1));
      break;
    }
    case kAVXI16x8GtS: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      __ vpcmpgtw(i.OutputSimd128Register(), i.InputSimd128Register(0),
                  i.InputOperand(1));
      break;
    }
    case kSSEI16x8GeS: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      XMMRegister dst = i.OutputSimd128Register();
      Operand src = i.InputOperand(1);
      __ pminsw(dst, src);
      __ pcmpeqw(dst, src);
      break;
    }
    case kAVXI16x8GeS: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      XMMRegister src1 = i.InputSimd128Register(0);
      Operand src2 = i.InputOperand(1);
      __ vpminsw(kScratchDoubleReg, src1, src2);
      __ vpcmpeqw(i.OutputSimd128Register(), kScratchDoubleReg, src2);
      break;
    }
    case kIA32I16x8UConvertI8x16Low: {
      // Zero-extend the low 8 uint8 lanes to uint16.
      __ Pmovzxbw(i.OutputSimd128Register(), i.InputOperand(0));
      break;
    }
    case kIA32I16x8UConvertI8x16High: {
      // Shift the high 8 bytes down, then zero-extend as above.
      XMMRegister dst = i.OutputSimd128Register();
      __ Palignr(dst, i.InputOperand(0), 8);
      __ Pmovzxbw(dst, dst);
      break;
    }
    case kSSEI16x8ShrU: {
      // Logical (zero-filling) right shift.
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      __ psrlw(i.OutputSimd128Register(), i.InputInt8(1));
      break;
    }
    case kAVXI16x8ShrU: {
      CpuFeatureScope avx_scope(tasm(), AVX);
      __ vpsrlw(i.OutputSimd128Register(), i.InputSimd128Register(0),
                i.InputInt8(1));
      break;
    }
    case kSSEI16x8UConvertI32x4: {
      // Pack two int32x4 inputs into one uint16x8 with unsigned saturation.
      // packusdw interprets its inputs as signed, so clamp both inputs to
      // 0x7FFFFFFF first (unsigned min maps negative lanes there).
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      CpuFeatureScope sse_scope(tasm(), SSE4_1);
      XMMRegister dst = i.OutputSimd128Register();
      // Change negative lanes to 0x7FFFFFFF
      __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
      __ psrld(kScratchDoubleReg, 1);
      __ pminud(dst, kScratchDoubleReg);
      __ pminud(kScratchDoubleReg, i.InputOperand(1));
      __ packusdw(dst, kScratchDoubleReg);
      break;
    }
    case kAVXI16x8UConvertI32x4: {
      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
      CpuFeatureScope avx_scope(tasm(), AVX);
      XMMRegister dst = i.OutputSimd128Register();
      // Change negative lanes to 0x7FFFFFFF
      __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
      __ vpsrld(kScratchDoubleReg, kScratchDoubleReg, 1);
      __ vpminud(dst, kScratchDoubleReg, i.InputSimd128Register(0));
      __ vpminud(kScratchDoubleReg, kScratchDoubleReg, i.InputOperand(1));
      __ vpackusdw(dst, dst, kScratchDoubleReg);
      break;
    }
2655  case kSSEI16x8AddSaturateU: {
2656  DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2657  __ paddusw(i.OutputSimd128Register(), i.InputOperand(1));
2658  break;
2659  }
2660  case kAVXI16x8AddSaturateU: {
2661  CpuFeatureScope avx_scope(tasm(), AVX);
2662  __ vpaddusw(i.OutputSimd128Register(), i.InputSimd128Register(0),
2663  i.InputOperand(1));
2664  break;
2665  }
2666  case kSSEI16x8SubSaturateU: {
2667  DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2668  __ psubusw(i.OutputSimd128Register(), i.InputOperand(1));
2669  break;
2670  }
2671  case kAVXI16x8SubSaturateU: {
2672  CpuFeatureScope avx_scope(tasm(), AVX);
2673  __ vpsubusw(i.OutputSimd128Register(), i.InputSimd128Register(0),
2674  i.InputOperand(1));
2675  break;
2676  }
2677  case kSSEI16x8MinU: {
2678  DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2679  CpuFeatureScope sse_scope(tasm(), SSE4_1);
2680  __ pminuw(i.OutputSimd128Register(), i.InputOperand(1));
2681  break;
2682  }
2683  case kAVXI16x8MinU: {
2684  CpuFeatureScope avx_scope(tasm(), AVX);
2685  __ vpminuw(i.OutputSimd128Register(), i.InputSimd128Register(0),
2686  i.InputOperand(1));
2687  break;
2688  }
2689  case kSSEI16x8MaxU: {
2690  DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2691  CpuFeatureScope sse_scope(tasm(), SSE4_1);
2692  __ pmaxuw(i.OutputSimd128Register(), i.InputOperand(1));
2693  break;
2694  }
2695  case kAVXI16x8MaxU: {
2696  CpuFeatureScope avx_scope(tasm(), AVX);
2697  __ vpmaxuw(i.OutputSimd128Register(), i.InputSimd128Register(0),
2698  i.InputOperand(1));
2699  break;
2700  }
2701  case kSSEI16x8GtU: {
2702  DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2703  CpuFeatureScope sse_scope(tasm(), SSE4_1);
2704  XMMRegister dst = i.OutputSimd128Register();
2705  Operand src = i.InputOperand(1);
// SSE has no unsigned 16-bit greater-than compare. Synthesize
// dst >u src as NOT(max_u(dst, src) == src): the max equals src
// exactly when dst <=u src, so inverting the equality gives >u.
2706  __ pmaxuw(dst, src);
2707  __ pcmpeqw(dst, src);
// Materialize an all-ones vector in the scratch register, then
// use it to bitwise-invert the equality mask.
2708  __ pcmpeqw(kScratchDoubleReg, kScratchDoubleReg);
2709  __ pxor(dst, kScratchDoubleReg);
2710  break;
2711  }
2712  case kAVXI16x8GtU: {
2713  CpuFeatureScope avx_scope(tasm(), AVX);
2714  XMMRegister dst = i.OutputSimd128Register();
2715  XMMRegister src1 = i.InputSimd128Register(0);
2716  Operand src2 = i.InputOperand(1);
// Same max-then-compare trick as the SSE path, but the three-operand
// AVX forms let the max go to the scratch register so src1 survives.
2717  __ vpmaxuw(kScratchDoubleReg, src1, src2);
2718  __ vpcmpeqw(dst, kScratchDoubleReg, src2);
2719  __ vpcmpeqw(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
2720  __ vpxor(dst, dst, kScratchDoubleReg);
2721  break;
2722  }
2723  case kSSEI16x8GeU: {
2724  DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2725  CpuFeatureScope sse_scope(tasm(), SSE4_1);
2726  XMMRegister dst = i.OutputSimd128Register();
2727  Operand src = i.InputOperand(1);
// dst >=u src iff min_u(dst, src) == src; no inversion needed here.
2728  __ pminuw(dst, src);
2729  __ pcmpeqw(dst, src);
2730  break;
2731  }
2732  case kAVXI16x8GeU: {
2733  CpuFeatureScope avx_scope(tasm(), AVX);
2734  XMMRegister src1 = i.InputSimd128Register(0);
2735  Operand src2 = i.InputOperand(1);
// Non-destructive min-then-compare: src1 >=u src2 iff min == src2.
2736  __ vpminuw(kScratchDoubleReg, src1, src2);
2737  __ vpcmpeqw(i.OutputSimd128Register(), kScratchDoubleReg, src2);
2738  break;
2739  }
2740  case kIA32I8x16Splat: {
2741  XMMRegister dst = i.OutputSimd128Register();
2742  __ Movd(dst, i.InputOperand(0));
2743  __ Pxor(kScratchDoubleReg, kScratchDoubleReg);
2744  __ Pshufb(dst, kScratchDoubleReg);
2745  break;
2746  }
2747  case kIA32I8x16ExtractLane: {
2748  Register dst = i.OutputRegister();
2749  __ Pextrb(dst, i.InputSimd128Register(0), i.InputInt8(1));
2750  __ movsx_b(dst, dst);
2751  break;
2752  }
2753  case kSSEI8x16ReplaceLane: {
2754  DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2755  CpuFeatureScope sse_scope(tasm(), SSE4_1);
2756  __ pinsrb(i.OutputSimd128Register(), i.InputOperand(2), i.InputInt8(1));
2757  break;
2758  }
2759  case kAVXI8x16ReplaceLane: {
2760  CpuFeatureScope avx_scope(tasm(), AVX);
2761  __ vpinsrb(i.OutputSimd128Register(), i.InputSimd128Register(0),
2762  i.InputOperand(2), i.InputInt8(1));
2763  break;
2764  }
2765  case kSSEI8x16SConvertI16x8: {
2766  DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2767  __ packsswb(i.OutputSimd128Register(), i.InputOperand(1));
2768  break;
2769  }
2770  case kAVXI8x16SConvertI16x8: {
2771  CpuFeatureScope avx_scope(tasm(), AVX);
2772  __ vpacksswb(i.OutputSimd128Register(), i.InputSimd128Register(0),
2773  i.InputOperand(1));
2774  break;
2775  }
2776  case kIA32I8x16Neg: {
2777  XMMRegister dst = i.OutputSimd128Register();
2778  Operand src = i.InputOperand(0);
2779  if (src.is_reg(dst)) {
2780  __ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
2781  __ Psignb(dst, kScratchDoubleReg);
2782  } else {
2783  __ Pxor(dst, dst);
2784  __ Psubb(dst, src);
2785  }
2786  break;
2787  }
// IA-32 has no per-byte shift instruction, so I8x16 shifts are emulated:
// small shifts by repeated byte-adds (doubling), larger ones by masking
// off bits that would cross into the neighboring byte and then doing a
// 16-bit word shift.
2788  case kSSEI8x16Shl: {
2789  XMMRegister dst = i.OutputSimd128Register();
2790  DCHECK_EQ(dst, i.InputSimd128Register(0));
2791  int8_t shift = i.InputInt8(1) & 0x7;
2792  if (shift < 4) {
2793  // For small shifts, doubling is faster.
2794  for (int i = 0; i < shift; ++i) {
2795  __ paddb(dst, dst);
2796  }
2797  } else {
2798  // Mask off the unwanted bits before word-shifting.
// Build the per-byte mask 0xFF >> shift in the scratch register:
// all-ones words, logically shifted right by (8 + shift), then
// packed back down to bytes (each byte becomes 0xFF >> shift).
2799  __ pcmpeqw(kScratchDoubleReg, kScratchDoubleReg);
2800  __ psrlw(kScratchDoubleReg, 8 + shift);
2801  __ packuswb(kScratchDoubleReg, kScratchDoubleReg);
2802  __ pand(dst, kScratchDoubleReg);
// After masking, a word shift cannot leak bits across byte lanes.
2803  __ psllw(dst, shift);
2804  }
2805  break;
2806  }
2807  case kAVXI8x16Shl: {
2808  CpuFeatureScope avx_scope(tasm(), AVX);
2809  XMMRegister dst = i.OutputSimd128Register();
2810  XMMRegister src = i.InputSimd128Register(0);
2811  int8_t shift = i.InputInt8(1) & 0x7;
2812  if (shift < 4) {
2813  // For small shifts, doubling is faster.
2814  for (int i = 0; i < shift; ++i) {
2815  __ vpaddb(dst, src, src);
// First iteration reads the input; subsequent ones double dst in place.
2816  src = dst;
2817  }
2818  } else {
2819  // Mask off the unwanted bits before word-shifting.
// Same 0xFF >> shift byte mask as the SSE path, built in scratch.
2820  __ vpcmpeqw(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
2821  __ vpsrlw(kScratchDoubleReg, kScratchDoubleReg, 8 + shift);
2822  __ vpackuswb(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
2823  __ vpand(dst, src, kScratchDoubleReg);
2824  __ vpsllw(dst, dst, shift);
2825  }
2826  break;
2827  }
2828  case kIA32I8x16ShrS: {
2829  XMMRegister dst = i.OutputSimd128Register();
2830  XMMRegister src = i.InputSimd128Register(0);
2831  int8_t shift = i.InputInt8(1) & 0x7;
2832  // Unpack the bytes into words, do arithmetic shifts, and repack.
2833  __ Punpckhbw(kScratchDoubleReg, src);
2834  __ Punpcklbw(dst, src);
2835  __ Psraw(kScratchDoubleReg, 8 + shift);
2836  __ Psraw(dst, 8 + shift);
2837  __ Packsswb(dst, kScratchDoubleReg);
2838  break;
2839  }
2840  case kSSEI8x16Add: {
2841  DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2842  __ paddb(i.OutputSimd128Register(), i.InputOperand(1));
2843  break;
2844  }
2845  case kAVXI8x16Add: {
2846  CpuFeatureScope avx_scope(tasm(), AVX);
2847  __ vpaddb(i.OutputSimd128Register(), i.InputSimd128Register(0),
2848  i.InputOperand(1));
2849  break;
2850  }
2851  case kSSEI8x16AddSaturateS: {
2852  DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2853  __ paddsb(i.OutputSimd128Register(), i.InputOperand(1));
2854  break;
2855  }
2856  case kAVXI8x16AddSaturateS: {
2857  CpuFeatureScope avx_scope(tasm(), AVX);
2858  __ vpaddsb(i.OutputSimd128Register(), i.InputSimd128Register(0),
2859  i.InputOperand(1));
2860  break;
2861  }
2862  case kSSEI8x16Sub: {
2863  DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2864  __ psubb(i.OutputSimd128Register(), i.InputOperand(1));
2865  break;
2866  }
2867  case kAVXI8x16Sub: {
2868  CpuFeatureScope avx_scope(tasm(), AVX);
2869  __ vpsubb(i.OutputSimd128Register(), i.InputSimd128Register(0),
2870  i.InputOperand(1));
2871  break;
2872  }
2873  case kSSEI8x16SubSaturateS: {
2874  DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2875  __ psubsb(i.OutputSimd128Register(), i.InputOperand(1));
2876  break;
2877  }
2878  case kAVXI8x16SubSaturateS: {
2879  CpuFeatureScope avx_scope(tasm(), AVX);
2880  __ vpsubsb(i.OutputSimd128Register(), i.InputSimd128Register(0),
2881  i.InputOperand(1));
2882  break;
2883  }
2884  case kSSEI8x16Mul: {
2885  XMMRegister dst = i.OutputSimd128Register();
2886  DCHECK_EQ(dst, i.InputSimd128Register(0));
2887  XMMRegister right = i.InputSimd128Register(1);
2888  XMMRegister tmp = i.ToSimd128Register(instr->TempAt(0));
2889 
2890  // I16x8 view of I8x16
2891  // left = AAaa AAaa ... AAaa AAaa
2892  // right= BBbb BBbb ... BBbb BBbb
2893 
2894  // t = 00AA 00AA ... 00AA 00AA
2895  // s = 00BB 00BB ... 00BB 00BB
2896  __ movaps(tmp, dst);
2897  __ movaps(kScratchDoubleReg, right);
2898  __ psrlw(tmp, 8);
2899  __ psrlw(kScratchDoubleReg, 8);
2900  // dst = left * 256
2901  __ psllw(dst, 8);
2902 
2903  // t = I16x8Mul(t, s)
2904  // => __PP __PP ... __PP __PP
2905  __ pmullw(tmp, kScratchDoubleReg);
2906  // dst = I16x8Mul(left * 256, right)
2907  // => pp__ pp__ ... pp__ pp__
2908  __ pmullw(dst, right);
2909 
2910  // t = I16x8Shl(t, 8)
2911  // => PP00 PP00 ... PP00 PP00
2912  __ psllw(tmp, 8);
2913 
2914  // dst = I16x8Shr(dst, 8)
2915  // => 00pp 00pp ... 00pp 00pp
2916  __ psrlw(dst, 8);
2917 
2918  // dst = I16x8Or(dst, t)
2919  // => PPpp PPpp ... PPpp PPpp
2920  __ por(dst, tmp);
2921  break;
2922  }
2923  case kAVXI8x16Mul: {
2924  CpuFeatureScope avx_scope(tasm(), AVX);
2925  XMMRegister dst = i.OutputSimd128Register();
2926  XMMRegister left = i.InputSimd128Register(0);
2927  XMMRegister right = i.InputSimd128Register(1);
2928  XMMRegister tmp = i.ToSimd128Register(instr->TempAt(0));
2929 
2930  // I16x8 view of I8x16
2931  // left = AAaa AAaa ... AAaa AAaa
2932  // right= BBbb BBbb ... BBbb BBbb
2933 
2934  // t = 00AA 00AA ... 00AA 00AA
2935  // s = 00BB 00BB ... 00BB 00BB
2936  __ vpsrlw(tmp, left, 8);
2937  __ vpsrlw(kScratchDoubleReg, right, 8);
2938 
2939  // t = I16x8Mul(t0, t1)
2940  // => __PP __PP ... __PP __PP
2941  __ vpmullw(tmp, tmp, kScratchDoubleReg);
2942 
2943  // s = left * 256
2944  __ vpsllw(kScratchDoubleReg, left, 8);
2945 
2946  // dst = I16x8Mul(left * 256, right)
2947  // => pp__ pp__ ... pp__ pp__
2948  __ vpmullw(dst, kScratchDoubleReg, right);
2949 
2950  // dst = I16x8Shr(dst, 8)
2951  // => 00pp 00pp ... 00pp 00pp
2952  __ vpsrlw(dst, dst, 8);
2953 
2954  // t = I16x8Shl(t, 8)
2955  // => PP00 PP00 ... PP00 PP00
2956  __ vpsllw(tmp, tmp, 8);
2957 
2958  // dst = I16x8Or(dst, t)
2959  // => PPpp PPpp ... PPpp PPpp
2960  __ vpor(dst, dst, tmp);
2961  break;
2962  }
2963  case kSSEI8x16MinS: {
2964  DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2965  CpuFeatureScope sse_scope(tasm(), SSE4_1);
2966  __ pminsb(i.OutputSimd128Register(), i.InputOperand(1));
2967  break;
2968  }
2969  case kAVXI8x16MinS: {
2970  CpuFeatureScope avx_scope(tasm(), AVX);
2971  __ vpminsb(i.OutputSimd128Register(), i.InputSimd128Register(0),
2972  i.InputOperand(1));
2973  break;
2974  }
2975  case kSSEI8x16MaxS: {
2976  DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2977  CpuFeatureScope sse_scope(tasm(), SSE4_1);
2978  __ pmaxsb(i.OutputSimd128Register(), i.InputOperand(1));
2979  break;
2980  }
2981  case kAVXI8x16MaxS: {
2982  CpuFeatureScope avx_scope(tasm(), AVX);
2983  __ vpmaxsb(i.OutputSimd128Register(), i.InputSimd128Register(0),
2984  i.InputOperand(1));
2985  break;
2986  }
2987  case kSSEI8x16Eq: {
2988  DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
2989  __ pcmpeqb(i.OutputSimd128Register(), i.InputOperand(1));
2990  break;
2991  }
2992  case kAVXI8x16Eq: {
2993  CpuFeatureScope avx_scope(tasm(), AVX);
2994  __ vpcmpeqb(i.OutputSimd128Register(), i.InputSimd128Register(0),
2995  i.InputOperand(1));
2996  break;
2997  }
2998  case kSSEI8x16Ne: {
2999  DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
3000  __ pcmpeqb(i.OutputSimd128Register(), i.InputOperand(1));
3001  __ pcmpeqb(kScratchDoubleReg, kScratchDoubleReg);
3002  __ pxor(i.OutputSimd128Register(), kScratchDoubleReg);
3003  break;
3004  }
3005  case kAVXI8x16Ne: {
3006  CpuFeatureScope avx_scope(tasm(), AVX);
3007  __ vpcmpeqb(i.OutputSimd128Register(), i.InputSimd128Register(0),
3008  i.InputOperand(1));
3009  __ vpcmpeqb(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
3010  __ vpxor(i.OutputSimd128Register(), i.OutputSimd128Register(),
3011  kScratchDoubleReg);
3012  break;
3013  }
3014  case kSSEI8x16GtS: {
3015  DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
3016  __ pcmpgtb(i.OutputSimd128Register(), i.InputOperand(1));
3017  break;
3018  }
3019  case kAVXI8x16GtS: {
3020  CpuFeatureScope avx_scope(tasm(), AVX);
3021  __ vpcmpgtb(i.OutputSimd128Register(), i.InputSimd128Register(0),
3022  i.InputOperand(1));
3023  break;
3024  }
3025  case kSSEI8x16GeS: {
3026  DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
3027  CpuFeatureScope sse_scope(tasm(), SSE4_1);
3028  XMMRegister dst = i.OutputSimd128Register();
3029  Operand src = i.InputOperand(1);
3030  __ pminsb(dst, src);
3031  __ pcmpeqb(dst, src);
3032  break;
3033  }
3034  case kAVXI8x16GeS: {
3035  CpuFeatureScope avx_scope(tasm(), AVX);
3036  XMMRegister src1 = i.InputSimd128Register(0);
3037  Operand src2 = i.InputOperand(1);
3038  __ vpminsb(kScratchDoubleReg, src1, src2);
3039  __ vpcmpeqb(i.OutputSimd128Register(), kScratchDoubleReg, src2);
3040  break;
3041  }
// Narrow I16x8 -> I8x16 with unsigned saturation. packuswb saturates
// *signed* words, which would turn negative lanes into 0 instead of
// clamping them, so lanes are first clamped to 0x7FFF via an unsigned
// min against a 0x7FFF splat (built by shifting an all-ones vector).
3042  case kSSEI8x16UConvertI16x8: {
3043  DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
3044  CpuFeatureScope sse_scope(tasm(), SSE4_1);
3045  XMMRegister dst = i.OutputSimd128Register();
3046  // Change negative lanes to 0x7FFF
3047  __ pcmpeqw(kScratchDoubleReg, kScratchDoubleReg);
3048  __ psrlw(kScratchDoubleReg, 1);
// Clamp input 0 (aliased to dst) against the 0x7FFF splat, then reuse
// the scratch register to hold the clamped second input.
3049  __ pminuw(dst, kScratchDoubleReg);
3050  __ pminuw(kScratchDoubleReg, i.InputOperand(1));
3051  __ packuswb(dst, kScratchDoubleReg);
3052  break;
3053  }
3054  case kAVXI8x16UConvertI16x8: {
3055  DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
3056  CpuFeatureScope avx_scope(tasm(), AVX);
3057  XMMRegister dst = i.OutputSimd128Register();
3058  // Change negative lanes to 0x7FFF
3059  __ vpcmpeqw(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
3060  __ vpsrlw(kScratchDoubleReg, kScratchDoubleReg, 1);
// Same clamp-then-pack sequence using the non-destructive AVX forms.
3061  __ vpminuw(dst, kScratchDoubleReg, i.InputSimd128Register(0));
3062  __ vpminuw(kScratchDoubleReg, kScratchDoubleReg, i.InputOperand(1));
3063  __ vpackuswb(dst, dst, kScratchDoubleReg);
3064  break;
3065  }
3066  case kSSEI8x16AddSaturateU: {
3067  DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
3068  __ paddusb(i.OutputSimd128Register(), i.InputOperand(1));
3069  break;
3070  }
3071  case kAVXI8x16AddSaturateU: {
3072  CpuFeatureScope avx_scope(tasm(), AVX);
3073  __ vpaddusb(i.OutputSimd128Register(), i.InputSimd128Register(0),
3074  i.InputOperand(1));
3075  break;
3076  }
3077  case kSSEI8x16SubSaturateU: {
3078  DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
3079  __ psubusb(i.OutputSimd128Register(), i.InputOperand(1));
3080  break;
3081  }
3082  case kAVXI8x16SubSaturateU: {
3083  CpuFeatureScope avx_scope(tasm(), AVX);
3084  __ vpsubusb(i.OutputSimd128Register(), i.InputSimd128Register(0),
3085  i.InputOperand(1));
3086  break;
3087  }
3088  case kIA32I8x16ShrU: {
3089  XMMRegister dst = i.OutputSimd128Register();
3090  XMMRegister src = i.InputSimd128Register(0);
3091  int8_t shift = i.InputInt8(1) & 0x7;
3092  // Unpack the bytes into words, do logical shifts, and repack.
3093  __ Punpckhbw(kScratchDoubleReg, src);
3094  __ Punpcklbw(dst, src);
3095  __ Psrlw(kScratchDoubleReg, 8 + shift);
3096  __ Psrlw(dst, 8 + shift);
3097  __ Packuswb(dst, kScratchDoubleReg);
3098  break;
3099  }
3100  case kSSEI8x16MinU: {
3101  XMMRegister dst = i.OutputSimd128Register();
3102  DCHECK_EQ(dst, i.InputSimd128Register(0));
3103  __ pminub(dst, i.InputOperand(1));
3104  break;
3105  }
3106  case kAVXI8x16MinU: {
3107  CpuFeatureScope avx_scope(tasm(), AVX);
3108  __ vpminub(i.OutputSimd128Register(), i.InputSimd128Register(0),
3109  i.InputOperand(1));
3110  break;
3111  }
3112  case kSSEI8x16MaxU: {
3113  DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
3114  __ pmaxub(i.OutputSimd128Register(), i.InputOperand(1));
3115  break;
3116  }
3117  case kAVXI8x16MaxU: {
3118  CpuFeatureScope avx_scope(tasm(), AVX);
3119  __ vpmaxub(i.OutputSimd128Register(), i.InputSimd128Register(0),
3120  i.InputOperand(1));
3121  break;
3122  }
3123  case kSSEI8x16GtU: {
3124  DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
3125  XMMRegister dst = i.OutputSimd128Register();
3126  Operand src = i.InputOperand(1);
3127  __ pmaxub(dst, src);
3128  __ pcmpeqb(dst, src);
3129  __ pcmpeqb(kScratchDoubleReg, kScratchDoubleReg);
3130  __ pxor(dst, kScratchDoubleReg);
3131  break;
3132  }
3133  case kAVXI8x16GtU: {
3134  CpuFeatureScope avx_scope(tasm(), AVX);
3135  XMMRegister dst = i.OutputSimd128Register();
3136  XMMRegister src1 = i.InputSimd128Register(0);
3137  Operand src2 = i.InputOperand(1);
3138  __ vpmaxub(kScratchDoubleReg, src1, src2);
3139  __ vpcmpeqb(dst, kScratchDoubleReg, src2);
3140  __ vpcmpeqb(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
3141  __ vpxor(dst, dst, kScratchDoubleReg);
3142  break;
3143  }
3144  case kSSEI8x16GeU: {
3145  DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
3146  XMMRegister dst = i.OutputSimd128Register();
3147  Operand src = i.InputOperand(1);
3148  __ pminub(dst, src);
3149  __ pcmpeqb(dst, src);
3150  break;
3151  }
3152  case kAVXI8x16GeU: {
3153  CpuFeatureScope avx_scope(tasm(), AVX);
3154  XMMRegister src1 = i.InputSimd128Register(0);
3155  Operand src2 = i.InputOperand(1);
3156  __ vpminub(kScratchDoubleReg, src1, src2);
3157  __ vpcmpeqb(i.OutputSimd128Register(), kScratchDoubleReg, src2);
3158  break;
3159  }
3160  case kIA32S128Zero: {
3161  XMMRegister dst = i.OutputSimd128Register();
3162  __ Pxor(dst, dst);
3163  break;
3164  }
3165  case kSSES128Not: {
3166  XMMRegister dst = i.OutputSimd128Register();
3167  Operand src = i.InputOperand(0);
3168  if (src.is_reg(dst)) {
3169  __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
3170  __ pxor(dst, kScratchDoubleReg);
3171  } else {
3172  __ pcmpeqd(dst, dst);
3173  __ pxor(dst, src);
3174  }
3175  break;
3176  }
3177  case kAVXS128Not: {
3178  CpuFeatureScope avx_scope(tasm(), AVX);
3179  __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
3180  __ vpxor(i.OutputSimd128Register(), kScratchDoubleReg, i.InputOperand(0));
3181  break;
3182  }
3183  case kSSES128And: {
3184  DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
3185  __ pand(i.OutputSimd128Register(), i.InputOperand(1));
3186  break;
3187  }
3188  case kAVXS128And: {
3189  CpuFeatureScope avx_scope(tasm(), AVX);
3190  __ vpand(i.OutputSimd128Register(), i.InputSimd128Register(0),
3191  i.InputOperand(1));
3192  break;
3193  }
3194  case kSSES128Or: {
3195  DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
3196  __ por(i.OutputSimd128Register(), i.InputOperand(1));
3197  break;
3198  }
3199  case kAVXS128Or: {
3200  CpuFeatureScope avx_scope(tasm(), AVX);
3201  __ vpor(i.OutputSimd128Register(), i.InputSimd128Register(0),
3202  i.InputOperand(1));
3203  break;
3204  }
3205  case kSSES128Xor: {
3206  DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
3207  __ pxor(i.OutputSimd128Register(), i.InputOperand(1));
3208  break;
3209  }
3210  case kAVXS128Xor: {
3211  CpuFeatureScope avx_scope(tasm(), AVX);
3212  __ vpxor(i.OutputSimd128Register(), i.InputSimd128Register(0),
3213  i.InputOperand(1));
3214  break;
3215  }
3216  case kSSES128Select: {
3217  DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
3218  // Mask used here is stored in dst.
3219  XMMRegister dst = i.OutputSimd128Register();
3220  __ movaps(kScratchDoubleReg, i.InputSimd128Register(1));
3221  __ xorps(kScratchDoubleReg, i.InputSimd128Register(2));
3222  __ andps(dst, kScratchDoubleReg);
3223  __ xorps(dst, i.InputSimd128Register(2));
3224  break;
3225  }
3226  case kAVXS128Select: {
3227  CpuFeatureScope avx_scope(tasm(), AVX);
3228  XMMRegister dst = i.OutputSimd128Register();
3229  __ vxorps(kScratchDoubleReg, i.InputSimd128Register(2),
3230  i.InputOperand(1));
3231  __ vandps(dst, kScratchDoubleReg, i.InputOperand(0));
3232  __ vxorps(dst, dst, i.InputSimd128Register(2));
3233  break;
3234  }
// General 16-byte shuffle via PSHUFB. The shuffle control mask is an
// instruction immediate (4 x uint32), so it is materialized in memory by
// pushing it onto a 16-byte-aligned stack area that PSHUFB then reads.
3235  case kIA32S8x16Shuffle: {
3236  XMMRegister dst = i.OutputSimd128Register();
3237  Operand src0 = i.InputOperand(0);
3238  Register tmp = i.TempRegister(0);
3239  // Prepare 16 byte aligned buffer for shuffle control mask
// Save esp in tmp so it can be restored after the pushes below.
3240  __ mov(tmp, esp);
3241  __ and_(esp, -16);
3242  if (instr->InputCount() == 5) { // only one input operand
3243  DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
// Push the four mask words high-to-low so they end up in memory order.
3244  for (int j = 4; j > 0; j--) {
3245  uint32_t mask = i.InputUint32(j);
3246  __ push(Immediate(mask));
3247  }
3248  __ Pshufb(dst, Operand(esp, 0));
3249  } else { // two input operands
3250  DCHECK_EQ(6, instr->InputCount());
// Pass 1: shuffle src0 in the scratch register. Lane indices >= 16
// belong to the second input, so they are rewritten to 0x80, which
// makes PSHUFB write zero into that byte.
3251  __ movups(kScratchDoubleReg, src0);
3252  for (int j = 5; j > 1; j--) {
3253  uint32_t lanes = i.InputUint32(j);
3254  uint32_t mask = 0;
3255  for (int k = 0; k < 32; k += 8) {
3256  uint8_t lane = lanes >> k;
3257  mask |= (lane < kSimd128Size ? lane : 0x80) << k;
3258  }
3259  __ push(Immediate(mask));
3260  }
3261  __ Pshufb(kScratchDoubleReg, Operand(esp, 0));
3262  Operand src1 = i.InputOperand(1);
3263  if (!src1.is_reg(dst)) __ movups(dst, src1);
// Pass 2: shuffle src1 in dst with the complementary mask (lanes
// < 16 zeroed, lanes >= 16 rebased into 0..15 via & 0xF).
3264  for (int j = 5; j > 1; j--) {
3265  uint32_t lanes = i.InputUint32(j);
3266  uint32_t mask = 0;
3267  for (int k = 0; k < 32; k += 8) {
3268  uint8_t lane = lanes >> k;
3269  mask |= (lane >= kSimd128Size ? (lane & 0xF) : 0x80) << k;
3270  }
3271  __ push(Immediate(mask));
3272  }
3273  __ Pshufb(dst, Operand(esp, 0));
// Combine the two half-shuffles; zeroed lanes OR transparently.
3274  __ por(dst, kScratchDoubleReg);
3275  }
// Restore the original stack pointer saved above.
3276  __ mov(esp, tmp);
3277  break;
3278  }
3279  case kIA32S32x4Swizzle: {
3280  DCHECK_EQ(2, instr->InputCount());
3281  __ Pshufd(i.OutputSimd128Register(), i.InputOperand(0), i.InputInt8(1));
3282  break;
3283  }
3284  case kIA32S32x4Shuffle: {
3285  DCHECK_EQ(4, instr->InputCount()); // Swizzles should be handled above.
3286  int8_t shuffle = i.InputInt8(2);
3287  DCHECK_NE(0xe4, shuffle); // A simple blend should be handled below.
3288  __ Pshufd(kScratchDoubleReg, i.InputOperand(1), shuffle);
3289  __ Pshufd(i.OutputSimd128Register(), i.InputOperand(0), shuffle);
3290  __ Pblendw(i.OutputSimd128Register(), kScratchDoubleReg, i.InputInt8(3));
3291  break;
3292  }
3293  case kIA32S16x8Blend:
3294  ASSEMBLE_SIMD_IMM_SHUFFLE(pblendw, SSE4_1, i.InputInt8(2));
3295  break;
3296  case kIA32S16x8HalfShuffle1: {
3297  XMMRegister dst = i.OutputSimd128Register();
3298  __ Pshuflw(dst, i.InputOperand(0), i.InputInt8(1));
3299  __ Pshufhw(dst, dst, i.InputInt8(2));
3300  break;
3301  }
3302  case kIA32S16x8HalfShuffle2: {
3303  XMMRegister dst = i.OutputSimd128Register();
3304  __ Pshuflw(kScratchDoubleReg, i.InputOperand(1), i.InputInt8(2));
3305  __ Pshufhw(kScratchDoubleReg, kScratchDoubleReg, i.InputInt8(3));
3306  __ Pshuflw(dst, i.InputOperand(0), i.InputInt8(2));
3307  __ Pshufhw(dst, dst, i.InputInt8(3));
3308  __ Pblendw(dst, kScratchDoubleReg, i.InputInt8(4));
3309  break;
3310  }
3311  case kIA32S8x16Alignr:
3312  ASSEMBLE_SIMD_IMM_SHUFFLE(palignr, SSSE3, i.InputInt8(2));
3313  break;
3314  case kIA32S16x8Dup: {
3315  XMMRegister dst = i.OutputSimd128Register();
3316  Operand src = i.InputOperand(0);
3317  int8_t lane = i.InputInt8(1) & 0x7;
3318  int8_t lane4 = lane & 0x3;
3319  int8_t half_dup = lane4 | (lane4 << 2) | (lane4 << 4) | (lane4 << 6);
3320  if (lane < 4) {
3321  __ Pshuflw(dst, src, half_dup);
3322  __ Pshufd(dst, dst, 0);
3323  } else {
3324  __ Pshufhw(dst, src, half_dup);
3325  __ Pshufd(dst, dst, 0xaa);
3326  }
3327  break;
3328  }
3329  case kIA32S8x16Dup: {
3330  XMMRegister dst = i.OutputSimd128Register();
3331  XMMRegister src = i.InputSimd128Register(0);
3332  int8_t lane = i.InputInt8(1) & 0xf;
3333  if (CpuFeatures::IsSupported(AVX)) {
3334  CpuFeatureScope avx_scope(tasm(), AVX);
3335  if (lane < 8) {
3336  __ vpunpcklbw(dst, src, src);
3337  } else {
3338  __ vpunpckhbw(dst, src, src);
3339  }
3340  } else {
3341  DCHECK_EQ(dst, src);
3342  if (lane < 8) {
3343  __ punpcklbw(dst, dst);
3344  } else {
3345  __ punpckhbw(dst, dst);
3346  }
3347  }
3348  lane &= 0x7;
3349  int8_t lane4 = lane & 0x3;
3350  int8_t half_dup = lane4 | (lane4 << 2) | (lane4 << 4) | (lane4 << 6);
3351  if (lane < 4) {
3352  __ Pshuflw(dst, dst, half_dup);
3353  __ Pshufd(dst, dst, 0);
3354  } else {
3355  __ Pshufhw(dst, dst, half_dup);
3356  __ Pshufd(dst, dst, 0xaa);
3357  }
3358  break;
3359  }
3360  case kIA32S64x2UnpackHigh:
3361  ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckhqdq);
3362  break;
3363  case kIA32S32x4UnpackHigh:
3364  ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckhdq);
3365  break;
3366  case kIA32S16x8UnpackHigh:
3367  ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckhwd);
3368  break;
3369  case kIA32S8x16UnpackHigh:
3370  ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckhbw);
3371  break;
3372  case kIA32S64x2UnpackLow:
3373  ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpcklqdq);
3374  break;
3375  case kIA32S32x4UnpackLow:
3376  ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckldq);
3377  break;
3378  case kIA32S16x8UnpackLow:
3379  ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpcklwd);
3380  break;
3381  case kIA32S8x16UnpackLow:
3382  ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpcklbw);
3383  break;
3384  case kSSES16x8UnzipHigh: {
3385  CpuFeatureScope sse_scope(tasm(), SSE4_1);
3386  XMMRegister dst = i.OutputSimd128Register();
3387  XMMRegister src2 = dst;
3388  DCHECK_EQ(dst, i.InputSimd128Register(0));
3389  if (instr->InputCount() == 2) {
3390  __ movups(kScratchDoubleReg, i.InputOperand(1));
3391  __ psrld(kScratchDoubleReg, 16);
3392  src2 = kScratchDoubleReg;
3393  }
3394  __ psrld(dst, 16);
3395  __ packusdw(dst, src2);
3396  break;
3397  }
3398  case kAVXS16x8UnzipHigh: {
3399  CpuFeatureScope avx_scope(tasm(), AVX);
3400  XMMRegister dst = i.OutputSimd128Register();
3401  XMMRegister src2 = dst;
3402  if (instr->InputCount() == 2) {
3403  __ vpsrld(kScratchDoubleReg, i.InputSimd128Register(1), 16);
3404  src2 = kScratchDoubleReg;
3405  }
3406  __ vpsrld(dst, i.InputSimd128Register(0), 16);
3407  __ vpackusdw(dst, dst, src2);
3408  break;
3409  }
3410  case kSSES16x8UnzipLow: {
3411  CpuFeatureScope sse_scope(tasm(), SSE4_1);
3412  XMMRegister dst = i.OutputSimd128Register();
3413  XMMRegister src2 = dst;
3414  DCHECK_EQ(dst, i.InputSimd128Register(0));
3415  __ pxor(kScratchDoubleReg, kScratchDoubleReg);
3416  if (instr->InputCount() == 2) {
3417  __ pblendw(kScratchDoubleReg, i.InputOperand(1), 0x55);
3418  src2 = kScratchDoubleReg;
3419  }
3420  __ pblendw(dst, kScratchDoubleReg, 0xaa);
3421  __ packusdw(dst, src2);
3422  break;
3423  }
3424  case kAVXS16x8UnzipLow: {
3425  CpuFeatureScope avx_scope(tasm(), AVX);
3426  XMMRegister dst = i.OutputSimd128Register();
3427  XMMRegister src2 = dst;
3428  __ vpxor(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
3429  if (instr->InputCount() == 2) {
3430  __ vpblendw(kScratchDoubleReg, kScratchDoubleReg, i.InputOperand(1),
3431  0x55);
3432  src2 = kScratchDoubleReg;
3433  }
3434  __ vpblendw(dst, kScratchDoubleReg, i.InputSimd128Register(0), 0x55);
3435  __ vpackusdw(dst, dst, src2);
3436  break;
3437  }
3438  case kSSES8x16UnzipHigh: {
3439  XMMRegister dst = i.OutputSimd128Register();
3440  XMMRegister src2 = dst;
3441  DCHECK_EQ(dst, i.InputSimd128Register(0));
3442  if (instr->InputCount() == 2) {
3443  __ movups(kScratchDoubleReg, i.InputOperand(1));
3444  __ psrlw(kScratchDoubleReg, 8);
3445  src2 = kScratchDoubleReg;
3446  }
3447  __ psrlw(dst, 8);
3448  __ packuswb(dst, src2);
3449  break;
3450  }
3451  case kAVXS8x16UnzipHigh: {
3452  CpuFeatureScope avx_scope(tasm(), AVX);
3453  XMMRegister dst = i.OutputSimd128Register();
3454  XMMRegister src2 = dst;
3455  if (instr->InputCount() == 2) {
3456  __ vpsrlw(kScratchDoubleReg, i.InputSimd128Register(1), 8);
3457  src2 = kScratchDoubleReg;
3458  }
3459  __ vpsrlw(dst, i.InputSimd128Register(0), 8);
3460  __ vpackuswb(dst, dst, src2);
3461  break;
3462  }
3463  case kSSES8x16UnzipLow: {
3464  XMMRegister dst = i.OutputSimd128Register();
3465  XMMRegister src2 = dst;
3466  DCHECK_EQ(dst, i.InputSimd128Register(0));
3467  if (instr->InputCount() == 2) {
3468  __ movups(kScratchDoubleReg, i.InputOperand(1));
3469  __ psllw(kScratchDoubleReg, 8);
3470  __ psrlw(kScratchDoubleReg, 8);
3471  src2 = kScratchDoubleReg;
3472  }
3473  __ psllw(dst, 8);
3474  __ psrlw(dst, 8);
3475  __ packuswb(dst, src2);
3476  break;
3477  }
3478  case kAVXS8x16UnzipLow: {
3479  CpuFeatureScope avx_scope(tasm(), AVX);
3480  XMMRegister dst = i.OutputSimd128Register();
3481  XMMRegister src2 = dst;
3482  if (instr->InputCount() == 2) {
3483  __ vpsllw(kScratchDoubleReg, i.InputSimd128Register(1), 8);
3484  __ vpsrlw(kScratchDoubleReg, kScratchDoubleReg, 8);
3485  src2 = kScratchDoubleReg;
3486  }
3487  __ vpsllw(dst, i.InputSimd128Register(0), 8);
3488  __ vpsrlw(dst, dst, 8);
3489  __ vpackuswb(dst, dst, src2);
3490  break;
3491  }
3492  case kSSES8x16TransposeLow: {
3493  XMMRegister dst = i.OutputSimd128Register();
3494  DCHECK_EQ(dst, i.InputSimd128Register(0));
3495  __ psllw(dst, 8);
3496  if (instr->InputCount() == 1) {
3497  __ movups(kScratchDoubleReg, dst);
3498  } else {
3499  DCHECK_EQ(2, instr->InputCount());
3500  __ movups(kScratchDoubleReg, i.InputOperand(1));
3501  __ psllw(kScratchDoubleReg, 8);
3502  }
3503  __ psrlw(dst, 8);
3504  __ por(dst, kScratchDoubleReg);
3505  break;
3506  }
3507  case kAVXS8x16TransposeLow: {
3508  CpuFeatureScope avx_scope(tasm(), AVX);
3509  XMMRegister dst = i.OutputSimd128Register();
3510  if (instr->InputCount() == 1) {
3511  __ vpsllw(kScratchDoubleReg, i.InputSimd128Register(0), 8);
3512  __ vpsrlw(dst, kScratchDoubleReg, 8);
3513  } else {
3514  DCHECK_EQ(2, instr->InputCount());
3515  __ vpsllw(kScratchDoubleReg, i.InputSimd128Register(1), 8);
3516  __ vpsllw(dst, i.InputSimd128Register(0), 8);
3517  __ vpsrlw(dst, dst, 8);
3518  }
3519  __ vpor(dst, dst, kScratchDoubleReg);
3520  break;
3521  }
3522  case kSSES8x16TransposeHigh: {
3523  XMMRegister dst = i.OutputSimd128Register();
3524  DCHECK_EQ(dst, i.InputSimd128Register(0));
3525  __ psrlw(dst, 8);
3526  if (instr->InputCount() == 1) {
3527  __ movups(kScratchDoubleReg, dst);
3528  } else {
3529  DCHECK_EQ(2, instr->InputCount());
3530  __ movups(kScratchDoubleReg, i.InputOperand(1));
3531  __ psrlw(kScratchDoubleReg, 8);
3532  }
3533  __ psllw(kScratchDoubleReg, 8);
3534  __ por(dst, kScratchDoubleReg);
3535  break;
3536  }
3537  case kAVXS8x16TransposeHigh: {
3538  CpuFeatureScope avx_scope(tasm(), AVX);
3539  XMMRegister dst = i.OutputSimd128Register();
3540  if (instr->InputCount() == 1) {
3541  __ vpsrlw(dst, i.InputSimd128Register(0), 8);
3542  __ vpsllw(kScratchDoubleReg, dst, 8);
3543  } else {
3544  DCHECK_EQ(2, instr->InputCount());
3545  __ vpsrlw(kScratchDoubleReg, i.InputSimd128Register(1), 8);
3546  __ vpsrlw(dst, i.InputSimd128Register(0), 8);
3547  __ vpsllw(kScratchDoubleReg, kScratchDoubleReg, 8);
3548  }
3549  __ vpor(dst, dst, kScratchDoubleReg);
3550  break;
3551  }
3552  case kSSES8x8Reverse:
3553  case kSSES8x4Reverse:
3554  case kSSES8x2Reverse: {
3555  DCHECK_EQ(1, instr->InputCount());
3556  XMMRegister dst = i.OutputSimd128Register();
3557  DCHECK_EQ(dst, i.InputSimd128Register(0));
3558  if (arch_opcode != kSSES8x2Reverse) {
3559  // First shuffle words into position.
3560  int8_t shuffle_mask = arch_opcode == kSSES8x4Reverse ? 0xB1 : 0x1B;
3561  __ pshuflw(dst, dst, shuffle_mask);
3562  __ pshufhw(dst, dst, shuffle_mask);
3563  }
3564  __ movaps(kScratchDoubleReg, dst);
3565  __ psrlw(kScratchDoubleReg, 8);
3566  __ psllw(dst, 8);
3567  __ por(dst, kScratchDoubleReg);
3568  break;
3569  }
3570  case kAVXS8x2Reverse:
3571  case kAVXS8x4Reverse:
3572  case kAVXS8x8Reverse: {
3573  DCHECK_EQ(1, instr->InputCount());
3574  CpuFeatureScope avx_scope(tasm(), AVX);
3575  XMMRegister dst = i.OutputSimd128Register();
3576  XMMRegister src = dst;
3577  if (arch_opcode != kAVXS8x2Reverse) {
3578  // First shuffle words into position.
3579  int8_t shuffle_mask = arch_opcode == kAVXS8x4Reverse ? 0xB1 : 0x1B;
3580  __ vpshuflw(dst, i.InputOperand(0), shuffle_mask);
3581  __ vpshufhw(dst, dst, shuffle_mask);
3582  } else {
3583  src = i.InputSimd128Register(0);
3584  }
3585  // Reverse each 16 bit lane.
3586  __ vpsrlw(kScratchDoubleReg, src, 8);
3587  __ vpsllw(dst, src, 8);
3588  __ vpor(dst, dst, kScratchDoubleReg);
3589  break;
3590  }
// AnyTrue: result is 1 iff any bit of the source vector is set.
// PTEST sets ZF when src AND src == 0; start with dst = -1 and
// conditionally replace it with 0 (tmp) on the zero flag. Note the
// boolean result here is -1/0, consumed by later masking/normalizing
// code outside this view.
3591  case kIA32S1x4AnyTrue:
3592  case kIA32S1x8AnyTrue:
3593  case kIA32S1x16AnyTrue: {
3594  Register dst = i.OutputRegister();
3595  XMMRegister src = i.InputSimd128Register(0);
3596  Register tmp = i.TempRegister(0);
3597  __ xor_(tmp, tmp);
3598  __ mov(dst, Immediate(-1));
3599  __ Ptest(src, src);
3600  __ cmov(zero, dst, tmp);
3601  break;
3602  }
// AllTrue: result is -1 iff every lane is non-zero. Compare the source
// against zero lane-wise (at the opcode's lane width); the comparison
// result is all-zero exactly when no lane was false, which PTEST + cmov
// then translate into the boolean.
3603  case kIA32S1x4AllTrue:
3604  case kIA32S1x8AllTrue:
3605  case kIA32S1x16AllTrue: {
3606  Register dst = i.OutputRegister();
3607  Operand src = i.InputOperand(0);
3608  Register tmp = i.TempRegister(0);
3609  __ mov(tmp, Immediate(-1));
3610  __ xor_(dst, dst);
3611  // Compare all src lanes to false.
3612  __ Pxor(kScratchDoubleReg, kScratchDoubleReg);
3613  if (arch_opcode == kIA32S1x4AllTrue) {
3614  __ Pcmpeqd(kScratchDoubleReg, src);
3615  } else if (arch_opcode == kIA32S1x8AllTrue) {
3616  __ Pcmpeqw(kScratchDoubleReg, src);
3617  } else {
3618  __ Pcmpeqb(kScratchDoubleReg, src);
3619  }
3620  // If kScratchDoubleReg is all zero, none of src lanes are false.
3621  __ Ptest(kScratchDoubleReg, kScratchDoubleReg);
3622  __ cmov(zero, dst, tmp);
3623  break;
3624  }
3625  case kIA32StackCheck: {
3626  __ CompareStackLimit(esp);
3627  break;
3628  }
3629  case kIA32Word32AtomicPairLoad: {
3630  XMMRegister tmp = i.ToDoubleRegister(instr->TempAt(0));
3631  __ movq(tmp, i.MemoryOperand());
3632  if (instr->OutputCount() == 2) {
3633  __ Pextrd(i.OutputRegister(0), tmp, 0);
3634  __ Pextrd(i.OutputRegister(1), tmp, 1);
3635  } else if (instr->OutputCount() == 1) {
3636  __ Pextrd(i.OutputRegister(0), tmp, 0);
3637  __ Pextrd(i.TempRegister(1), tmp, 1);
3638  }
3639  break;
3640  }
3641  case kIA32Word32AtomicPairStore: {
3642  Label store;
3643  __ bind(&store);
3644  __ mov(i.TempRegister(0), i.MemoryOperand(2));
3645  __ mov(i.TempRegister(1), i.NextMemoryOperand(2));
3646  __ push(ebx);
3647  frame_access_state()->IncreaseSPDelta(1);
3648  i.MoveInstructionOperandToRegister(ebx, instr->InputAt(0));
3649  __ lock();
3650  __ cmpxchg8b(i.MemoryOperand(2));
3651  __ pop(ebx);
3652  frame_access_state()->IncreaseSPDelta(-1);
3653  __ j(not_equal, &store);
3654  break;
3655  }
3656  case kWord32AtomicExchangeInt8: {
3657  __ xchg_b(i.InputRegister(0), i.MemoryOperand(1));
3658  __ movsx_b(i.InputRegister(0), i.InputRegister(0));
3659  break;
3660  }
3661  case kWord32AtomicExchangeUint8: {
3662  __ xchg_b(i.InputRegister(0), i.MemoryOperand(1));
3663  __ movzx_b(i.InputRegister(0), i.InputRegister(0));
3664  break;
3665  }
3666  case kWord32AtomicExchangeInt16: {
3667  __ xchg_w(i.InputRegister(0), i.MemoryOperand(1));
3668  __ movsx_w(i.InputRegister(0), i.InputRegister(0));
3669  break;
3670  }
3671  case kWord32AtomicExchangeUint16: {
3672  __ xchg_w(i.InputRegister(0), i.MemoryOperand(1));
3673  __ movzx_w(i.InputRegister(0), i.InputRegister(0));
3674  break;
3675  }
3676  case kWord32AtomicExchangeWord32: {
3677  __ xchg(i.InputRegister(0), i.MemoryOperand(1));
3678  break;
3679  }
3680  case kIA32Word32AtomicPairExchange: {
3681  DCHECK(VerifyOutputOfAtomicPairInstr(&i, instr));
3682  Label exchange;
3683  __ bind(&exchange);
3684  __ mov(eax, i.MemoryOperand(2));
3685  __ mov(edx, i.NextMemoryOperand(2));
3686  __ push(ebx);
3687  frame_access_state()->IncreaseSPDelta(1);
3688  i.MoveInstructionOperandToRegister(ebx, instr->InputAt(0));
3689  __ lock();
3690  __ cmpxchg8b(i.MemoryOperand(2));
3691  __ pop(ebx);
3692  frame_access_state()->IncreaseSPDelta(-1);
3693  __ j(not_equal, &exchange);
3694  break;
3695  }
3696  case kWord32AtomicCompareExchangeInt8: {
3697  __ lock();
3698  __ cmpxchg_b(i.MemoryOperand(2), i.InputRegister(1));
3699  __ movsx_b(eax, eax);
3700  break;
3701  }
3702  case kWord32AtomicCompareExchangeUint8: {
3703  __ lock();
3704  __ cmpxchg_b(i.MemoryOperand(2), i.InputRegister(1));
3705  __ movzx_b(eax, eax);
3706  break;
3707  }
3708  case kWord32AtomicCompareExchangeInt16: {
3709  __ lock();
3710  __ cmpxchg_w(i.MemoryOperand(2), i.InputRegister(1));
3711  __ movsx_w(eax, eax);
3712  break;
3713  }
3714  case kWord32AtomicCompareExchangeUint16: {
3715  __ lock();
3716  __ cmpxchg_w(i.MemoryOperand(2), i.InputRegister(1));
3717  __ movzx_w(eax, eax);
3718  break;
3719  }
3720  case kWord32AtomicCompareExchangeWord32: {
3721  __ lock();
3722  __ cmpxchg(i.MemoryOperand(2), i.InputRegister(1));
3723  break;
3724  }
3725  case kIA32Word32AtomicPairCompareExchange: {
3726  __ push(ebx);
3727  frame_access_state()->IncreaseSPDelta(1);
3728  i.MoveInstructionOperandToRegister(ebx, instr->InputAt(2));
3729  __ lock();
3730  __ cmpxchg8b(i.MemoryOperand(4));
3731  __ pop(ebx);
3732  frame_access_state()->IncreaseSPDelta(-1);
3733  break;
3734  }
3735 #define ATOMIC_BINOP_CASE(op, inst) \
3736  case kWord32Atomic##op##Int8: { \
3737  ASSEMBLE_ATOMIC_BINOP(inst, mov_b, cmpxchg_b); \
3738  __ movsx_b(eax, eax); \
3739  break; \
3740  } \
3741  case kWord32Atomic##op##Uint8: { \
3742  ASSEMBLE_ATOMIC_BINOP(inst, mov_b, cmpxchg_b); \
3743  __ movzx_b(eax, eax); \
3744  break; \
3745  } \
3746  case kWord32Atomic##op##Int16: { \
3747  ASSEMBLE_ATOMIC_BINOP(inst, mov_w, cmpxchg_w); \
3748  __ movsx_w(eax, eax); \
3749  break; \
3750  } \
3751  case kWord32Atomic##op##Uint16: { \
3752  ASSEMBLE_ATOMIC_BINOP(inst, mov_w, cmpxchg_w); \
3753  __ movzx_w(eax, eax); \
3754  break; \
3755  } \
3756  case kWord32Atomic##op##Word32: { \
3757  ASSEMBLE_ATOMIC_BINOP(inst, mov, cmpxchg); \
3758  break; \
3759  }
3760  ATOMIC_BINOP_CASE(Add, add)
3761  ATOMIC_BINOP_CASE(Sub, sub)
3762  ATOMIC_BINOP_CASE(And, and_)
3763  ATOMIC_BINOP_CASE(Or, or_)
3764  ATOMIC_BINOP_CASE(Xor, xor_)
3765 #undef ATOMIC_BINOP_CASE
3766 #define ATOMIC_BINOP_CASE(op, instr1, instr2) \
3767  case kIA32Word32AtomicPair##op: { \
3768  DCHECK(VerifyOutputOfAtomicPairInstr(&i, instr)); \
3769  ASSEMBLE_I64ATOMIC_BINOP(instr1, instr2) \
3770  break; \
3771  }
3772  ATOMIC_BINOP_CASE(Add, add, adc)
3773  ATOMIC_BINOP_CASE(And, and_, and_)
3774  ATOMIC_BINOP_CASE(Or, or_, or_)
3775  ATOMIC_BINOP_CASE(Xor, xor_, xor_)
3776 #undef ATOMIC_BINOP_CASE
3777  case kIA32Word32AtomicPairSub: {
3778  DCHECK(VerifyOutputOfAtomicPairInstr(&i, instr));
3779  Label binop;
3780  __ bind(&binop);
3781  // Move memory operand into edx:eax
3782  __ mov(eax, i.MemoryOperand(2));
3783  __ mov(edx, i.NextMemoryOperand(2));
3784  // Save input registers temporarily on the stack.
3785  __ push(ebx);
3786  frame_access_state()->IncreaseSPDelta(1);
3787  i.MoveInstructionOperandToRegister(ebx, instr->InputAt(0));
3788  __ push(i.InputRegister(1));
3789  // Negate input in place
3790  __ neg(ebx);
3791  __ adc(i.InputRegister(1), 0);
3792  __ neg(i.InputRegister(1));
3793  // Add memory operand, negated input.
3794  __ add(ebx, eax);
3795  __ adc(i.InputRegister(1), edx);
3796  __ lock();
3797  __ cmpxchg8b(i.MemoryOperand(2));
3798  // Restore input registers
3799  __ pop(i.InputRegister(1));
3800  __ pop(ebx);
3801  frame_access_state()->IncreaseSPDelta(-1);
3802  __ j(not_equal, &binop);
3803  break;
3804  }
3805  case kWord32AtomicLoadInt8:
3806  case kWord32AtomicLoadUint8:
3807  case kWord32AtomicLoadInt16:
3808  case kWord32AtomicLoadUint16:
3809  case kWord32AtomicLoadWord32:
3810  case kWord32AtomicStoreWord8:
3811  case kWord32AtomicStoreWord16:
3812  case kWord32AtomicStoreWord32:
3813  UNREACHABLE(); // Won't be generated by instruction selector.
3814  break;
3815  }
3816  return kSuccess;
3817 } // NOLINT(readability/fn_size)
3818 
3819 static Condition FlagsConditionToCondition(FlagsCondition condition) {
3820  switch (condition) {
3821  case kUnorderedEqual:
3822  case kEqual:
3823  return equal;
3824  break;
3825  case kUnorderedNotEqual:
3826  case kNotEqual:
3827  return not_equal;
3828  break;
3829  case kSignedLessThan:
3830  return less;
3831  break;
3832  case kSignedGreaterThanOrEqual:
3833  return greater_equal;
3834  break;
3835  case kSignedLessThanOrEqual:
3836  return less_equal;
3837  break;
3838  case kSignedGreaterThan:
3839  return greater;
3840  break;
3841  case kUnsignedLessThan:
3842  return below;
3843  break;
3844  case kUnsignedGreaterThanOrEqual:
3845  return above_equal;
3846  break;
3847  case kUnsignedLessThanOrEqual:
3848  return below_equal;
3849  break;
3850  case kUnsignedGreaterThan:
3851  return above;
3852  break;
3853  case kOverflow:
3854  return overflow;
3855  break;
3856  case kNotOverflow:
3857  return no_overflow;
3858  break;
3859  default:
3860  UNREACHABLE();
3861  break;
3862  }
3863 }
3864 
3865 // Assembles a branch after an instruction.
// Assembles a branch after an instruction.
void CodeGenerator::AssembleArchBranch(Instruction* instr, BranchInfo* branch) {
  // A near jump suffices for the false label when this block falls through
  // to it in assembly order.
  Label::Distance flabel_distance =
      branch->fallthru ? Label::kNear : Label::kFar;
  Label* tlabel = branch->true_label;
  Label* flabel = branch->false_label;
  if (branch->condition == kUnorderedEqual) {
    // Unordered (NaN) compares set PF; for "equal" that means false, so
    // dispatch the unordered case to the false label first.
    __ j(parity_even, flabel, flabel_distance);
  } else if (branch->condition == kUnorderedNotEqual) {
    // For "not equal", an unordered result counts as true.
    __ j(parity_even, tlabel);
  }
  __ j(FlagsConditionToCondition(branch->condition), tlabel);

  // Add a jump if not falling through to the next block.
  if (!branch->fallthru) __ jmp(flabel);
}
3881 
// Branch poisoning is not supported on ia32; the instruction selector must
// never request it here.
void CodeGenerator::AssembleBranchPoisoning(FlagsCondition condition,
                                            Instruction* instr) {
  // TODO(860429): Remove remaining poisoning infrastructure on ia32.
  UNREACHABLE();
}
3887 
// Assembles a branch to a deoptimization exit; on ia32 this is identical to
// a regular architecture branch.
void CodeGenerator::AssembleArchDeoptBranch(Instruction* instr,
                                            BranchInfo* branch) {
  AssembleArchBranch(instr, branch);
}
3892 
// Assembles an unconditional jump to |target|, elided when the target block
// immediately follows in assembly order.
void CodeGenerator::AssembleArchJump(RpoNumber target) {
  if (!IsNextInAssemblyOrder(target)) __ jmp(GetLabel(target));
}
3896 
// Assembles a wasm trap check: the conditional jump targets an out-of-line
// trap call so the non-trapping fast path stays compact.
void CodeGenerator::AssembleArchTrap(Instruction* instr,
                                     FlagsCondition condition) {
  class OutOfLineTrap final : public OutOfLineCode {
   public:
    OutOfLineTrap(CodeGenerator* gen, Instruction* instr)
        : OutOfLineCode(gen), instr_(instr), gen_(gen) {}

    void Generate() final {
      IA32OperandConverter i(gen_, instr_);
      // The trap id is encoded as the instruction's last input.
      TrapId trap_id =
          static_cast<TrapId>(i.InputInt32(instr_->InputCount() - 1));
      GenerateCallToTrap(trap_id);
    }

   private:
    void GenerateCallToTrap(TrapId trap_id) {
      if (trap_id == TrapId::kInvalid) {
        // We cannot test calls to the runtime in cctest/test-run-wasm.
        // Therefore we emit a call to C here instead of a call to the runtime.
        __ PrepareCallCFunction(0, esi);
        __ CallCFunction(
            ExternalReference::wasm_call_trap_callback_for_testing(), 0);
        __ LeaveFrame(StackFrame::WASM_COMPILED);
        auto call_descriptor = gen_->linkage()->GetIncomingDescriptor();
        size_t pop_size = call_descriptor->StackParameterCount() * kPointerSize;
        // Use ecx as a scratch register, we return anyways immediately.
        __ Ret(static_cast<int>(pop_size), ecx);
      } else {
        gen_->AssembleSourcePosition(instr_);
        // A direct call to a wasm runtime stub defined in this module.
        // Just encode the stub index. This will be patched when the code
        // is added to the native module and copied into wasm code space.
        __ wasm_call(static_cast<Address>(trap_id), RelocInfo::WASM_STUB_CALL);
        // Record an empty safepoint for the stub call site.
        ReferenceMap* reference_map =
            new (gen_->zone()) ReferenceMap(gen_->zone());
        gen_->RecordSafepoint(reference_map, Safepoint::kSimple, 0,
                              Safepoint::kNoLazyDeopt);
        // The trap stub does not return.
        __ AssertUnreachable(AbortReason::kUnexpectedReturnFromWasmTrap);
      }
    }

    Instruction* instr_;
    CodeGenerator* gen_;
  };
  auto ool = new (zone()) OutOfLineTrap(this, instr);
  Label* tlabel = ool->entry();
  Label end;
  if (condition == kUnorderedEqual) {
    // NaN means "not equal": skip the trap entirely.
    __ j(parity_even, &end);
  } else if (condition == kUnorderedNotEqual) {
    // NaN counts as "not equal": take the trap.
    __ j(parity_even, tlabel);
  }
  __ j(FlagsConditionToCondition(condition), tlabel);
  __ bind(&end);
}
3952 
// Assembles boolean materializations after an instruction.
void CodeGenerator::AssembleArchBoolean(Instruction* instr,
                                        FlagsCondition condition) {
  IA32OperandConverter i(this, instr);
  Label done;

  // Materialize a full 32-bit 1 or 0 value. The result register is always the
  // last output of the instruction.
  Label check;
  DCHECK_NE(0u, instr->OutputCount());
  Register reg = i.OutputRegister(instr->OutputCount() - 1);
  if (condition == kUnorderedEqual) {
    // NaN operands (PF set) force the result to 0 ("equal" is false).
    __ j(parity_odd, &check, Label::kNear);
    __ Move(reg, Immediate(0));
    __ jmp(&done, Label::kNear);
  } else if (condition == kUnorderedNotEqual) {
    // NaN operands force the result to 1 ("not equal" holds).
    __ j(parity_odd, &check, Label::kNear);
    __ mov(reg, Immediate(1));
    __ jmp(&done, Label::kNear);
  }
  Condition cc = FlagsConditionToCondition(condition);

  __ bind(&check);
  if (reg.is_byte_register()) {
    // setcc for byte registers (al, bl, cl, dl).
    __ setcc(cc, reg);
    __ movzx_b(reg, reg);
  } else {
    // Emit a branch to set a register to either 1 or 0.
    Label set;
    __ j(cc, &set, Label::kNear);
    __ Move(reg, Immediate(0));
    __ jmp(&done, Label::kNear);
    __ bind(&set);
    __ mov(reg, Immediate(1));
  }
  __ bind(&done);
}
3991 
3992 void CodeGenerator::AssembleArchBinarySearchSwitch(Instruction* instr) {
3993  IA32OperandConverter i(this, instr);
3994  Register input = i.InputRegister(0);
3995  std::vector<std::pair<int32_t, Label*>> cases;
3996  for (size_t index = 2; index < instr->InputCount(); index += 2) {
3997  cases.push_back({i.InputInt32(index + 0), GetLabel(i.InputRpo(index + 1))});
3998  }
3999  AssembleArchBinarySearchSwitchRange(input, i.InputRpo(1), cases.data(),
4000  cases.data() + cases.size());
4001 }
4002 
4003 void CodeGenerator::AssembleArchLookupSwitch(Instruction* instr) {
4004  IA32OperandConverter i(this, instr);
4005  Register input = i.InputRegister(0);
4006  for (size_t index = 2; index < instr->InputCount(); index += 2) {
4007  __ cmp(input, Immediate(i.InputInt32(index + 0)));
4008  __ j(equal, GetLabel(i.InputRpo(index + 1)));
4009  }
4010  AssembleArchJump(i.InputRpo(1));
4011 }
4012 
// Assembles a jump-table dispatch. Input 0 is the zero-based index, input 1
// the out-of-range (default) target, and the remaining inputs the case
// targets in order.
void CodeGenerator::AssembleArchTableSwitch(Instruction* instr) {
  IA32OperandConverter i(this, instr);
  Register input = i.InputRegister(0);
  size_t const case_count = instr->InputCount() - 2;
  Label** cases = zone()->NewArray<Label*>(case_count);
  for (size_t index = 0; index < case_count; ++index) {
    cases[index] = GetLabel(i.InputRpo(index + 2));
  }
  Label* const table = AddJumpTable(cases, case_count);
  // The unsigned compare also rejects negative indices, which wrap to large
  // unsigned values.
  __ cmp(input, Immediate(case_count));
  __ j(above_equal, GetLabel(i.InputRpo(1)));
  __ jmp(Operand::JumpTable(input, times_4, table));
}
4026 
4027 // The calling convention for JSFunctions on IA32 passes arguments on the
4028 // stack and the JSFunction and context in EDI and ESI, respectively, thus
4029 // the steps of the call look as follows:
4030 
4031 // --{ before the call instruction }--------------------------------------------
4032 // | caller frame |
4033 // ^ esp ^ ebp
4034 
4035 // --{ push arguments and setup ESI, EDI }--------------------------------------
4036 // | args + receiver | caller frame |
4037 // ^ esp ^ ebp
4038 // [edi = JSFunction, esi = context]
4039 
4040 // --{ call [edi + kCodeEntryOffset] }------------------------------------------
4041 // | RET | args + receiver | caller frame |
4042 // ^ esp ^ ebp
4043 
4044 // =={ prologue of called function }============================================
4045 // --{ push ebp }---------------------------------------------------------------
4046 // | FP | RET | args + receiver | caller frame |
4047 // ^ esp ^ ebp
4048 
4049 // --{ mov ebp, esp }-----------------------------------------------------------
4050 // | FP | RET | args + receiver | caller frame |
4051 // ^ ebp,esp
4052 
4053 // --{ push esi }---------------------------------------------------------------
4054 // | CTX | FP | RET | args + receiver | caller frame |
4055 // ^esp ^ ebp
4056 
4057 // --{ push edi }---------------------------------------------------------------
4058 // | FNC | CTX | FP | RET | args + receiver | caller frame |
4059 // ^esp ^ ebp
4060 
4061 // --{ subi esp, #N }-----------------------------------------------------------
4062 // | callee frame | FNC | CTX | FP | RET | args + receiver | caller frame |
4063 // ^esp ^ ebp
4064 
4065 // =={ body of called function }================================================
4066 
4067 // =={ epilogue of called function }============================================
4068 // --{ mov esp, ebp }-----------------------------------------------------------
4069 // | FP | RET | args + receiver | caller frame |
4070 // ^ esp,ebp
4071 
4072 // --{ pop ebp }-----------------------------------------------------------
4073 // | | RET | args + receiver | caller frame |
4074 // ^ esp ^ ebp
4075 
4076 // --{ ret #A+1 }-----------------------------------------------------------
4077 // | | caller frame |
4078 // ^ esp ^ ebp
4079 
// Runtime function calls are accomplished by doing a stub call to the
// CEntry (a real code object). On IA32, arguments are passed on the
// stack, with the number of arguments in EAX, the address of the runtime
// function in EBX, and the context in ESI.
4084 
4085 // --{ before the call instruction }--------------------------------------------
4086 // | caller frame |
4087 // ^ esp ^ ebp
4088 
4089 // --{ push arguments and setup EAX, EBX, and ESI }-----------------------------
4090 // | args + receiver | caller frame |
4091 // ^ esp ^ ebp
4092 // [eax = #args, ebx = runtime function, esi = context]
4093 
4094 // --{ call #CEntry }-----------------------------------------------------------
4095 // | RET | args + receiver | caller frame |
4096 // ^ esp ^ ebp
4097 
4098 // =={ body of runtime function }===============================================
4099 
4100 // --{ runtime returns }--------------------------------------------------------
4101 // | caller frame |
4102 // ^ esp ^ ebp
4103 
4104 // Other custom linkages (e.g. for calling directly into and out of C++) may
4105 // need to save callee-saved registers on the stack, which is done in the
4106 // function prologue of generated code.
4107 
4108 // --{ before the call instruction }--------------------------------------------
4109 // | caller frame |
4110 // ^ esp ^ ebp
4111 
// --{ set up arguments in registers and on the stack }-------------------------
4113 // | args | caller frame |
4114 // ^ esp ^ ebp
4115 // [r0 = arg0, r1 = arg1, ...]
4116 
4117 // --{ call code }--------------------------------------------------------------
4118 // | RET | args | caller frame |
4119 // ^ esp ^ ebp
4120 
4121 // =={ prologue of called function }============================================
4122 // --{ push ebp }---------------------------------------------------------------
4123 // | FP | RET | args | caller frame |
4124 // ^ esp ^ ebp
4125 
4126 // --{ mov ebp, esp }-----------------------------------------------------------
4127 // | FP | RET | args | caller frame |
4128 // ^ ebp,esp
4129 
4130 // --{ save registers }---------------------------------------------------------
4131 // | regs | FP | RET | args | caller frame |
4132 // ^ esp ^ ebp
4133 
4134 // --{ subi esp, #N }-----------------------------------------------------------
4135 // | callee frame | regs | FP | RET | args | caller frame |
4136 // ^esp ^ ebp
4137 
4138 // =={ body of called function }================================================
4139 
4140 // =={ epilogue of called function }============================================
4141 // --{ restore registers }------------------------------------------------------
4142 // | regs | FP | RET | args | caller frame |
4143 // ^ esp ^ ebp
4144 
4145 // --{ mov esp, ebp }-----------------------------------------------------------
4146 // | FP | RET | args | caller frame |
4147 // ^ esp,ebp
4148 
4149 // --{ pop ebp }----------------------------------------------------------------
4150 // | RET | args | caller frame |
4151 // ^ esp ^ ebp
4152 
4153 void CodeGenerator::FinishFrame(Frame* frame) {
4154  auto call_descriptor = linkage()->GetIncomingDescriptor();
4155  const RegList saves = call_descriptor->CalleeSavedRegisters();
4156  if (saves != 0) { // Save callee-saved registers.
4157  DCHECK(!info()->is_osr());
4158  int pushed = 0;
4159  for (int i = Register::kNumRegisters - 1; i >= 0; i--) {
4160  if (!((1 << i) & saves)) continue;
4161  ++pushed;
4162  }
4163  frame->AllocateSavedCalleeRegisterSlots(pushed);
4164  }
4165 }
4166 
// Builds the stack frame on function entry according to the frame kind the
// call descriptor requires, performs the wasm stack-overflow check for big
// frames, and allocates spill, callee-saved, and return slots.
void CodeGenerator::AssembleConstructFrame() {
  auto call_descriptor = linkage()->GetIncomingDescriptor();
  if (frame_access_state()->has_frame()) {
    if (call_descriptor->IsCFunctionCall()) {
      // Plain C frame: push ebp; mov ebp, esp.
      __ push(ebp);
      __ mov(ebp, esp);
    } else if (call_descriptor->IsJSFunctionCall()) {
      __ Prologue();
      if (call_descriptor->PushArgumentCount()) {
        __ push(kJavaScriptCallArgCountRegister);
      }
    } else {
      __ StubPrologue(info()->GetOutputStackFrameType());
      if (call_descriptor->IsWasmFunctionCall()) {
        __ push(kWasmInstanceRegister);
      } else if (call_descriptor->IsWasmImportWrapper()) {
        // WASM import wrappers are passed a tuple in the place of the instance.
        // Unpack the tuple into the instance and the target callable.
        // This must be done here in the codegen because it cannot be expressed
        // properly in the graph.
        __ mov(kJSFunctionRegister,
               Operand(kWasmInstanceRegister,
                       Tuple2::kValue2Offset - kHeapObjectTag));
        __ mov(kWasmInstanceRegister,
               Operand(kWasmInstanceRegister,
                       Tuple2::kValue1Offset - kHeapObjectTag));
        __ push(kWasmInstanceRegister);
      }
    }
  }

  // Slots still to be reserved beyond the fixed portion of the frame.
  int shrink_slots = frame()->GetTotalFrameSlotCount() -
                     call_descriptor->CalculateFixedFrameSize();

  if (info()->is_osr()) {
    // TurboFan OSR-compiled functions cannot be entered directly.
    __ Abort(AbortReason::kShouldNotDirectlyEnterOsrFunction);

    // Unoptimized code jumps directly to this entrypoint while the unoptimized
    // frame is still on the stack. Optimized code uses OSR values directly from
    // the unoptimized frame. Thus, all that needs to be done is to allocate the
    // remaining stack slots.
    if (FLAG_code_comments) __ RecordComment("-- OSR entrypoint --");
    osr_pc_offset_ = __ pc_offset();
    shrink_slots -= osr_helper()->UnoptimizedFrameSlots();
  }

  const RegList saves = call_descriptor->CalleeSavedRegisters();
  if (shrink_slots > 0) {
    DCHECK(frame_access_state()->has_frame());
    if (info()->IsWasm() && shrink_slots > 128) {
      // For WebAssembly functions with big frames we have to do the stack
      // overflow check before we construct the frame. Otherwise we may not
      // have enough space on the stack to call the runtime for the stack
      // overflow.
      Label done;

      // If the frame is bigger than the stack, we throw the stack overflow
      // exception unconditionally. Thereby we can avoid the integer overflow
      // check in the condition code.
      if (shrink_slots * kPointerSize < FLAG_stack_size * 1024) {
        // esi is free to clobber here but is saved/restored around the check.
        Register scratch = esi;
        __ push(scratch);
        __ mov(scratch,
               FieldOperand(kWasmInstanceRegister,
                            WasmInstanceObject::kRealStackLimitAddressOffset));
        __ mov(scratch, Operand(scratch, 0));
        // scratch = real stack limit + frame size; the frame fits when esp
        // is at or above it.
        __ add(scratch, Immediate(shrink_slots * kPointerSize));
        __ cmp(esp, scratch);
        __ pop(scratch);
        __ j(above_equal, &done);
      }
      __ mov(ecx, FieldOperand(kWasmInstanceRegister,
                               WasmInstanceObject::kCEntryStubOffset));
      __ Move(esi, Smi::zero());
      __ CallRuntimeWithCEntry(Runtime::kThrowWasmStackOverflow, ecx);
      // Record an empty safepoint for the runtime call site.
      ReferenceMap* reference_map = new (zone()) ReferenceMap(zone());
      RecordSafepoint(reference_map, Safepoint::kSimple, 0,
                      Safepoint::kNoLazyDeopt);
      __ AssertUnreachable(AbortReason::kUnexpectedReturnFromWasmTrap);
      __ bind(&done);
    }

    // Skip callee-saved and return slots, which are created below.
    shrink_slots -= base::bits::CountPopulation(saves);
    shrink_slots -= frame()->GetReturnSlotCount();
    if (shrink_slots > 0) {
      __ sub(esp, Immediate(shrink_slots * kPointerSize));
    }
  }

  if (saves != 0) {  // Save callee-saved registers.
    DCHECK(!info()->is_osr());
    // Push in descending code order; AssembleReturn pops in ascending order.
    for (int i = Register::kNumRegisters - 1; i >= 0; i--) {
      if (((1 << i) & saves)) __ push(Register::from_code(i));
    }
  }

  // Allocate return slots (located after callee-saved).
  if (frame()->GetReturnSlotCount() > 0) {
    __ sub(esp, Immediate(frame()->GetReturnSlotCount() * kPointerSize));
  }
}
4270 
// Assembles the return sequence: restores callee-saved registers, tears
// down the frame, and pops stack parameters — a constant count folded into
// the ret, or a dynamic count taken from |pop|.
void CodeGenerator::AssembleReturn(InstructionOperand* pop) {
  auto call_descriptor = linkage()->GetIncomingDescriptor();

  const RegList saves = call_descriptor->CalleeSavedRegisters();
  // Restore registers.
  if (saves != 0) {
    // Return slots sit below the saved registers; drop them first.
    const int returns = frame()->GetReturnSlotCount();
    if (returns != 0) {
      __ add(esp, Immediate(returns * kPointerSize));
    }
    // Pop in ascending code order — the reverse of the pushes emitted in
    // AssembleConstructFrame.
    for (int i = 0; i < Register::kNumRegisters; i++) {
      if (!((1 << i) & saves)) continue;
      __ pop(Register::from_code(i));
    }
  }

  // Might need ecx for scratch if pop_size is too big or if there is a variable
  // pop count.
  DCHECK_EQ(0u, call_descriptor->CalleeSavedRegisters() & ecx.bit());
  size_t pop_size = call_descriptor->StackParameterCount() * kPointerSize;
  IA32OperandConverter g(this, nullptr);
  if (call_descriptor->IsCFunctionCall()) {
    AssembleDeconstructFrame();
  } else if (frame_access_state()->has_frame()) {
    // Canonicalize JSFunction return sites for now if they always have the same
    // number of return args.
    if (pop->IsImmediate() && g.ToConstant(pop).ToInt32() == 0) {
      if (return_label_.is_bound()) {
        // Reuse the shared return sequence already emitted for this function.
        __ jmp(&return_label_);
        return;
      } else {
        __ bind(&return_label_);
        AssembleDeconstructFrame();
      }
    } else {
      AssembleDeconstructFrame();
    }
  }
  // edx/ecx are clobbered below, so they must not be callee-saved.
  DCHECK_EQ(0u, call_descriptor->CalleeSavedRegisters() & edx.bit());
  DCHECK_EQ(0u, call_descriptor->CalleeSavedRegisters() & ecx.bit());
  if (pop->IsImmediate()) {
    // Constant pop count: fold it into the ret immediate.
    DCHECK_EQ(Constant::kInt32, g.ToConstant(pop).type());
    pop_size += g.ToConstant(pop).ToInt32() * kPointerSize;
    __ Ret(static_cast<int>(pop_size), ecx);
  } else {
    // Dynamic pop count: save the return address in a scratch register,
    // adjust esp by pop_reg words plus the fixed pop_size, then jump back.
    Register pop_reg = g.ToRegister(pop);
    Register scratch_reg = pop_reg == ecx ? edx : ecx;
    __ pop(scratch_reg);
    __ lea(esp, Operand(esp, pop_reg, times_4, static_cast<int>(pop_size)));
    __ jmp(scratch_reg);
  }
}
4323 
// No per-function code finalization is required on ia32.
void CodeGenerator::FinishCode() {}
4325 
// Assembles a move between two instruction operands (register, stack slot,
// or constant), choosing the instruction by the operands' kinds and, for FP
// values, their machine representation.
void CodeGenerator::AssembleMove(InstructionOperand* source,
                                 InstructionOperand* destination) {
  IA32OperandConverter g(this, nullptr);
  // Dispatch on the source and destination operand kinds.
  switch (MoveType::InferMove(source, destination)) {
    case MoveType::kRegisterToRegister:
      if (source->IsRegister()) {
        __ mov(g.ToRegister(destination), g.ToRegister(source));
      } else {
        DCHECK(source->IsFPRegister());
        __ movaps(g.ToDoubleRegister(destination), g.ToDoubleRegister(source));
      }
      return;
    case MoveType::kRegisterToStack: {
      Operand dst = g.ToOperand(destination);
      if (source->IsRegister()) {
        __ mov(dst, g.ToRegister(source));
      } else {
        DCHECK(source->IsFPRegister());
        XMMRegister src = g.ToDoubleRegister(source);
        // Pick the store width matching the value's representation.
        MachineRepresentation rep =
            LocationOperand::cast(source)->representation();
        if (rep == MachineRepresentation::kFloat32) {
          __ movss(dst, src);
        } else if (rep == MachineRepresentation::kFloat64) {
          __ movsd(dst, src);
        } else {
          DCHECK_EQ(MachineRepresentation::kSimd128, rep);
          __ movups(dst, src);
        }
      }
      return;
    }
    case MoveType::kStackToRegister: {
      Operand src = g.ToOperand(source);
      if (source->IsStackSlot()) {
        __ mov(g.ToRegister(destination), src);
      } else {
        DCHECK(source->IsFPStackSlot());
        XMMRegister dst = g.ToDoubleRegister(destination);
        // Pick the load width matching the value's representation.
        MachineRepresentation rep =
            LocationOperand::cast(source)->representation();
        if (rep == MachineRepresentation::kFloat32) {
          __ movss(dst, src);
        } else if (rep == MachineRepresentation::kFloat64) {
          __ movsd(dst, src);
        } else {
          DCHECK_EQ(MachineRepresentation::kSimd128, rep);
          __ movups(dst, src);
        }
      }
      return;
    }
    case MoveType::kStackToStack: {
      Operand src = g.ToOperand(source);
      Operand dst = g.ToOperand(destination);
      if (source->IsStackSlot()) {
        // General-purpose slot: route the value through the stack, avoiding
        // the need for a scratch register.
        __ push(src);
        __ pop(dst);
      } else {
        // FP slot: route the value through the scratch XMM register.
        MachineRepresentation rep =
            LocationOperand::cast(source)->representation();
        if (rep == MachineRepresentation::kFloat32) {
          __ movss(kScratchDoubleReg, src);
          __ movss(dst, kScratchDoubleReg);
        } else if (rep == MachineRepresentation::kFloat64) {
          __ movsd(kScratchDoubleReg, src);
          __ movsd(dst, kScratchDoubleReg);
        } else {
          DCHECK_EQ(MachineRepresentation::kSimd128, rep);
          __ movups(kScratchDoubleReg, src);
          __ movups(dst, kScratchDoubleReg);
        }
      }
      return;
    }
    case MoveType::kConstantToRegister: {
      Constant src = g.ToConstant(source);
      if (destination->IsRegister()) {
        Register dst = g.ToRegister(destination);
        if (src.type() == Constant::kHeapObject) {
          __ Move(dst, src.ToHeapObject());
        } else {
          __ Move(dst, g.ToImmediate(source));
        }
      } else {
        DCHECK(destination->IsFPRegister());
        XMMRegister dst = g.ToDoubleRegister(destination);
        if (src.type() == Constant::kFloat32) {
          // TODO(turbofan): Can we do better here?
          __ Move(dst, src.ToFloat32AsInt());
        } else {
          DCHECK_EQ(src.type(), Constant::kFloat64);
          __ Move(dst, src.ToFloat64().AsUint64());
        }
      }
      return;
    }
    case MoveType::kConstantToStack: {
      Constant src = g.ToConstant(source);
      Operand dst = g.ToOperand(destination);
      if (destination->IsStackSlot()) {
        __ Move(dst, g.ToImmediate(source));
      } else {
        DCHECK(destination->IsFPStackSlot());
        if (src.type() == Constant::kFloat32) {
          __ Move(dst, Immediate(src.ToFloat32AsInt()));
        } else {
          DCHECK_EQ(src.type(), Constant::kFloat64);
          // A 64-bit double constant needs two 32-bit stores on ia32:
          // low word first, then the high word one pointer-size above.
          uint64_t constant_value = src.ToFloat64().AsUint64();
          uint32_t lower = static_cast<uint32_t>(constant_value);
          uint32_t upper = static_cast<uint32_t>(constant_value >> 32);
          Operand dst0 = dst;
          Operand dst1 = g.ToOperand(destination, kPointerSize);
          __ Move(dst0, Immediate(lower));
          __ Move(dst1, Immediate(upper));
        }
      }
      return;
    }
  }
  UNREACHABLE();
}
4449 
// Emits code that exchanges the contents of |source| and |destination|.
// IA-32 is register-starved, so no temporary GP register is claimed: GP
// swaps go through the machine stack (push/pop) and FP swaps go through
// the dedicated scratch XMM register.
void CodeGenerator::AssembleSwap(InstructionOperand* source,
                                 InstructionOperand* destination) {
  IA32OperandConverter g(this, nullptr);
  // Dispatch on the source and destination operand kinds. Not all
  // combinations are possible.
  switch (MoveType::InferSwap(source, destination)) {
    case MoveType::kRegisterToRegister: {
      if (source->IsRegister()) {
        Register src = g.ToRegister(source);
        Register dst = g.ToRegister(destination);
        // Swap without a scratch register: save src on the stack, move dst
        // into src, then pop the saved src value into dst.
        __ push(src);
        __ mov(src, dst);
        __ pop(dst);
      } else {
        DCHECK(source->IsFPRegister());
        XMMRegister src = g.ToDoubleRegister(source);
        XMMRegister dst = g.ToDoubleRegister(destination);
        // Classic three-move swap through the scratch XMM register.
        __ movaps(kScratchDoubleReg, src);
        __ movaps(src, dst);
        __ movaps(dst, kScratchDoubleReg);
      }
      return;
    }
    case MoveType::kRegisterToStack: {
      if (source->IsRegister()) {
        Register src = g.ToRegister(source);
        // Save src on the stack; this also moves esp, so the frame state
        // must be told before any esp-relative slot operand is computed.
        __ push(src);
        frame_access_state()->IncreaseSPDelta(1);
        Operand dst = g.ToOperand(destination);
        // src now receives the old destination value.
        __ mov(src, dst);
        frame_access_state()->IncreaseSPDelta(-1);
        // Recompute the destination operand for the post-pop esp: the pop
        // below restores esp, and its memory operand is evaluated against
        // the incremented stack pointer.
        dst = g.ToOperand(destination);
        // Write the saved src value into the destination slot.
        __ pop(dst);
      } else {
        DCHECK(source->IsFPRegister());
        XMMRegister src = g.ToDoubleRegister(source);
        Operand dst = g.ToOperand(destination);
        MachineRepresentation rep =
            LocationOperand::cast(source)->representation();
        // Swap through the scratch XMM register, using the width-matching
        // load/store for the representation. The final copy back into src
        // uses movaps/movups on the full register, which is safe because
        // only the low lanes are observed for narrower representations.
        if (rep == MachineRepresentation::kFloat32) {
          __ movss(kScratchDoubleReg, dst);
          __ movss(dst, src);
          __ movaps(src, kScratchDoubleReg);
        } else if (rep == MachineRepresentation::kFloat64) {
          __ movsd(kScratchDoubleReg, dst);
          __ movsd(dst, src);
          __ movaps(src, kScratchDoubleReg);
        } else {
          DCHECK_EQ(MachineRepresentation::kSimd128, rep);
          __ movups(kScratchDoubleReg, dst);
          __ movups(dst, src);
          __ movups(src, kScratchDoubleReg);
        }
      }
      return;
    }
    case MoveType::kStackToStack: {
      if (source->IsStackSlot()) {
        // Swap two GP stack slots entirely through the machine stack:
        // push dst, push src, pop into dst, pop into src. Operands are
        // recomputed after each esp change (push/pop) so that esp-relative
        // slots stay correct; IncreaseSPDelta keeps the frame state in sync.
        Operand dst1 = g.ToOperand(destination);
        __ push(dst1);
        frame_access_state()->IncreaseSPDelta(1);
        Operand src1 = g.ToOperand(source);
        __ push(src1);
        Operand dst2 = g.ToOperand(destination);
        __ pop(dst2);
        frame_access_state()->IncreaseSPDelta(-1);
        Operand src2 = g.ToOperand(source);
        __ pop(src2);
      } else {
        DCHECK(source->IsFPStackSlot());
        Operand src0 = g.ToOperand(source);
        Operand dst0 = g.ToOperand(destination);
        MachineRepresentation rep =
            LocationOperand::cast(source)->representation();
        // The destination is parked in the scratch XMM register while the
        // source is copied over it word by word via push/pop pairs; the
        // scratch register is then stored into the source slot. Wider
        // representations copy the additional 4-byte words explicitly.
        if (rep == MachineRepresentation::kFloat32) {
          __ movss(kScratchDoubleReg, dst0);  // Save dst in scratch register.
          __ push(src0);  // Then use stack to copy src to destination.
          __ pop(dst0);
          __ movss(src0, kScratchDoubleReg);
        } else if (rep == MachineRepresentation::kFloat64) {
          __ movsd(kScratchDoubleReg, dst0);  // Save dst in scratch register.
          __ push(src0);  // Then use stack to copy src to destination.
          __ pop(dst0);
          __ push(g.ToOperand(source, kPointerSize));
          __ pop(g.ToOperand(destination, kPointerSize));
          __ movsd(src0, kScratchDoubleReg);
        } else {
          DCHECK_EQ(MachineRepresentation::kSimd128, rep);
          __ movups(kScratchDoubleReg, dst0);  // Save dst in scratch register.
          __ push(src0);  // Then use stack to copy src to destination.
          __ pop(dst0);
          __ push(g.ToOperand(source, kPointerSize));
          __ pop(g.ToOperand(destination, kPointerSize));
          __ push(g.ToOperand(source, 2 * kPointerSize));
          __ pop(g.ToOperand(destination, 2 * kPointerSize));
          __ push(g.ToOperand(source, 3 * kPointerSize));
          __ pop(g.ToOperand(destination, 3 * kPointerSize));
          __ movups(src0, kScratchDoubleReg);
        }
      }
      return;
    }
    default:
      UNREACHABLE();
      break;
  }
}
4557 
4558 void CodeGenerator::AssembleJumpTable(Label** targets, size_t target_count) {
4559  for (size_t index = 0; index < target_count; ++index) {
4560  __ dd(targets[index]);
4561  }
4562 }
4563 
4564 #undef __
4565 #undef kScratchDoubleReg
4566 #undef ASSEMBLE_COMPARE
4567 #undef ASSEMBLE_IEEE754_BINOP
4568 #undef ASSEMBLE_IEEE754_UNOP
4569 #undef ASSEMBLE_BINOP
4570 #undef ASSEMBLE_ATOMIC_BINOP
4571 #undef ASSEMBLE_I64ATOMIC_BINOP
4572 #undef ASSEMBLE_MOVX
4573 #undef ASSEMBLE_SIMD_PUNPCK_SHUFFLE
4574 #undef ASSEMBLE_SIMD_IMM_SHUFFLE
4575 
4576 } // namespace compiler
4577 } // namespace internal
4578 } // namespace v8
Definition: libplatform.h:13