5 #include "src/compiler/backend/code-generator.h" 7 #include "src/assembler-inl.h" 8 #include "src/callable.h" 9 #include "src/compiler/backend/code-generator-impl.h" 10 #include "src/compiler/backend/gap-resolver.h" 11 #include "src/compiler/node-matchers.h" 12 #include "src/compiler/osr.h" 13 #include "src/frame-constants.h" 14 #include "src/frames.h" 15 #include "src/heap/heap-inl.h" 16 #include "src/ia32/assembler-ia32.h" 17 #include "src/macro-assembler.h" 18 #include "src/objects/smi.h" 19 #include "src/optimized-compilation-info.h" 20 #include "src/wasm/wasm-code-manager.h" 21 #include "src/wasm/wasm-objects.h" 29 #define kScratchDoubleReg xmm0 37 Operand InputOperand(
size_t index,
int extra = 0) {
38 return ToOperand(instr_->InputAt(index), extra);
42 return ToImmediate(instr_->InputAt(index));
// Returns the instruction's output, converted to an assembler Operand via
// ToOperand (with its default `extra` of 0).
45 Operand OutputOperand() {
return ToOperand(instr_->Output()); }
48 if (op->IsRegister()) {
51 }
else if (op->IsFPRegister()) {
53 return Operand(ToDoubleRegister(op));
55 DCHECK(op->IsStackSlot() || op->IsFPStackSlot());
56 return SlotToOperand(AllocatedOperand::cast(op)->index(), extra);
// Builds the memory operand for frame slot `slot`. The frame access state
// supplies the slot's offset and whether it is relative to the stack pointer
// (esp) or the frame pointer (ebp); `extra` is added to that offset.
59 Operand SlotToOperand(
int slot,
int extra = 0) {
60 FrameOffset offset = frame_access_state()->GetFrameOffset(slot);
61 return Operand(offset.from_stack_pointer() ? esp : ebp,
62 offset.offset() + extra);
66 Constant constant = ToConstant(operand);
67 if (constant.type() == Constant::kInt32 &&
68 RelocInfo::IsWasmReference(constant.rmode())) {
69 return Immediate(static_cast<Address>(constant.ToInt32()),
72 switch (constant.type()) {
73 case Constant::kInt32:
75 case Constant::kFloat32:
76 return Immediate::EmbeddedNumber(constant.ToFloat32());
77 case Constant::kFloat64:
78 return Immediate::EmbeddedNumber(constant.ToFloat64().value());
79 case Constant::kExternalReference:
80 return Immediate(constant.ToExternalReference());
81 case Constant::kHeapObject:
82 return Immediate(constant.ToHeapObject());
83 case Constant::kDelayedStringConstant:
84 return Immediate::EmbeddedStringConstant(
85 constant.ToDelayedStringConstant());
86 case Constant::kInt64:
88 case Constant::kRpoNumber:
89 return Immediate::CodeRelativeOffset(ToLabel(operand));
94 static size_t NextOffset(
size_t* offset) {
// Converts an addressing mode into a ScaleFactor by taking the distance of
// `mode` from `one`, the base mode passed by the caller.
// The STATIC_ASSERTs pin times_1..times_8 to the values 0..3 so that the
// difference can be cast to ScaleFactor directly.
100 static ScaleFactor ScaleFor(AddressingMode one, AddressingMode mode) {
101 STATIC_ASSERT(0 == static_cast<int>(times_1));
102 STATIC_ASSERT(1 == static_cast<int>(times_2));
103 STATIC_ASSERT(2 == static_cast<int>(times_4));
104 STATIC_ASSERT(3 == static_cast<int>(times_8));
105 int scale =
static_cast<int>(mode - one);
// Only the four scale factors checked above are valid results.
106 DCHECK(scale >= 0 && scale < 4);
107 return static_cast<ScaleFactor
>(scale);
// Decodes the instruction's addressing mode and assembles the matching memory
// operand from consecutive instruction inputs. Inputs are read starting at
// *offset, which is advanced through NextOffset for every input consumed.
// NOTE(review): the switch/case labels and the definitions of `disp` are on
// lines not visible in this excerpt; the groups below can only be told apart
// by the base mode handed to ScaleFor -- confirm against the full file.
110 Operand MemoryOperand(
size_t* offset) {
111 AddressingMode mode = AddressingModeField::decode(instr_->opcode());
114 Register base = InputRegister(NextOffset(offset));
// Base register plus a constant displacement (with its relocation mode).
119 Register base = InputRegister(NextOffset(offset));
120 Constant ctant = ToConstant(instr_->InputAt(NextOffset(offset)));
121 return Operand(base, ctant.ToInt32(), ctant.rmode());
// Base + scaled index; the scale is measured relative to kMode_MR1.
127 Register base = InputRegister(NextOffset(offset));
128 Register index = InputRegister(NextOffset(offset));
129 ScaleFactor scale = ScaleFor(kMode_MR1, mode);
131 return Operand(base, index, scale, disp);
// Base + scaled index + constant displacement (relative to kMode_MR1I).
137 Register base = InputRegister(NextOffset(offset));
138 Register index = InputRegister(NextOffset(offset));
139 ScaleFactor scale = ScaleFor(kMode_MR1I, mode);
140 Constant ctant = ToConstant(instr_->InputAt(NextOffset(offset)));
141 return Operand(base, index, scale, ctant.ToInt32(), ctant.rmode());
// Scaled index only, no base register (relative to kMode_M1).
147 Register index = InputRegister(NextOffset(offset));
148 ScaleFactor scale = ScaleFor(kMode_M1, mode);
150 return Operand(index, scale, disp);
// Scaled index + constant displacement (relative to kMode_M1I).
156 Register index = InputRegister(NextOffset(offset));
157 ScaleFactor scale = ScaleFor(kMode_M1I, mode);
158 Constant ctant = ToConstant(instr_->InputAt(NextOffset(offset)));
159 return Operand(index, scale, ctant.ToInt32(), ctant.rmode());
// Constant displacement only.
162 Constant ctant = ToConstant(instr_->InputAt(NextOffset(offset)));
163 return Operand(ctant.ToInt32(), ctant.rmode());
// Convenience overload: builds the memory operand whose inputs start at index
// `first_input` (default 0). It forwards the address of its by-value
// parameter to the pointer-taking overload, so the advanced index is
// discarded when this function returns.
171 Operand MemoryOperand(
size_t first_input = 0) {
172 return MemoryOperand(&first_input);
// Builds a memory operand that is displaced by a fixed 4 bytes from the
// address described by the instruction inputs starting at `offset`. The
// visible code handles the kMode_MR1 and kMode_MRI addressing modes.
// NOTE(review): presumably this addresses the upper 32-bit word of a 64-bit
// value (it is paired with MemoryOperand for eax/edx in
// ASSEMBLE_I64ATOMIC_BINOP) -- confirm. The handling of other modes is on
// lines not visible in this excerpt.
175 Operand NextMemoryOperand(
size_t offset = 0) {
176 AddressingMode mode = AddressingModeField::decode(instr_->opcode());
177 Register base = InputRegister(NextOffset(&offset));
178 const int32_t disp = 4;
179 if (mode == kMode_MR1) {
180 Register index = InputRegister(NextOffset(&offset));
// Both arguments are kMode_MR1, so the difference is 0, i.e. times_1.
181 ScaleFactor scale = ScaleFor(kMode_MR1, kMode_MR1);
182 return Operand(base, index, scale, disp);
183 }
else if (mode == kMode_MRI) {
// Fold the fixed displacement into the constant offset.
184 Constant ctant = ToConstant(instr_->InputAt(NextOffset(&offset)));
185 return Operand(base, ctant.ToInt32() + disp, ctant.rmode());
// Loads the value described by an instruction operand into `destination`:
// immediates and constants are moved as an Immediate, register operands are
// copied with Move, and everything else is loaded from the operand's memory
// location.
// NOTE(review): the declaration of the second parameter (`op`) is on a line
// not visible in this excerpt.
191 void MoveInstructionOperandToRegister(
Register destination,
193 if (op->IsImmediate() || op->IsConstant()) {
194 gen_->tasm()->mov(destination, ToImmediate(op));
195 }
else if (op->IsRegister()) {
196 gen_->tasm()->Move(destination, ToRegister(op));
198 gen_->tasm()->mov(destination, ToOperand(op));
// Returns true when input number `index` of `instr` is an immediate operand.
205 bool HasImmediateInput(
Instruction* instr,
size_t index) {
206 return instr->InputAt(index)->IsImmediate();
// Out-of-line code that produces a single-precision NaN in `result`.
209 class OutOfLineLoadFloat32NaN final :
public OutOfLineCode {
211 OutOfLineLoadFloat32NaN(CodeGenerator* gen,
XMMRegister result)
212 : OutOfLineCode(gen), result_(result) {}
214 void Generate() final {
// Zero the register, then divide it by itself (0.0f / 0.0f).
215 __ xorps(result_, result_);
216 __ divss(result_, result_);
// Register that receives the NaN.
220 XMMRegister
const result_;
// Out-of-line code that produces a double-precision NaN in `result`.
223 class OutOfLineLoadFloat64NaN final :
public OutOfLineCode {
225 OutOfLineLoadFloat64NaN(CodeGenerator* gen, XMMRegister result)
226 : OutOfLineCode(gen), result_(result) {}
228 void Generate() final {
// Zero the register, then divide it by itself (0.0 / 0.0).
229 __ xorpd(result_, result_);
230 __ divsd(result_, result_);
// Register that receives the NaN.
234 XMMRegister
const result_;
// Out-of-line slow path for truncating a double to an integer. The input is
// spilled to a freshly reserved stack slot, the DoubleToI stub/builtin is
// called, and the result is read back from that same slot into `result`.
// NOTE(review): some member initializers and member declarations are on lines
// not visible in this excerpt.
237 class OutOfLineTruncateDoubleToI final :
public OutOfLineCode {
239 OutOfLineTruncateDoubleToI(CodeGenerator* gen, Register result,
240 XMMRegister input, StubCallMode stub_mode)
241 : OutOfLineCode(gen),
244 stub_mode_(stub_mode),
245 isolate_(gen->isolate()),
246 zone_(gen->zone()) {}
248 void Generate() final {
// Reserve kDoubleSize bytes on the stack and store the input there.
249 __ sub(esp, Immediate(kDoubleSize));
250 __ movsd(MemOperand(esp, 0), input_);
// Wasm runtime-stub mode calls the stub with a WASM_STUB_CALL relocation.
// NOTE(review): the builtin call below is presumably the else branch; the
// branch structure is on lines not visible here.
251 if (stub_mode_ == StubCallMode::kCallWasmRuntimeStub) {
255 __ wasm_call(wasm::WasmCode::kDoubleToI, RelocInfo::WASM_STUB_CALL);
257 __ Call(BUILTIN_CODE(isolate_, DoubleToI), RelocInfo::CODE_TARGET);
// Read the result from the stack slot and release the reserved space.
259 __ mov(result_, MemOperand(esp, 0));
260 __ add(esp, Immediate(kDoubleSize));
264 Register
const result_;
265 XMMRegister
const input_;
266 StubCallMode stub_mode_;
// Out-of-line part of a store with write barrier. It filters out values that
// do not need the barrier, computes the address of the written slot into
// scratch1 and calls the record-write stub for `object`.
// NOTE(review): several member initializers and argument lines are not
// visible in this excerpt.
271 class OutOfLineRecordWrite final :
public OutOfLineCode {
273 OutOfLineRecordWrite(CodeGenerator* gen, Register
object, Operand operand,
274 Register value, Register scratch0, Register scratch1,
275 RecordWriteMode mode, StubCallMode stub_mode)
276 : OutOfLineCode(gen),
283 stub_mode_(stub_mode),
284 zone_(gen->zone()) {}
286 void Generate() final {
// Modes ordered above kValueIsPointer may store a Smi; Smis skip the
// barrier by jumping straight to the exit label.
287 if (mode_ > RecordWriteMode::kValueIsPointer) {
288 __ JumpIfSmi(value_, exit());
// Check the kPointersToHereAreInterestingMask page flag of the value.
// NOTE(review): the branch target argument is not visible here.
290 __ CheckPageFlag(value_, scratch0_,
291 MemoryChunk::kPointersToHereAreInterestingMask, zero,
// Materialize the address of the written slot.
293 __ lea(scratch1_, operand_);
// The remembered set is only emitted for modes ordered above kValueIsMap.
294 RememberedSetAction
const remembered_set_action =
295 mode_ > RecordWriteMode::kValueIsMap ? EMIT_REMEMBERED_SET
296 : OMIT_REMEMBERED_SET;
// FP registers are saved only if the frame allocated double registers.
297 SaveFPRegsMode
const save_fp_mode =
298 frame()->DidAllocateDoubleRegisters() ? kSaveFPRegs : kDontSaveFPRegs;
// Wasm runtime-stub mode passes the kWasmRecordWrite stub id explicitly.
299 if (stub_mode_ == StubCallMode::kCallWasmRuntimeStub) {
303 __ CallRecordWriteStub(object_, scratch1_, remembered_set_action,
304 save_fp_mode, wasm::WasmCode::kWasmRecordWrite);
306 __ CallRecordWriteStub(object_, scratch1_, remembered_set_action,
312 Register
const object_;
313 Operand
const operand_;
314 Register
const value_;
315 Register
const scratch0_;
316 Register
const scratch1_;
317 RecordWriteMode
const mode_;
318 StubCallMode
const stub_mode_;
324 #define ASSEMBLE_COMPARE(asm_instr) \ 326 if (AddressingModeField::decode(instr->opcode()) != kMode_None) { \ 328 Operand left = i.MemoryOperand(&index); \ 329 if (HasImmediateInput(instr, index)) { \ 330 __ asm_instr(left, i.InputImmediate(index)); \ 332 __ asm_instr(left, i.InputRegister(index)); \ 335 if (HasImmediateInput(instr, 1)) { \ 336 if (instr->InputAt(0)->IsRegister()) { \ 337 __ asm_instr(i.InputRegister(0), i.InputImmediate(1)); \ 339 __ asm_instr(i.InputOperand(0), i.InputImmediate(1)); \ 342 if (instr->InputAt(1)->IsRegister()) { \ 343 __ asm_instr(i.InputRegister(0), i.InputRegister(1)); \ 345 __ asm_instr(i.InputRegister(0), i.InputOperand(1)); \ 351 #define ASSEMBLE_IEEE754_BINOP(name) \ 354 __ PrepareCallCFunction(4, eax); \ 355 __ movsd(Operand(esp, 0 * kDoubleSize), i.InputDoubleRegister(0)); \ 356 __ movsd(Operand(esp, 1 * kDoubleSize), i.InputDoubleRegister(1)); \ 357 __ CallCFunction(ExternalReference::ieee754_##name##_function(), 4); \ 360 __ sub(esp, Immediate(kDoubleSize)); \ 361 __ fstp_d(Operand(esp, 0)); \ 362 __ movsd(i.OutputDoubleRegister(), Operand(esp, 0)); \ 363 __ add(esp, Immediate(kDoubleSize)); \ 366 #define ASSEMBLE_IEEE754_UNOP(name) \ 369 __ PrepareCallCFunction(2, eax); \ 370 __ movsd(Operand(esp, 0 * kDoubleSize), i.InputDoubleRegister(0)); \ 371 __ CallCFunction(ExternalReference::ieee754_##name##_function(), 2); \ 374 __ sub(esp, Immediate(kDoubleSize)); \ 375 __ fstp_d(Operand(esp, 0)); \ 376 __ movsd(i.OutputDoubleRegister(), Operand(esp, 0)); \ 377 __ add(esp, Immediate(kDoubleSize)); \ 380 #define ASSEMBLE_BINOP(asm_instr) \ 382 if (AddressingModeField::decode(instr->opcode()) != kMode_None) { \ 384 Operand right = i.MemoryOperand(&index); \ 385 __ asm_instr(i.InputRegister(0), right); \ 387 if (HasImmediateInput(instr, 1)) { \ 388 __ asm_instr(i.InputOperand(0), i.InputImmediate(1)); \ 390 __ asm_instr(i.InputRegister(0), i.InputOperand(1)); \ 395 #define ASSEMBLE_ATOMIC_BINOP(bin_inst, mov_inst, 
cmpxchg_inst) \ 399 __ mov_inst(eax, i.MemoryOperand(1)); \ 400 __ Move(i.TempRegister(0), eax); \ 401 __ bin_inst(i.TempRegister(0), i.InputRegister(0)); \ 403 __ cmpxchg_inst(i.MemoryOperand(1), i.TempRegister(0)); \ 404 __ j(not_equal, &binop); \ 407 #define ASSEMBLE_I64ATOMIC_BINOP(instr1, instr2) \ 411 __ mov(eax, i.MemoryOperand(2)); \ 412 __ mov(edx, i.NextMemoryOperand(2)); \ 414 frame_access_state()->IncreaseSPDelta(1); \ 415 i.MoveInstructionOperandToRegister(ebx, instr->InputAt(0)); \ 416 __ push(i.InputRegister(1)); \ 417 __ instr1(ebx, eax); \ 418 __ instr2(i.InputRegister(1), edx); \ 420 __ cmpxchg8b(i.MemoryOperand(2)); \ 421 __ pop(i.InputRegister(1)); \ 423 frame_access_state()->IncreaseSPDelta(-1); \ 424 __ j(not_equal, &binop); \ 427 #define ASSEMBLE_MOVX(mov_instr) \ 429 if (instr->addressing_mode() != kMode_None) { \ 430 __ mov_instr(i.OutputRegister(), i.MemoryOperand()); \ 431 } else if (instr->InputAt(0)->IsRegister()) { \ 432 __ mov_instr(i.OutputRegister(), i.InputRegister(0)); \ 434 __ mov_instr(i.OutputRegister(), i.InputOperand(0)); \ 438 #define ASSEMBLE_SIMD_PUNPCK_SHUFFLE(opcode) \ 440 XMMRegister src0 = i.InputSimd128Register(0); \ 441 Operand src1 = i.InputOperand(instr->InputCount() == 2 ? 1 : 0); \ 442 if (CpuFeatures::IsSupported(AVX)) { \ 443 CpuFeatureScope avx_scope(tasm(), AVX); \ 444 __ v##opcode(i.OutputSimd128Register(), src0, src1); \ 446 DCHECK_EQ(i.OutputSimd128Register(), src0); \ 447 __ opcode(i.OutputSimd128Register(), src1); \ 451 #define ASSEMBLE_SIMD_IMM_SHUFFLE(opcode, SSELevel, imm) \ 452 if (CpuFeatures::IsSupported(AVX)) { \ 453 CpuFeatureScope avx_scope(tasm(), AVX); \ 454 __ v##opcode(i.OutputSimd128Register(), i.InputSimd128Register(0), \ 455 i.InputOperand(1), imm); \ 457 CpuFeatureScope sse_scope(tasm(), SSELevel); \ 458 DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0)); \ 459 __ opcode(i.OutputSimd128Register(), i.InputOperand(1), imm); \ 462 void CodeGenerator::AssembleDeconstructFrame() {
// Prepares the frame for a tail call: when a frame exists, ebp is reloaded
// from the word at [ebp + 0]; afterwards frame accesses are switched to be
// stack-pointer relative.
// NOTE(review): [ebp + 0] is presumably the saved caller frame pointer --
// confirm.
467 void CodeGenerator::AssemblePrepareTailCall() {
468 if (frame_access_state()->has_frame()) {
469 __ mov(ebp, MemOperand(ebp, 0));
471 frame_access_state()->SetFrameAccessToSP();
// Emits code that checks whether the frame at ebp carries the
// ARGUMENTS_ADAPTOR marker in its context slot. If it does, the adaptor
// frame's argument count is loaded and untagged, and PrepareForTailCall is
// invoked with it; otherwise control jumps to `done`.
// NOTE(review): the remaining parameters of the signature and the definition
// of the `done` label are on lines not visible in this excerpt.
474 void CodeGenerator::AssemblePopArgumentsAdaptorFrame(Register args_reg,
// Fixed scratch registers; they must not alias the argument count register.
479 int scratch_count = 3;
480 Register scratch1 = esi;
481 Register scratch2 = ecx;
482 Register scratch3 = edx;
483 DCHECK(!AreAliased(args_reg, scratch1, scratch2, scratch3));
// Compare the context slot against the arguments adaptor frame marker.
487 __ cmp(Operand(ebp, StandardFrameConstants::kContextOffset),
488 Immediate(StackFrame::TypeToMarker(StackFrame::ARGUMENTS_ADAPTOR)));
489 __ j(not_equal, &done, Label::kNear);
// Load the Smi-tagged argument count from the adaptor frame and untag it.
497 Register caller_args_count_reg = scratch1;
498 __ mov(caller_args_count_reg,
499 Operand(ebp, ArgumentsAdaptorFrameConstants::kLengthOffset));
500 __ SmiUntag(caller_args_count_reg);
502 ParameterCount callee_args_count(args_reg);
503 __ PrepareForTailCall(callee_args_count, caller_args_count_reg, scratch2,
504 scratch3, scratch_count);
// Moves esp so that the slot count above the stack pointer matches
// `new_slot_above_sp`, and keeps the frame access state's SP delta in sync.
//   tasm              - assembler used to emit the esp adjustment.
//   state             - frame access state queried and updated.
//   new_slot_above_sp - target slot count, compared against the current
//                       SP-to-FP slot count plus kFixedSlotCountAboveFp.
//   allow_shrinkage   - when false, a negative delta is left unapplied.
514 void AdjustStackPointerForTailCall(TurboAssembler* tasm,
515 FrameAccessState* state,
516 int new_slot_above_sp,
517 bool allow_shrinkage =
true) {
518 int current_sp_offset = state->GetSPToFPSlotCount() +
519 StandardFrameConstants::kFixedSlotCountAboveFp;
520 int stack_slot_delta = new_slot_above_sp - current_sp_offset;
// Positive delta: reserve more slots by subtracting from esp.
521 if (stack_slot_delta > 0) {
522 tasm->sub(esp, Immediate(stack_slot_delta * kPointerSize));
523 state->IncreaseSPDelta(stack_slot_delta);
524 }
// Negative delta: release slots by adding to esp, only if permitted.
else if (allow_shrinkage && stack_slot_delta < 0) {
525 tasm->add(esp, Immediate(-stack_slot_delta * kPointerSize));
526 state->IncreaseSPDelta(stack_slot_delta);
// Checks the output registers of an atomic pair instruction: output 0 must be
// eax and, when there are exactly two outputs, output 1 must be edx.
// NOTE(review): the remaining return statements of this function are on lines
// not visible in this excerpt.
531 bool VerifyOutputOfAtomicPairInstr(IA32OperandConverter* converter,
532 const Instruction* instr) {
533 if (instr->OutputCount() > 0) {
534 if (converter->OutputRegister(0) != eax)
return false;
535 if (instr->OutputCount() == 2 && converter->OutputRegister(1) != edx)
// Runs before the gap moves of a tail call. Gap moves that can be expressed
// as pushes (immediate and scalar pushes) are collected and, if the last one
// targets the slot directly below `first_unused_stack_slot`, emitted as push
// instructions. Finally esp is adjusted towards `first_unused_stack_slot`
// without shrinking the stack.
// NOTE(review): the handling after each push (other than the SP delta update)
// and the fallback for other source kinds are on lines not visible here.
544 void CodeGenerator::AssembleTailCallBeforeGap(Instruction* instr,
545 int first_unused_stack_slot) {
546 CodeGenerator::PushTypeFlags flags(kImmediatePush | kScalarPush);
547 ZoneVector<MoveOperands*> pushes(zone());
548 GetPushCompatibleMoves(instr, flags, &pushes);
// Only use pushes when the last destination index + 1 equals the first
// unused stack slot.
550 if (!pushes.empty() &&
551 (LocationOperand::cast(pushes.back()->destination()).index() + 1 ==
552 first_unused_stack_slot)) {
553 IA32OperandConverter g(
this, instr);
554 for (
auto move : pushes) {
555 LocationOperand destination_location(
556 LocationOperand::cast(move->destination()));
557 InstructionOperand source(move->source());
// Position esp for this destination slot before pushing.
558 AdjustStackPointerForTailCall(tasm(), frame_access_state(),
559 destination_location.index());
// Push from a stack slot, a register, or an inline immediate.
560 if (source.IsStackSlot()) {
561 LocationOperand source_location(LocationOperand::cast(source));
562 __ push(g.SlotToOperand(source_location.index()));
563 }
else if (source.IsRegister()) {
564 LocationOperand source_location(LocationOperand::cast(source));
565 __ push(source_location.GetRegister());
566 }
else if (source.IsImmediate()) {
567 __ Push(Immediate(ImmediateOperand::cast(source).inline_value()));
// Account for the slot consumed by the push.
572 frame_access_state()->IncreaseSPDelta(1);
// Final adjustment; shrinkage is disabled here.
576 AdjustStackPointerForTailCall(tasm(), frame_access_state(),
577 first_unused_stack_slot,
false);
// Runs after the gap moves of a tail call: adjusts esp to
// `first_unused_stack_slot`. The helper's default for allow_shrinkage applies,
// so the stack may also shrink here.
580 void CodeGenerator::AssembleTailCallAfterGap(Instruction* instr,
581 int first_unused_stack_slot) {
582 AdjustStackPointerForTailCall(tasm(), frame_access_state(),
583 first_unused_stack_slot);
// Emits a check that kJavaScriptCallCodeStartRegister holds the code start
// address: the address is computed into eax, compared against the register,
// and equality is asserted with AbortReason::kWrongFunctionCodeStart.
587 void CodeGenerator::AssembleCodeStartRegisterCheck() {
589 __ ComputeCodeStartAddress(eax);
590 __ cmp(eax, kJavaScriptCallCodeStartRegister);
591 __ Assert(equal, AbortReason::kWrongFunctionCodeStart);
// Emits a check of the marked-for-deoptimization bit of the running code. A
// word is loaded relative to kJavaScriptCallCodeStartRegister, its
// kind-specific flags are tested, and if the bit is set control jumps to the
// CompileLazyDeoptimizedCode builtin.
602 void CodeGenerator::BailoutIfDeoptimized() {
// Offset of the code data container field relative to the code start
// (the register points past the Code header, hence minus kHeaderSize).
603 int offset = Code::kCodeDataContainerOffset - Code::kHeaderSize;
605 __ mov(eax, Operand(kJavaScriptCallCodeStartRegister, offset));
606 __ test(FieldOperand(eax, CodeDataContainer::kKindSpecificFlagsOffset),
607 Immediate(1 << Code::kMarkedForDeoptimizationBit));
// This path must not be used when constants are loaded from the root list.
611 DCHECK(!isolate()->ShouldLoadConstantsFromRootList());
612 Handle<Code> code = isolate()->builtins()->builtin_handle(
613 Builtins::kCompileLazyDeoptimizedCode);
614 __ j(not_zero, code, RelocInfo::CODE_TARGET);
617 void CodeGenerator::GenerateSpeculationPoisonFromCodeStartRegister() {
622 void CodeGenerator::AssembleRegisterArgumentPoisoning() {
628 CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
629 Instruction* instr) {
630 IA32OperandConverter
i(
this, instr);
631 InstructionCode opcode = instr->opcode();
632 ArchOpcode arch_opcode = ArchOpcodeField::decode(opcode);
633 switch (arch_opcode) {
634 case kArchCallCodeObject: {
635 InstructionOperand* op = instr->InputAt(0);
636 if (op->IsImmediate()) {
637 Handle<Code> code =
i.InputCode(0);
638 __ Call(code, RelocInfo::CODE_TARGET);
639 }
else if (op->IsRegister()) {
640 Register reg =
i.InputRegister(0);
642 HasCallDescriptorFlag(instr, CallDescriptor::kFixedTargetRegister),
643 reg == kJavaScriptCallCodeStartRegister);
644 __ add(reg, Immediate(Code::kHeaderSize - kHeapObjectTag));
645 if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) {
646 __ RetpolineCall(reg);
651 CHECK(tasm()->root_array_available());
658 frame_access_state()->IncreaseSPDelta(1);
659 Operand virtual_call_target_register(
660 kRootRegister, IsolateData::virtual_call_target_register_offset());
661 __ mov(eax,
i.InputOperand(0));
662 __ add(eax, Immediate(Code::kHeaderSize - kHeapObjectTag));
663 __ mov(virtual_call_target_register, eax);
665 frame_access_state()->IncreaseSPDelta(-1);
666 __ call(virtual_call_target_register);
668 RecordCallPosition(instr);
669 frame_access_state()->ClearSPDelta();
672 case kArchCallWasmFunction: {
673 if (HasImmediateInput(instr, 0)) {
674 Constant constant =
i.ToConstant(instr->InputAt(0));
675 Address wasm_code =
static_cast<Address
>(constant.ToInt32());
676 if (DetermineStubCallMode() == StubCallMode::kCallWasmRuntimeStub) {
677 __ wasm_call(wasm_code, constant.rmode());
679 if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) {
680 __ RetpolineCall(wasm_code, constant.rmode());
682 __ call(wasm_code, constant.rmode());
686 Register reg =
i.InputRegister(0);
687 if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) {
688 __ RetpolineCall(reg);
693 RecordCallPosition(instr);
694 frame_access_state()->ClearSPDelta();
697 case kArchTailCallCodeObjectFromJSFunction:
698 case kArchTailCallCodeObject: {
699 if (arch_opcode == kArchTailCallCodeObjectFromJSFunction) {
700 AssemblePopArgumentsAdaptorFrame(kJavaScriptCallArgCountRegister,
701 no_reg, no_reg, no_reg);
703 if (HasImmediateInput(instr, 0)) {
704 Handle<Code> code =
i.InputCode(0);
705 __ Jump(code, RelocInfo::CODE_TARGET);
707 Register reg =
i.InputRegister(0);
709 HasCallDescriptorFlag(instr, CallDescriptor::kFixedTargetRegister),
710 reg == kJavaScriptCallCodeStartRegister);
711 __ add(reg, Immediate(Code::kHeaderSize - kHeapObjectTag));
712 if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) {
713 __ RetpolineJump(reg);
718 frame_access_state()->ClearSPDelta();
719 frame_access_state()->SetFrameAccessToDefault();
722 case kArchTailCallWasm: {
723 if (HasImmediateInput(instr, 0)) {
724 Constant constant =
i.ToConstant(instr->InputAt(0));
725 Address wasm_code =
static_cast<Address
>(constant.ToInt32());
726 __ jmp(wasm_code, constant.rmode());
728 Register reg =
i.InputRegister(0);
729 if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) {
730 __ RetpolineJump(reg);
735 frame_access_state()->ClearSPDelta();
736 frame_access_state()->SetFrameAccessToDefault();
739 case kArchTailCallAddress: {
740 CHECK(!HasImmediateInput(instr, 0));
741 Register reg =
i.InputRegister(0);
743 HasCallDescriptorFlag(instr, CallDescriptor::kFixedTargetRegister),
744 reg == kJavaScriptCallCodeStartRegister);
745 if (HasCallDescriptorFlag(instr, CallDescriptor::kRetpoline)) {
746 __ RetpolineJump(reg);
750 frame_access_state()->ClearSPDelta();
751 frame_access_state()->SetFrameAccessToDefault();
754 case kArchCallJSFunction: {
755 Register func =
i.InputRegister(0);
756 if (FLAG_debug_code) {
758 __ cmp(esi, FieldOperand(func, JSFunction::kContextOffset));
759 __ Assert(equal, AbortReason::kWrongFunctionContext);
761 static_assert(kJavaScriptCallCodeStartRegister == ecx,
"ABI mismatch");
762 __ mov(ecx, FieldOperand(func, JSFunction::kCodeOffset));
763 __ add(ecx, Immediate(Code::kHeaderSize - kHeapObjectTag));
765 RecordCallPosition(instr);
766 frame_access_state()->ClearSPDelta();
769 case kArchPrepareCallCFunction: {
771 frame_access_state()->SetFrameAccessToFP();
772 int const num_parameters = MiscField::decode(instr->opcode());
773 __ PrepareCallCFunction(num_parameters,
i.TempRegister(0));
776 case kArchSaveCallerRegisters: {
778 static_cast<SaveFPRegsMode
>(MiscField::decode(instr->opcode()));
779 DCHECK(fp_mode_ == kDontSaveFPRegs || fp_mode_ == kSaveFPRegs);
781 int bytes = __ PushCallerSaved(fp_mode_, kReturnRegister0);
782 DCHECK_EQ(0, bytes % kPointerSize);
783 DCHECK_EQ(0, frame_access_state()->sp_delta());
784 frame_access_state()->IncreaseSPDelta(bytes / kPointerSize);
785 DCHECK(!caller_registers_saved_);
786 caller_registers_saved_ =
true;
789 case kArchRestoreCallerRegisters: {
791 static_cast<SaveFPRegsMode>(MiscField::decode(instr->opcode())));
792 DCHECK(fp_mode_ == kDontSaveFPRegs || fp_mode_ == kSaveFPRegs);
794 int bytes = __ PopCallerSaved(fp_mode_, kReturnRegister0);
795 frame_access_state()->IncreaseSPDelta(-(bytes / kPointerSize));
796 DCHECK_EQ(0, frame_access_state()->sp_delta());
797 DCHECK(caller_registers_saved_);
798 caller_registers_saved_ =
false;
801 case kArchPrepareTailCall:
802 AssemblePrepareTailCall();
804 case kArchCallCFunction: {
805 int const num_parameters = MiscField::decode(instr->opcode());
806 if (HasImmediateInput(instr, 0)) {
807 ExternalReference ref =
i.InputExternalReference(0);
808 __ CallCFunction(ref, num_parameters);
810 Register func =
i.InputRegister(0);
811 __ CallCFunction(func, num_parameters);
813 frame_access_state()->SetFrameAccessToDefault();
819 frame_access_state()->ClearSPDelta();
820 if (caller_registers_saved_) {
827 __ RequiredStackSizeForCallerSaved(fp_mode_, kReturnRegister0);
828 frame_access_state()->IncreaseSPDelta(bytes / kPointerSize);
833 AssembleArchJump(
i.InputRpo(0));
835 case kArchBinarySearchSwitch:
836 AssembleArchBinarySearchSwitch(instr);
838 case kArchLookupSwitch:
839 AssembleArchLookupSwitch(instr);
841 case kArchTableSwitch:
842 AssembleArchTableSwitch(instr);
845 __ RecordComment(reinterpret_cast<const char*>(
i.InputInt32(0)));
847 case kArchDebugAbort:
848 DCHECK(
i.InputRegister(0) == edx);
849 if (!frame_access_state()->has_frame()) {
852 FrameScope scope(tasm(), StackFrame::NONE);
853 __ Call(isolate()->builtins()->builtin_handle(Builtins::kAbortJS),
854 RelocInfo::CODE_TARGET);
856 __ Call(isolate()->builtins()->builtin_handle(Builtins::kAbortJS),
857 RelocInfo::CODE_TARGET);
861 case kArchDebugBreak:
865 case kArchThrowTerminator:
868 case kArchDeoptimize: {
870 BuildTranslation(instr, -1, 0, OutputFrameStateCombine::Ignore());
871 CodeGenResult result =
872 AssembleDeoptimizerCall(deopt_state_id, current_source_position_);
873 if (result != kSuccess)
return result;
877 AssembleReturn(instr->InputAt(0));
879 case kArchStackPointer:
880 __ mov(
i.OutputRegister(), esp);
882 case kArchFramePointer:
883 __ mov(
i.OutputRegister(), ebp);
885 case kArchParentFramePointer:
886 if (frame_access_state()->has_frame()) {
887 __ mov(
i.OutputRegister(), Operand(ebp, 0));
889 __ mov(
i.OutputRegister(), ebp);
892 case kArchTruncateDoubleToI: {
893 auto result =
i.OutputRegister();
894 auto input =
i.InputDoubleRegister(0);
895 auto ool =
new (zone()) OutOfLineTruncateDoubleToI(
896 this, result, input, DetermineStubCallMode());
897 __ cvttsd2si(result, Operand(input));
899 __ j(overflow, ool->entry());
900 __ bind(ool->exit());
903 case kArchStoreWithWriteBarrier: {
904 RecordWriteMode mode =
905 static_cast<RecordWriteMode
>(MiscField::decode(instr->opcode()));
906 Register
object =
i.InputRegister(0);
908 Operand operand =
i.MemoryOperand(&index);
909 Register value =
i.InputRegister(index);
910 Register scratch0 =
i.TempRegister(0);
911 Register scratch1 =
i.TempRegister(1);
912 auto ool =
new (zone())
913 OutOfLineRecordWrite(
this,
object, operand, value, scratch0, scratch1,
914 mode, DetermineStubCallMode());
915 __ mov(operand, value);
916 __ CheckPageFlag(
object, scratch0,
917 MemoryChunk::kPointersFromHereAreInterestingMask,
918 not_zero, ool->entry());
919 __ bind(ool->exit());
922 case kArchStackSlot: {
924 frame_access_state()->GetFrameOffset(
i.InputInt32(0));
925 Register base = offset.from_stack_pointer() ? esp : ebp;
926 __ lea(
i.OutputRegister(), Operand(base, offset.offset()));
929 case kIeee754Float64Acos:
930 ASSEMBLE_IEEE754_UNOP(acos);
932 case kIeee754Float64Acosh:
933 ASSEMBLE_IEEE754_UNOP(acosh);
935 case kIeee754Float64Asin:
936 ASSEMBLE_IEEE754_UNOP(asin);
938 case kIeee754Float64Asinh:
939 ASSEMBLE_IEEE754_UNOP(asinh);
941 case kIeee754Float64Atan:
942 ASSEMBLE_IEEE754_UNOP(atan);
944 case kIeee754Float64Atanh:
945 ASSEMBLE_IEEE754_UNOP(atanh);
947 case kIeee754Float64Atan2:
948 ASSEMBLE_IEEE754_BINOP(atan2);
950 case kIeee754Float64Cbrt:
951 ASSEMBLE_IEEE754_UNOP(cbrt);
953 case kIeee754Float64Cos:
954 ASSEMBLE_IEEE754_UNOP(cos);
956 case kIeee754Float64Cosh:
957 ASSEMBLE_IEEE754_UNOP(cosh);
959 case kIeee754Float64Expm1:
960 ASSEMBLE_IEEE754_UNOP(expm1);
962 case kIeee754Float64Exp:
963 ASSEMBLE_IEEE754_UNOP(exp);
965 case kIeee754Float64Log:
966 ASSEMBLE_IEEE754_UNOP(log);
968 case kIeee754Float64Log1p:
969 ASSEMBLE_IEEE754_UNOP(log1p);
971 case kIeee754Float64Log2:
972 ASSEMBLE_IEEE754_UNOP(log2);
974 case kIeee754Float64Log10:
975 ASSEMBLE_IEEE754_UNOP(log10);
977 case kIeee754Float64Pow: {
979 if (
i.InputDoubleRegister(1) != xmm2) {
980 __ movaps(xmm2,
i.InputDoubleRegister(0));
981 __ movaps(xmm1,
i.InputDoubleRegister(1));
983 __ movaps(xmm0,
i.InputDoubleRegister(0));
984 __ movaps(xmm1, xmm2);
985 __ movaps(xmm2, xmm0);
987 __ Call(BUILTIN_CODE(isolate(), MathPowInternal), RelocInfo::CODE_TARGET);
988 __ movaps(
i.OutputDoubleRegister(), xmm3);
991 case kIeee754Float64Sin:
992 ASSEMBLE_IEEE754_UNOP(sin);
994 case kIeee754Float64Sinh:
995 ASSEMBLE_IEEE754_UNOP(sinh);
997 case kIeee754Float64Tan:
998 ASSEMBLE_IEEE754_UNOP(tan);
1000 case kIeee754Float64Tanh:
1001 ASSEMBLE_IEEE754_UNOP(tanh);
1004 ASSEMBLE_BINOP(add);
1007 ASSEMBLE_BINOP(and_);
1010 ASSEMBLE_COMPARE(cmp);
1013 ASSEMBLE_COMPARE(cmpw);
1016 ASSEMBLE_COMPARE(cmpb);
1019 ASSEMBLE_COMPARE(test);
1022 ASSEMBLE_COMPARE(test_w);
1025 ASSEMBLE_COMPARE(test_b);
1028 if (HasImmediateInput(instr, 1)) {
1029 __ imul(
i.OutputRegister(),
i.InputOperand(0),
i.InputInt32(1));
1031 __ imul(
i.OutputRegister(),
i.InputOperand(1));
1035 __ imul(
i.InputRegister(1));
1038 __ mul(
i.InputRegister(1));
1042 __ idiv(
i.InputOperand(1));
1045 __ Move(edx, Immediate(0));
1046 __ div(
i.InputOperand(1));
1049 __ not_(
i.OutputOperand());
1052 __ neg(
i.OutputOperand());
1055 ASSEMBLE_BINOP(or_);
1058 ASSEMBLE_BINOP(xor_);
1061 ASSEMBLE_BINOP(sub);
1064 if (HasImmediateInput(instr, 1)) {
1065 __ shl(
i.OutputOperand(),
i.InputInt5(1));
1067 __ shl_cl(
i.OutputOperand());
1071 if (HasImmediateInput(instr, 1)) {
1072 __ shr(
i.OutputOperand(),
i.InputInt5(1));
1074 __ shr_cl(
i.OutputOperand());
1078 if (HasImmediateInput(instr, 1)) {
1079 __ sar(
i.OutputOperand(),
i.InputInt5(1));
1081 __ sar_cl(
i.OutputOperand());
1084 case kIA32AddPair: {
1089 bool use_temp =
false;
1090 if ((instr->InputAt(1)->IsRegister() &&
1091 i.OutputRegister(0).code() ==
i.InputRegister(1).code()) ||
1092 i.OutputRegister(0).code() ==
i.InputRegister(3).code()) {
1096 __ Move(
i.TempRegister(0),
i.InputRegister(0));
1097 __ add(
i.TempRegister(0),
i.InputRegister(2));
1099 __ add(
i.OutputRegister(0),
i.InputRegister(2));
1101 i.MoveInstructionOperandToRegister(
i.OutputRegister(1),
1103 __ adc(
i.OutputRegister(1), Operand(
i.InputRegister(3)));
1105 __ Move(
i.OutputRegister(0),
i.TempRegister(0));
1109 case kIA32SubPair: {
1114 bool use_temp =
false;
1115 if ((instr->InputAt(1)->IsRegister() &&
1116 i.OutputRegister(0).code() ==
i.InputRegister(1).code()) ||
1117 i.OutputRegister(0).code() ==
i.InputRegister(3).code()) {
1121 __ Move(
i.TempRegister(0),
i.InputRegister(0));
1122 __ sub(
i.TempRegister(0),
i.InputRegister(2));
1124 __ sub(
i.OutputRegister(0),
i.InputRegister(2));
1126 i.MoveInstructionOperandToRegister(
i.OutputRegister(1),
1128 __ sbb(
i.OutputRegister(1), Operand(
i.InputRegister(3)));
1130 __ Move(
i.OutputRegister(0),
i.TempRegister(0));
1134 case kIA32MulPair: {
1135 __ imul(
i.OutputRegister(1),
i.InputOperand(0));
1136 i.MoveInstructionOperandToRegister(
i.TempRegister(0), instr->InputAt(1));
1137 __ imul(
i.TempRegister(0),
i.InputOperand(2));
1138 __ add(
i.OutputRegister(1),
i.TempRegister(0));
1139 __ mov(
i.OutputRegister(0),
i.InputOperand(0));
1141 __ mul(
i.InputRegister(2));
1142 __ add(
i.OutputRegister(1),
i.TempRegister(0));
1147 if (HasImmediateInput(instr, 2)) {
1148 __ ShlPair(
i.InputRegister(1),
i.InputRegister(0),
i.InputInt6(2));
1151 __ ShlPair_cl(
i.InputRegister(1),
i.InputRegister(0));
1155 if (HasImmediateInput(instr, 2)) {
1156 __ ShrPair(
i.InputRegister(1),
i.InputRegister(0),
i.InputInt6(2));
1159 __ ShrPair_cl(
i.InputRegister(1),
i.InputRegister(0));
1163 if (HasImmediateInput(instr, 2)) {
1164 __ SarPair(
i.InputRegister(1),
i.InputRegister(0),
i.InputInt6(2));
1167 __ SarPair_cl(
i.InputRegister(1),
i.InputRegister(0));
1171 if (HasImmediateInput(instr, 1)) {
1172 __ ror(
i.OutputOperand(),
i.InputInt5(1));
1174 __ ror_cl(
i.OutputOperand());
1178 __ Lzcnt(
i.OutputRegister(),
i.InputOperand(0));
1181 __ Tzcnt(
i.OutputRegister(),
i.InputOperand(0));
1184 __ Popcnt(
i.OutputRegister(),
i.InputOperand(0));
1187 __ bswap(
i.OutputRegister());
1189 case kArchWordPoisonOnSpeculation:
1196 case kSSEFloat32Cmp:
1197 __ ucomiss(
i.InputDoubleRegister(0),
i.InputOperand(1));
1199 case kSSEFloat32Add:
1200 __ addss(
i.InputDoubleRegister(0),
i.InputOperand(1));
1202 case kSSEFloat32Sub:
1203 __ subss(
i.InputDoubleRegister(0),
i.InputOperand(1));
1205 case kSSEFloat32Mul:
1206 __ mulss(
i.InputDoubleRegister(0),
i.InputOperand(1));
1208 case kSSEFloat32Div:
1209 __ divss(
i.InputDoubleRegister(0),
i.InputOperand(1));
1212 __ movaps(
i.OutputDoubleRegister(),
i.OutputDoubleRegister());
1214 case kSSEFloat32Sqrt:
1215 __ sqrtss(
i.OutputDoubleRegister(),
i.InputOperand(0));
1217 case kSSEFloat32Abs: {
1219 __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
1220 __ psrlq(kScratchDoubleReg, 33);
1221 __ andps(
i.OutputDoubleRegister(), kScratchDoubleReg);
1224 case kSSEFloat32Neg: {
1226 __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
1227 __ psllq(kScratchDoubleReg, 31);
1228 __ xorps(
i.OutputDoubleRegister(), kScratchDoubleReg);
1231 case kSSEFloat32Round: {
1232 CpuFeatureScope sse_scope(tasm(), SSE4_1);
1233 RoundingMode
const mode =
1234 static_cast<RoundingMode
>(MiscField::decode(instr->opcode()));
1235 __ roundss(
i.OutputDoubleRegister(),
i.InputDoubleRegister(0), mode);
1238 case kSSEFloat64Cmp:
1239 __ ucomisd(
i.InputDoubleRegister(0),
i.InputOperand(1));
1241 case kSSEFloat64Add:
1242 __ addsd(
i.InputDoubleRegister(0),
i.InputOperand(1));
1244 case kSSEFloat64Sub:
1245 __ subsd(
i.InputDoubleRegister(0),
i.InputOperand(1));
1247 case kSSEFloat64Mul:
1248 __ mulsd(
i.InputDoubleRegister(0),
i.InputOperand(1));
1250 case kSSEFloat64Div:
1251 __ divsd(
i.InputDoubleRegister(0),
i.InputOperand(1));
1254 __ movaps(
i.OutputDoubleRegister(),
i.OutputDoubleRegister());
1256 case kSSEFloat32Max: {
1257 Label compare_nan, compare_swap, done_compare;
1258 if (instr->InputAt(1)->IsFPRegister()) {
1259 __ ucomiss(
i.InputDoubleRegister(0),
i.InputDoubleRegister(1));
1261 __ ucomiss(
i.InputDoubleRegister(0),
i.InputOperand(1));
1264 new (zone()) OutOfLineLoadFloat32NaN(
this,
i.OutputDoubleRegister());
1265 __ j(parity_even, ool->entry());
1266 __ j(above, &done_compare, Label::kNear);
1267 __ j(below, &compare_swap, Label::kNear);
1268 __ movmskps(
i.TempRegister(0),
i.InputDoubleRegister(0));
1269 __ test(
i.TempRegister(0), Immediate(1));
1270 __ j(zero, &done_compare, Label::kNear);
1271 __ bind(&compare_swap);
1272 if (instr->InputAt(1)->IsFPRegister()) {
1273 __ movss(
i.InputDoubleRegister(0),
i.InputDoubleRegister(1));
1275 __ movss(
i.InputDoubleRegister(0),
i.InputOperand(1));
1277 __ bind(&done_compare);
1278 __ bind(ool->exit());
1282 case kSSEFloat64Max: {
1283 Label compare_nan, compare_swap, done_compare;
1284 if (instr->InputAt(1)->IsFPRegister()) {
1285 __ ucomisd(
i.InputDoubleRegister(0),
i.InputDoubleRegister(1));
1287 __ ucomisd(
i.InputDoubleRegister(0),
i.InputOperand(1));
1290 new (zone()) OutOfLineLoadFloat64NaN(
this,
i.OutputDoubleRegister());
1291 __ j(parity_even, ool->entry());
1292 __ j(above, &done_compare, Label::kNear);
1293 __ j(below, &compare_swap, Label::kNear);
1294 __ movmskpd(
i.TempRegister(0),
i.InputDoubleRegister(0));
1295 __ test(
i.TempRegister(0), Immediate(1));
1296 __ j(zero, &done_compare, Label::kNear);
1297 __ bind(&compare_swap);
1298 if (instr->InputAt(1)->IsFPRegister()) {
1299 __ movsd(
i.InputDoubleRegister(0),
i.InputDoubleRegister(1));
1301 __ movsd(
i.InputDoubleRegister(0),
i.InputOperand(1));
1303 __ bind(&done_compare);
1304 __ bind(ool->exit());
1307 case kSSEFloat32Min: {
1308 Label compare_swap, done_compare;
1309 if (instr->InputAt(1)->IsFPRegister()) {
1310 __ ucomiss(
i.InputDoubleRegister(0),
i.InputDoubleRegister(1));
1312 __ ucomiss(
i.InputDoubleRegister(0),
i.InputOperand(1));
1315 new (zone()) OutOfLineLoadFloat32NaN(
this,
i.OutputDoubleRegister());
1316 __ j(parity_even, ool->entry());
1317 __ j(below, &done_compare, Label::kNear);
1318 __ j(above, &compare_swap, Label::kNear);
1319 if (instr->InputAt(1)->IsFPRegister()) {
1320 __ movmskps(
i.TempRegister(0),
i.InputDoubleRegister(1));
1322 __ movss(kScratchDoubleReg,
i.InputOperand(1));
1323 __ movmskps(
i.TempRegister(0), kScratchDoubleReg);
1325 __ test(
i.TempRegister(0), Immediate(1));
1326 __ j(zero, &done_compare, Label::kNear);
1327 __ bind(&compare_swap);
1328 if (instr->InputAt(1)->IsFPRegister()) {
1329 __ movss(
i.InputDoubleRegister(0),
i.InputDoubleRegister(1));
1331 __ movss(
i.InputDoubleRegister(0),
i.InputOperand(1));
1333 __ bind(&done_compare);
1334 __ bind(ool->exit());
1337 case kSSEFloat64Min: {
1338 Label compare_swap, done_compare;
1339 if (instr->InputAt(1)->IsFPRegister()) {
1340 __ ucomisd(
i.InputDoubleRegister(0),
i.InputDoubleRegister(1));
1342 __ ucomisd(
i.InputDoubleRegister(0),
i.InputOperand(1));
1345 new (zone()) OutOfLineLoadFloat64NaN(
this,
i.OutputDoubleRegister());
1346 __ j(parity_even, ool->entry());
1347 __ j(below, &done_compare, Label::kNear);
1348 __ j(above, &compare_swap, Label::kNear);
1349 if (instr->InputAt(1)->IsFPRegister()) {
1350 __ movmskpd(
i.TempRegister(0),
i.InputDoubleRegister(1));
1352 __ movsd(kScratchDoubleReg,
i.InputOperand(1));
1353 __ movmskpd(
i.TempRegister(0), kScratchDoubleReg);
1355 __ test(
i.TempRegister(0), Immediate(1));
1356 __ j(zero, &done_compare, Label::kNear);
1357 __ bind(&compare_swap);
1358 if (instr->InputAt(1)->IsFPRegister()) {
1359 __ movsd(
i.InputDoubleRegister(0),
i.InputDoubleRegister(1));
1361 __ movsd(
i.InputDoubleRegister(0),
i.InputOperand(1));
1363 __ bind(&done_compare);
1364 __ bind(ool->exit());
1367 case kSSEFloat64Mod: {
1368 Register tmp =
i.TempRegister(1);
1370 __ sub(esp, Immediate(kDoubleSize));
1373 __ movsd(Operand(esp, 0),
i.InputDoubleRegister(1));
1374 __ fld_d(Operand(esp, 0));
1375 __ movsd(Operand(esp, 0),
i.InputDoubleRegister(0));
1376 __ fld_d(Operand(esp, 0));
1384 DCHECK_EQ(eax,
i.TempRegister(0));
1387 __ j(parity_even, &mod_loop);
1390 __ fstp_d(Operand(esp, 0));
1391 __ movsd(
i.OutputDoubleRegister(), Operand(esp, 0));
1395 case kSSEFloat64Abs: {
1397 __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
1398 __ psrlq(kScratchDoubleReg, 1);
1399 __ andpd(
i.OutputDoubleRegister(), kScratchDoubleReg);
1402 case kSSEFloat64Neg: {
1404 __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
1405 __ psllq(kScratchDoubleReg, 63);
1406 __ xorpd(
i.OutputDoubleRegister(), kScratchDoubleReg);
1409 case kSSEFloat64Sqrt:
1410 __ sqrtsd(
i.OutputDoubleRegister(),
i.InputOperand(0));
1412 case kSSEFloat64Round: {
1413 CpuFeatureScope sse_scope(tasm(), SSE4_1);
1414 RoundingMode
const mode =
1415 static_cast<RoundingMode
>(MiscField::decode(instr->opcode()));
1416 __ roundsd(
i.OutputDoubleRegister(),
i.InputDoubleRegister(0), mode);
1419 case kSSEFloat32ToFloat64:
1420 __ cvtss2sd(
i.OutputDoubleRegister(),
i.InputOperand(0));
1422 case kSSEFloat64ToFloat32:
1423 __ cvtsd2ss(
i.OutputDoubleRegister(),
i.InputOperand(0));
1425 case kSSEFloat32ToInt32:
1426 __ cvttss2si(
i.OutputRegister(),
i.InputOperand(0));
1428 case kSSEFloat32ToUint32:
1429 __ Cvttss2ui(
i.OutputRegister(),
i.InputOperand(0), kScratchDoubleReg);
1431 case kSSEFloat64ToInt32:
1432 __ cvttsd2si(
i.OutputRegister(),
i.InputOperand(0));
1434 case kSSEFloat64ToUint32:
1435 __ Cvttsd2ui(
i.OutputRegister(),
i.InputOperand(0), kScratchDoubleReg);
1437 case kSSEInt32ToFloat32:
1438 __ cvtsi2ss(
i.OutputDoubleRegister(),
i.InputOperand(0));
1440 case kSSEUint32ToFloat32:
1441 __ Cvtui2ss(
i.OutputDoubleRegister(),
i.InputOperand(0),
1444 case kSSEInt32ToFloat64:
1445 __ cvtsi2sd(
i.OutputDoubleRegister(),
i.InputOperand(0));
1447 case kSSEUint32ToFloat64:
1448 __ Cvtui2sd(
i.OutputDoubleRegister(),
i.InputOperand(0),
1451 case kSSEFloat64ExtractLowWord32:
1452 if (instr->InputAt(0)->IsFPStackSlot()) {
1453 __ mov(
i.OutputRegister(),
i.InputOperand(0));
1455 __ movd(
i.OutputRegister(),
i.InputDoubleRegister(0));
1458 case kSSEFloat64ExtractHighWord32:
1459 if (instr->InputAt(0)->IsFPStackSlot()) {
1460 __ mov(
i.OutputRegister(),
i.InputOperand(0, kDoubleSize / 2));
1462 __ Pextrd(
i.OutputRegister(),
i.InputDoubleRegister(0), 1);
1465 case kSSEFloat64InsertLowWord32:
1466 __ Pinsrd(
i.OutputDoubleRegister(),
i.InputOperand(1), 0);
1468 case kSSEFloat64InsertHighWord32:
1469 __ Pinsrd(
i.OutputDoubleRegister(),
i.InputOperand(1), 1);
1471 case kSSEFloat64LoadLowWord32:
1472 __ movd(
i.OutputDoubleRegister(),
i.InputOperand(0));
1474 case kAVXFloat32Add: {
1475 CpuFeatureScope avx_scope(tasm(), AVX);
1476 __ vaddss(
i.OutputDoubleRegister(),
i.InputDoubleRegister(0),
1480 case kAVXFloat32Sub: {
1481 CpuFeatureScope avx_scope(tasm(), AVX);
1482 __ vsubss(
i.OutputDoubleRegister(),
i.InputDoubleRegister(0),
1486 case kAVXFloat32Mul: {
1487 CpuFeatureScope avx_scope(tasm(), AVX);
1488 __ vmulss(
i.OutputDoubleRegister(),
i.InputDoubleRegister(0),
1492 case kAVXFloat32Div: {
1493 CpuFeatureScope avx_scope(tasm(), AVX);
1494 __ vdivss(
i.OutputDoubleRegister(),
i.InputDoubleRegister(0),
1498 __ movaps(
i.OutputDoubleRegister(),
i.OutputDoubleRegister());
1501 case kAVXFloat64Add: {
1502 CpuFeatureScope avx_scope(tasm(), AVX);
1503 __ vaddsd(
i.OutputDoubleRegister(),
i.InputDoubleRegister(0),
1507 case kAVXFloat64Sub: {
1508 CpuFeatureScope avx_scope(tasm(), AVX);
1509 __ vsubsd(
i.OutputDoubleRegister(),
i.InputDoubleRegister(0),
1513 case kAVXFloat64Mul: {
1514 CpuFeatureScope avx_scope(tasm(), AVX);
1515 __ vmulsd(
i.OutputDoubleRegister(),
i.InputDoubleRegister(0),
1519 case kAVXFloat64Div: {
1520 CpuFeatureScope avx_scope(tasm(), AVX);
1521 __ vdivsd(
i.OutputDoubleRegister(),
i.InputDoubleRegister(0),
1525 __ movaps(
i.OutputDoubleRegister(),
i.OutputDoubleRegister());
1528 case kAVXFloat32Abs: {
1530 __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
1531 __ psrlq(kScratchDoubleReg, 33);
1532 CpuFeatureScope avx_scope(tasm(), AVX);
1533 __ vandps(
i.OutputDoubleRegister(), kScratchDoubleReg,
i.InputOperand(0));
1536 case kAVXFloat32Neg: {
1538 __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
1539 __ psllq(kScratchDoubleReg, 31);
1540 CpuFeatureScope avx_scope(tasm(), AVX);
1541 __ vxorps(
i.OutputDoubleRegister(), kScratchDoubleReg,
i.InputOperand(0));
1544 case kAVXFloat64Abs: {
1546 __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
1547 __ psrlq(kScratchDoubleReg, 1);
1548 CpuFeatureScope avx_scope(tasm(), AVX);
1549 __ vandpd(
i.OutputDoubleRegister(), kScratchDoubleReg,
i.InputOperand(0));
1552 case kAVXFloat64Neg: {
1554 __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
1555 __ psllq(kScratchDoubleReg, 63);
1556 CpuFeatureScope avx_scope(tasm(), AVX);
1557 __ vxorpd(
i.OutputDoubleRegister(), kScratchDoubleReg,
i.InputOperand(0));
1560 case kSSEFloat64SilenceNaN:
1561 __ xorpd(kScratchDoubleReg, kScratchDoubleReg);
1562 __ subsd(
i.InputDoubleRegister(0), kScratchDoubleReg);
1565 ASSEMBLE_MOVX(movsx_b);
1568 ASSEMBLE_MOVX(movzx_b);
1572 Operand operand =
i.MemoryOperand(&index);
1573 if (HasImmediateInput(instr, index)) {
1574 __ mov_b(operand,
i.InputInt8(index));
1576 __ mov_b(operand,
i.InputRegister(index));
1581 ASSEMBLE_MOVX(movsx_w);
1584 ASSEMBLE_MOVX(movzx_w);
1588 Operand operand =
i.MemoryOperand(&index);
1589 if (HasImmediateInput(instr, index)) {
1590 __ mov_w(operand,
i.InputInt16(index));
1592 __ mov_w(operand,
i.InputRegister(index));
1597 if (instr->HasOutput()) {
1598 __ mov(
i.OutputRegister(),
i.MemoryOperand());
1601 Operand operand =
i.MemoryOperand(&index);
1602 if (HasImmediateInput(instr, index)) {
1603 __ mov(operand,
i.InputImmediate(index));
1605 __ mov(operand,
i.InputRegister(index));
1610 if (instr->HasOutput()) {
1611 __ movsd(
i.OutputDoubleRegister(),
i.MemoryOperand());
1614 Operand operand =
i.MemoryOperand(&index);
1615 __ movsd(operand,
i.InputDoubleRegister(index));
1619 if (instr->HasOutput()) {
1620 __ movss(
i.OutputDoubleRegister(),
i.MemoryOperand());
1623 Operand operand =
i.MemoryOperand(&index);
1624 __ movss(operand,
i.InputDoubleRegister(index));
1628 if (instr->HasOutput()) {
1629 __ Movdqu(
i.OutputSimd128Register(),
i.MemoryOperand());
1632 Operand operand =
i.MemoryOperand(&index);
1633 __ Movdqu(operand,
i.InputSimd128Register(index));
1636 case kIA32BitcastFI:
1637 if (instr->InputAt(0)->IsFPStackSlot()) {
1638 __ mov(
i.OutputRegister(),
i.InputOperand(0));
1640 __ movd(
i.OutputRegister(),
i.InputDoubleRegister(0));
1643 case kIA32BitcastIF:
1644 if (instr->InputAt(0)->IsRegister()) {
1645 __ movd(
i.OutputDoubleRegister(),
i.InputRegister(0));
1647 __ movss(
i.OutputDoubleRegister(),
i.InputOperand(0));
1651 AddressingMode mode = AddressingModeField::decode(instr->opcode());
1655 if (mode == kMode_MI) {
1656 __ Move(
i.OutputRegister(), Immediate(
i.InputInt32(0)));
1657 }
else if (
i.InputRegister(0) ==
i.OutputRegister()) {
1658 if (mode == kMode_MRI) {
1659 int32_t constant_summand =
i.InputInt32(1);
1660 if (constant_summand > 0) {
1661 __ add(
i.OutputRegister(), Immediate(constant_summand));
1662 }
else if (constant_summand < 0) {
1663 __ sub(
i.OutputRegister(), Immediate(-constant_summand));
1665 }
else if (mode == kMode_MR1) {
1666 if (
i.InputRegister(1) ==
i.OutputRegister()) {
1667 __ shl(
i.OutputRegister(), 1);
1669 __ add(
i.OutputRegister(),
i.InputRegister(1));
1671 }
else if (mode == kMode_M2) {
1672 __ shl(
i.OutputRegister(), 1);
1673 }
else if (mode == kMode_M4) {
1674 __ shl(
i.OutputRegister(), 2);
1675 }
else if (mode == kMode_M8) {
1676 __ shl(
i.OutputRegister(), 3);
1678 __ lea(
i.OutputRegister(),
i.MemoryOperand());
1680 }
else if (mode == kMode_MR1 &&
1681 i.InputRegister(1) ==
i.OutputRegister()) {
1682 __ add(
i.OutputRegister(),
i.InputRegister(0));
1684 __ lea(
i.OutputRegister(),
i.MemoryOperand());
1688 case kIA32PushFloat32:
1689 if (instr->InputAt(0)->IsFPRegister()) {
1690 __ sub(esp, Immediate(kFloatSize));
1691 __ movss(Operand(esp, 0),
i.InputDoubleRegister(0));
1692 frame_access_state()->IncreaseSPDelta(kFloatSize / kPointerSize);
1693 }
else if (HasImmediateInput(instr, 0)) {
1694 __ Move(kScratchDoubleReg,
i.InputFloat32(0));
1695 __ sub(esp, Immediate(kFloatSize));
1696 __ movss(Operand(esp, 0), kScratchDoubleReg);
1697 frame_access_state()->IncreaseSPDelta(kFloatSize / kPointerSize);
1699 __ movss(kScratchDoubleReg,
i.InputOperand(0));
1700 __ sub(esp, Immediate(kFloatSize));
1701 __ movss(Operand(esp, 0), kScratchDoubleReg);
1702 frame_access_state()->IncreaseSPDelta(kFloatSize / kPointerSize);
1705 case kIA32PushFloat64:
1706 if (instr->InputAt(0)->IsFPRegister()) {
1707 __ sub(esp, Immediate(kDoubleSize));
1708 __ movsd(Operand(esp, 0),
i.InputDoubleRegister(0));
1709 frame_access_state()->IncreaseSPDelta(kDoubleSize / kPointerSize);
1710 }
else if (HasImmediateInput(instr, 0)) {
1711 __ Move(kScratchDoubleReg,
i.InputDouble(0));
1712 __ sub(esp, Immediate(kDoubleSize));
1713 __ movsd(Operand(esp, 0), kScratchDoubleReg);
1714 frame_access_state()->IncreaseSPDelta(kDoubleSize / kPointerSize);
1716 __ movsd(kScratchDoubleReg,
i.InputOperand(0));
1717 __ sub(esp, Immediate(kDoubleSize));
1718 __ movsd(Operand(esp, 0), kScratchDoubleReg);
1719 frame_access_state()->IncreaseSPDelta(kDoubleSize / kPointerSize);
1722 case kIA32PushSimd128:
1723 if (instr->InputAt(0)->IsFPRegister()) {
1724 __ sub(esp, Immediate(kSimd128Size));
1725 __ movups(Operand(esp, 0),
i.InputSimd128Register(0));
1727 __ movups(kScratchDoubleReg,
i.InputOperand(0));
1728 __ sub(esp, Immediate(kSimd128Size));
1729 __ movups(Operand(esp, 0), kScratchDoubleReg);
1731 frame_access_state()->IncreaseSPDelta(kSimd128Size / kPointerSize);
1734 if (AddressingModeField::decode(instr->opcode()) != kMode_None) {
1736 Operand operand =
i.MemoryOperand(&index);
1738 frame_access_state()->IncreaseSPDelta(kFloatSize / kPointerSize);
1739 }
else if (instr->InputAt(0)->IsFPRegister()) {
1740 __ sub(esp, Immediate(kFloatSize));
1741 __ movsd(Operand(esp, 0),
i.InputDoubleRegister(0));
1742 frame_access_state()->IncreaseSPDelta(kFloatSize / kPointerSize);
1743 }
else if (HasImmediateInput(instr, 0)) {
1744 __ push(
i.InputImmediate(0));
1745 frame_access_state()->IncreaseSPDelta(1);
1747 __ push(
i.InputOperand(0));
1748 frame_access_state()->IncreaseSPDelta(1);
1752 int slot = MiscField::decode(instr->opcode());
1753 if (HasImmediateInput(instr, 0)) {
1754 __ mov(Operand(esp, slot * kPointerSize),
i.InputImmediate(0));
1756 __ mov(Operand(esp, slot * kPointerSize),
i.InputRegister(0));
1761 int reverse_slot =
i.InputInt32(0) + 1;
1763 FrameSlotToFPOffset(frame()->GetTotalFrameSlotCount() - reverse_slot);
1764 if (instr->OutputAt(0)->IsFPRegister()) {
1765 LocationOperand* op = LocationOperand::cast(instr->OutputAt(0));
1766 if (op->representation() == MachineRepresentation::kFloat64) {
1767 __ movsd(
i.OutputDoubleRegister(), Operand(ebp, offset));
1769 DCHECK_EQ(MachineRepresentation::kFloat32, op->representation());
1770 __ movss(
i.OutputFloatRegister(), Operand(ebp, offset));
1773 __ mov(
i.OutputRegister(), Operand(ebp, offset));
1777 case kSSEF32x4Splat: {
1778 DCHECK_EQ(
i.OutputDoubleRegister(),
i.InputDoubleRegister(0));
1779 XMMRegister dst =
i.OutputSimd128Register();
1780 __ shufps(dst, dst, 0x0);
1783 case kAVXF32x4Splat: {
1784 CpuFeatureScope avx_scope(tasm(), AVX);
1785 XMMRegister src =
i.InputFloatRegister(0);
1786 __ vshufps(
i.OutputSimd128Register(), src, src, 0x0);
1789 case kSSEF32x4ExtractLane: {
1790 DCHECK_EQ(
i.OutputDoubleRegister(),
i.InputDoubleRegister(0));
1791 XMMRegister dst =
i.OutputFloatRegister();
1792 int8_t lane =
i.InputInt8(1);
1795 __ shufps(dst, dst, lane);
1799 case kAVXF32x4ExtractLane: {
1800 CpuFeatureScope avx_scope(tasm(), AVX);
1801 XMMRegister dst =
i.OutputFloatRegister();
1802 XMMRegister src =
i.InputSimd128Register(0);
1803 int8_t lane =
i.InputInt8(1);
1805 if (dst != src) __ vmovaps(dst, src);
1808 __ vshufps(dst, src, src, lane);
1812 case kSSEF32x4ReplaceLane: {
1813 DCHECK_EQ(
i.OutputSimd128Register(),
i.InputSimd128Register(0));
1814 CpuFeatureScope sse_scope(tasm(), SSE4_1);
1815 __ insertps(
i.OutputSimd128Register(),
i.InputOperand(2),
1816 i.InputInt8(1) << 4);
1819 case kAVXF32x4ReplaceLane: {
1820 CpuFeatureScope avx_scope(tasm(), AVX);
1821 __ vinsertps(
i.OutputSimd128Register(),
i.InputSimd128Register(0),
1822 i.InputOperand(2),
i.InputInt8(1) << 4);
1825 case kIA32F32x4SConvertI32x4: {
1826 __ Cvtdq2ps(
i.OutputSimd128Register(),
i.InputOperand(0));
1829 case kSSEF32x4UConvertI32x4: {
1830 DCHECK_EQ(
i.OutputSimd128Register(),
i.InputSimd128Register(0));
1831 CpuFeatureScope sse_scope(tasm(), SSE4_1);
1832 XMMRegister dst =
i.OutputSimd128Register();
1833 __ pxor(kScratchDoubleReg, kScratchDoubleReg);
1834 __ pblendw(kScratchDoubleReg, dst, 0x55);
1835 __ psubd(dst, kScratchDoubleReg);
1836 __ cvtdq2ps(kScratchDoubleReg, kScratchDoubleReg);
1838 __ cvtdq2ps(dst, dst);
1840 __ addps(dst, kScratchDoubleReg);
1843 case kAVXF32x4UConvertI32x4: {
1844 CpuFeatureScope avx_scope(tasm(), AVX);
1845 XMMRegister dst =
i.OutputSimd128Register();
1846 XMMRegister src =
i.InputSimd128Register(0);
1847 __ vpxor(kScratchDoubleReg, kScratchDoubleReg,
1849 __ vpblendw(kScratchDoubleReg, kScratchDoubleReg, src,
1851 __ vpsubd(dst, src, kScratchDoubleReg);
1852 __ vcvtdq2ps(kScratchDoubleReg, kScratchDoubleReg);
1853 __ vpsrld(dst, dst, 1);
1854 __ vcvtdq2ps(dst, dst);
1855 __ vaddps(dst, dst, dst);
1856 __ vaddps(dst, dst, kScratchDoubleReg);
1859 case kSSEF32x4Abs: {
1860 XMMRegister dst =
i.OutputSimd128Register();
1861 Operand src =
i.InputOperand(0);
1862 if (src.is_reg(dst)) {
1863 __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
1864 __ psrld(kScratchDoubleReg, 1);
1865 __ andps(dst, kScratchDoubleReg);
1867 __ pcmpeqd(dst, dst);
1873 case kAVXF32x4Abs: {
1874 CpuFeatureScope avx_scope(tasm(), AVX);
1875 __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
1876 __ vpsrld(kScratchDoubleReg, kScratchDoubleReg, 1);
1877 __ vandps(
i.OutputSimd128Register(), kScratchDoubleReg,
1881 case kSSEF32x4Neg: {
1882 XMMRegister dst =
i.OutputSimd128Register();
1883 Operand src =
i.InputOperand(0);
1884 if (src.is_reg(dst)) {
1885 __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
1886 __ pslld(kScratchDoubleReg, 31);
1887 __ xorps(dst, kScratchDoubleReg);
1889 __ pcmpeqd(dst, dst);
1895 case kAVXF32x4Neg: {
1896 CpuFeatureScope avx_scope(tasm(), AVX);
1897 __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
1898 __ vpslld(kScratchDoubleReg, kScratchDoubleReg, 31);
1899 __ vxorps(
i.OutputSimd128Register(), kScratchDoubleReg,
1903 case kIA32F32x4RecipApprox: {
1904 __ Rcpps(
i.OutputSimd128Register(),
i.InputOperand(0));
1907 case kIA32F32x4RecipSqrtApprox: {
1908 __ Rsqrtps(
i.OutputSimd128Register(),
i.InputOperand(0));
1911 case kSSEF32x4Add: {
1912 DCHECK_EQ(
i.OutputSimd128Register(),
i.InputSimd128Register(0));
1913 __ addps(
i.OutputSimd128Register(),
i.InputOperand(1));
1916 case kAVXF32x4Add: {
1917 CpuFeatureScope avx_scope(tasm(), AVX);
1918 __ vaddps(
i.OutputSimd128Register(),
i.InputSimd128Register(0),
1922 case kSSEF32x4AddHoriz: {
1923 DCHECK_EQ(
i.OutputSimd128Register(),
i.InputSimd128Register(0));
1924 CpuFeatureScope sse_scope(tasm(), SSE3);
1925 __ haddps(
i.OutputSimd128Register(),
i.InputOperand(1));
1928 case kAVXF32x4AddHoriz: {
1929 CpuFeatureScope avx_scope(tasm(), AVX);
1930 __ vhaddps(
i.OutputSimd128Register(),
i.InputSimd128Register(0),
1934 case kSSEF32x4Sub: {
1935 DCHECK_EQ(
i.OutputSimd128Register(),
i.InputSimd128Register(0));
1936 __ subps(
i.OutputSimd128Register(),
i.InputOperand(1));
1939 case kAVXF32x4Sub: {
1940 CpuFeatureScope avx_scope(tasm(), AVX);
1941 __ vsubps(
i.OutputSimd128Register(),
i.InputSimd128Register(0),
1945 case kSSEF32x4Mul: {
1946 DCHECK_EQ(
i.OutputSimd128Register(),
i.InputSimd128Register(0));
1947 __ mulps(
i.OutputSimd128Register(),
i.InputOperand(1));
1950 case kAVXF32x4Mul: {
1951 CpuFeatureScope avx_scope(tasm(), AVX);
1952 __ vmulps(
i.OutputSimd128Register(),
i.InputSimd128Register(0),
1956 case kSSEF32x4Min: {
1957 DCHECK_EQ(
i.OutputSimd128Register(),
i.InputSimd128Register(0));
1958 __ minps(
i.OutputSimd128Register(),
i.InputOperand(1));
1961 case kAVXF32x4Min: {
1962 CpuFeatureScope avx_scope(tasm(), AVX);
1963 __ vminps(
i.OutputSimd128Register(),
i.InputSimd128Register(0),
1967 case kSSEF32x4Max: {
1968 DCHECK_EQ(
i.OutputSimd128Register(),
i.InputSimd128Register(0));
1969 __ maxps(
i.OutputSimd128Register(),
i.InputOperand(1));
1972 case kAVXF32x4Max: {
1973 CpuFeatureScope avx_scope(tasm(), AVX);
1974 __ vmaxps(
i.OutputSimd128Register(),
i.InputSimd128Register(0),
1979 DCHECK_EQ(
i.OutputSimd128Register(),
i.InputSimd128Register(0));
1980 __ cmpeqps(
i.OutputSimd128Register(),
i.InputOperand(1));
1984 CpuFeatureScope avx_scope(tasm(), AVX);
1985 __ vcmpeqps(
i.OutputSimd128Register(),
i.InputSimd128Register(0),
1990 DCHECK_EQ(
i.OutputSimd128Register(),
i.InputSimd128Register(0));
1991 __ cmpneqps(
i.OutputSimd128Register(),
i.InputOperand(1));
1995 CpuFeatureScope avx_scope(tasm(), AVX);
1996 __ vcmpneqps(
i.OutputSimd128Register(),
i.InputSimd128Register(0),
2001 DCHECK_EQ(
i.OutputSimd128Register(),
i.InputSimd128Register(0));
2002 __ cmpltps(
i.OutputSimd128Register(),
i.InputOperand(1));
2006 CpuFeatureScope avx_scope(tasm(), AVX);
2007 __ vcmpltps(
i.OutputSimd128Register(),
i.InputSimd128Register(0),
2012 DCHECK_EQ(
i.OutputSimd128Register(),
i.InputSimd128Register(0));
2013 __ cmpleps(
i.OutputSimd128Register(),
i.InputOperand(1));
2017 CpuFeatureScope avx_scope(tasm(), AVX);
2018 __ vcmpleps(
i.OutputSimd128Register(),
i.InputSimd128Register(0),
2022 case kIA32I32x4Splat: {
2023 XMMRegister dst =
i.OutputSimd128Register();
2024 __ Movd(dst,
i.InputOperand(0));
2025 __ Pshufd(dst, dst, 0x0);
2028 case kIA32I32x4ExtractLane: {
2029 __ Pextrd(
i.OutputRegister(),
i.InputSimd128Register(0),
i.InputInt8(1));
2032 case kSSEI32x4ReplaceLane: {
2033 DCHECK_EQ(
i.OutputSimd128Register(),
i.InputSimd128Register(0));
2034 CpuFeatureScope sse_scope(tasm(), SSE4_1);
2035 __ pinsrd(
i.OutputSimd128Register(),
i.InputOperand(2),
i.InputInt8(1));
2038 case kAVXI32x4ReplaceLane: {
2039 CpuFeatureScope avx_scope(tasm(), AVX);
2040 __ vpinsrd(
i.OutputSimd128Register(),
i.InputSimd128Register(0),
2041 i.InputOperand(2),
i.InputInt8(1));
2044 case kSSEI32x4SConvertF32x4: {
2045 DCHECK_EQ(
i.OutputSimd128Register(),
i.InputSimd128Register(0));
2046 XMMRegister dst =
i.OutputSimd128Register();
2048 __ movaps(kScratchDoubleReg, dst);
2049 __ cmpeqps(kScratchDoubleReg, kScratchDoubleReg);
2050 __ pand(dst, kScratchDoubleReg);
2052 __ pxor(kScratchDoubleReg, dst);
2054 __ cvttps2dq(dst, dst);
2056 __ pand(kScratchDoubleReg, dst);
2057 __ psrad(kScratchDoubleReg, 31);
2059 __ pxor(dst, kScratchDoubleReg);
2062 case kAVXI32x4SConvertF32x4: {
2063 CpuFeatureScope avx_scope(tasm(), AVX);
2064 XMMRegister dst =
i.OutputSimd128Register();
2065 XMMRegister src =
i.InputSimd128Register(0);
2067 __ vcmpeqps(kScratchDoubleReg, src, src);
2068 __ vpand(dst, src, kScratchDoubleReg);
2070 __ vpxor(kScratchDoubleReg, kScratchDoubleReg, dst);
2072 __ vcvttps2dq(dst, dst);
2074 __ vpand(kScratchDoubleReg, kScratchDoubleReg, dst);
2075 __ vpsrad(kScratchDoubleReg, kScratchDoubleReg, 31);
2077 __ vpxor(dst, dst, kScratchDoubleReg);
2080 case kIA32I32x4SConvertI16x8Low: {
2081 __ Pmovsxwd(
i.OutputSimd128Register(),
i.InputOperand(0));
2084 case kIA32I32x4SConvertI16x8High: {
2085 XMMRegister dst =
i.OutputSimd128Register();
2086 __ Palignr(dst,
i.InputOperand(0), 8);
2087 __ Pmovsxwd(dst, dst);
2090 case kIA32I32x4Neg: {
2091 XMMRegister dst =
i.OutputSimd128Register();
2092 Operand src =
i.InputOperand(0);
2093 if (src.is_reg(dst)) {
2094 __ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
2095 __ Psignd(dst, kScratchDoubleReg);
2102 case kSSEI32x4Shl: {
2103 DCHECK_EQ(
i.OutputSimd128Register(),
i.InputSimd128Register(0));
2104 __ pslld(
i.OutputSimd128Register(),
i.InputInt8(1));
2107 case kAVXI32x4Shl: {
2108 CpuFeatureScope avx_scope(tasm(), AVX);
2109 __ vpslld(
i.OutputSimd128Register(),
i.InputSimd128Register(0),
2113 case kSSEI32x4ShrS: {
2114 DCHECK_EQ(
i.OutputSimd128Register(),
i.InputSimd128Register(0));
2115 __ psrad(
i.OutputSimd128Register(),
i.InputInt8(1));
2118 case kAVXI32x4ShrS: {
2119 CpuFeatureScope avx_scope(tasm(), AVX);
2120 __ vpsrad(
i.OutputSimd128Register(),
i.InputSimd128Register(0),
2124 case kSSEI32x4Add: {
2125 DCHECK_EQ(
i.OutputSimd128Register(),
i.InputSimd128Register(0));
2126 __ paddd(
i.OutputSimd128Register(),
i.InputOperand(1));
2129 case kAVXI32x4Add: {
2130 CpuFeatureScope avx_scope(tasm(), AVX);
2131 __ vpaddd(
i.OutputSimd128Register(),
i.InputSimd128Register(0),
2135 case kSSEI32x4AddHoriz: {
2136 DCHECK_EQ(
i.OutputSimd128Register(),
i.InputSimd128Register(0));
2137 CpuFeatureScope sse_scope(tasm(), SSSE3);
2138 __ phaddd(
i.OutputSimd128Register(),
i.InputOperand(1));
2141 case kAVXI32x4AddHoriz: {
2142 CpuFeatureScope avx_scope(tasm(), AVX);
2143 __ vphaddd(
i.OutputSimd128Register(),
i.InputSimd128Register(0),
2147 case kSSEI32x4Sub: {
2148 DCHECK_EQ(
i.OutputSimd128Register(),
i.InputSimd128Register(0));
2149 __ psubd(
i.OutputSimd128Register(),
i.InputOperand(1));
2152 case kAVXI32x4Sub: {
2153 CpuFeatureScope avx_scope(tasm(), AVX);
2154 __ vpsubd(
i.OutputSimd128Register(),
i.InputSimd128Register(0),
2158 case kSSEI32x4Mul: {
2159 DCHECK_EQ(
i.OutputSimd128Register(),
i.InputSimd128Register(0));
2160 CpuFeatureScope sse_scope(tasm(), SSE4_1);
2161 __ pmulld(
i.OutputSimd128Register(),
i.InputOperand(1));
2164 case kAVXI32x4Mul: {
2165 CpuFeatureScope avx_scope(tasm(), AVX);
2166 __ vpmulld(
i.OutputSimd128Register(),
i.InputSimd128Register(0),
2170 case kSSEI32x4MinS: {
2171 DCHECK_EQ(
i.OutputSimd128Register(),
i.InputSimd128Register(0));
2172 CpuFeatureScope sse_scope(tasm(), SSE4_1);
2173 __ pminsd(
i.OutputSimd128Register(),
i.InputOperand(1));
2176 case kAVXI32x4MinS: {
2177 CpuFeatureScope avx_scope(tasm(), AVX);
2178 __ vpminsd(
i.OutputSimd128Register(),
i.InputSimd128Register(0),
2182 case kSSEI32x4MaxS: {
2183 DCHECK_EQ(
i.OutputSimd128Register(),
i.InputSimd128Register(0));
2184 CpuFeatureScope sse_scope(tasm(), SSE4_1);
2185 __ pmaxsd(
i.OutputSimd128Register(),
i.InputOperand(1));
2188 case kAVXI32x4MaxS: {
2189 CpuFeatureScope avx_scope(tasm(), AVX);
2190 __ vpmaxsd(
i.OutputSimd128Register(),
i.InputSimd128Register(0),
2195 DCHECK_EQ(
i.OutputSimd128Register(),
i.InputSimd128Register(0));
2196 __ pcmpeqd(
i.OutputSimd128Register(),
i.InputOperand(1));
2200 CpuFeatureScope avx_scope(tasm(), AVX);
2201 __ vpcmpeqd(
i.OutputSimd128Register(),
i.InputSimd128Register(0),
2206 DCHECK_EQ(
i.OutputSimd128Register(),
i.InputSimd128Register(0));
2207 __ pcmpeqd(
i.OutputSimd128Register(),
i.InputOperand(1));
2208 __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
2209 __ pxor(
i.OutputSimd128Register(), kScratchDoubleReg);
2213 CpuFeatureScope avx_scope(tasm(), AVX);
2214 __ vpcmpeqd(
i.OutputSimd128Register(),
i.InputSimd128Register(0),
2216 __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
2217 __ vpxor(
i.OutputSimd128Register(),
i.OutputSimd128Register(),
2221 case kSSEI32x4GtS: {
2222 DCHECK_EQ(
i.OutputSimd128Register(),
i.InputSimd128Register(0));
2223 __ pcmpgtd(
i.OutputSimd128Register(),
i.InputOperand(1));
2226 case kAVXI32x4GtS: {
2227 CpuFeatureScope avx_scope(tasm(), AVX);
2228 __ vpcmpgtd(
i.OutputSimd128Register(),
i.InputSimd128Register(0),
2232 case kSSEI32x4GeS: {
2233 DCHECK_EQ(
i.OutputSimd128Register(),
i.InputSimd128Register(0));
2234 CpuFeatureScope sse_scope(tasm(), SSE4_1);
2235 XMMRegister dst =
i.OutputSimd128Register();
2236 Operand src =
i.InputOperand(1);
2237 __ pminsd(dst, src);
2238 __ pcmpeqd(dst, src);
2241 case kAVXI32x4GeS: {
2242 CpuFeatureScope avx_scope(tasm(), AVX);
2243 XMMRegister src1 =
i.InputSimd128Register(0);
2244 Operand src2 =
i.InputOperand(1);
2245 __ vpminsd(kScratchDoubleReg, src1, src2);
2246 __ vpcmpeqd(
i.OutputSimd128Register(), kScratchDoubleReg, src2);
2249 case kSSEI32x4UConvertF32x4: {
2250 DCHECK_EQ(
i.OutputSimd128Register(),
i.InputSimd128Register(0));
2251 CpuFeatureScope sse_scope(tasm(), SSE4_1);
2252 XMMRegister dst =
i.OutputSimd128Register();
2253 XMMRegister tmp =
i.ToSimd128Register(instr->TempAt(0));
2255 __ pxor(kScratchDoubleReg, kScratchDoubleReg);
2256 __ maxps(dst, kScratchDoubleReg);
2258 __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
2259 __ psrld(kScratchDoubleReg, 1);
2260 __ cvtdq2ps(kScratchDoubleReg, kScratchDoubleReg);
2264 __ movaps(tmp, dst);
2265 __ subps(tmp, kScratchDoubleReg);
2266 __ cmpleps(kScratchDoubleReg, tmp);
2267 __ cvttps2dq(tmp, tmp);
2268 __ pxor(tmp, kScratchDoubleReg);
2269 __ pxor(kScratchDoubleReg, kScratchDoubleReg);
2270 __ pmaxsd(tmp, kScratchDoubleReg);
2272 __ cvttps2dq(dst, dst);
2277 case kAVXI32x4UConvertF32x4: {
2278 DCHECK_EQ(
i.OutputSimd128Register(),
i.InputSimd128Register(0));
2279 CpuFeatureScope avx_scope(tasm(), AVX);
2280 XMMRegister dst =
i.OutputSimd128Register();
2281 XMMRegister tmp =
i.ToSimd128Register(instr->TempAt(0));
2283 __ vpxor(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
2284 __ vmaxps(dst, dst, kScratchDoubleReg);
2286 __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
2287 __ vpsrld(kScratchDoubleReg, kScratchDoubleReg, 1);
2288 __ vcvtdq2ps(kScratchDoubleReg, kScratchDoubleReg);
2292 __ vsubps(tmp, dst, kScratchDoubleReg);
2293 __ vcmpleps(kScratchDoubleReg, kScratchDoubleReg, tmp);
2294 __ vcvttps2dq(tmp, tmp);
2295 __ vpxor(tmp, tmp, kScratchDoubleReg);
2296 __ vpxor(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
2297 __ vpmaxsd(tmp, tmp, kScratchDoubleReg);
2299 __ vcvttps2dq(dst, dst);
2301 __ vpaddd(dst, dst, tmp);
2304 case kIA32I32x4UConvertI16x8Low: {
2305 __ Pmovzxwd(
i.OutputSimd128Register(),
i.InputOperand(0));
2308 case kIA32I32x4UConvertI16x8High: {
2309 XMMRegister dst =
i.OutputSimd128Register();
2310 __ Palignr(dst,
i.InputOperand(0), 8);
2311 __ Pmovzxwd(dst, dst);
2314 case kSSEI32x4ShrU: {
2315 DCHECK_EQ(
i.OutputSimd128Register(),
i.InputSimd128Register(0));
2316 __ psrld(
i.OutputSimd128Register(),
i.InputInt8(1));
2319 case kAVXI32x4ShrU: {
2320 CpuFeatureScope avx_scope(tasm(), AVX);
2321 __ vpsrld(
i.OutputSimd128Register(),
i.InputSimd128Register(0),
2325 case kSSEI32x4MinU: {
2326 DCHECK_EQ(
i.OutputSimd128Register(),
i.InputSimd128Register(0));
2327 CpuFeatureScope sse_scope(tasm(), SSE4_1);
2328 __ pminud(
i.OutputSimd128Register(),
i.InputOperand(1));
2331 case kAVXI32x4MinU: {
2332 CpuFeatureScope avx_scope(tasm(), AVX);
2333 __ vpminud(
i.OutputSimd128Register(),
i.InputSimd128Register(0),
2337 case kSSEI32x4MaxU: {
2338 DCHECK_EQ(
i.OutputSimd128Register(),
i.InputSimd128Register(0));
2339 CpuFeatureScope sse_scope(tasm(), SSE4_1);
2340 __ pmaxud(
i.OutputSimd128Register(),
i.InputOperand(1));
2343 case kAVXI32x4MaxU: {
2344 CpuFeatureScope avx_scope(tasm(), AVX);
2345 __ vpmaxud(
i.OutputSimd128Register(),
i.InputSimd128Register(0),
2349 case kSSEI32x4GtU: {
2350 DCHECK_EQ(
i.OutputSimd128Register(),
i.InputSimd128Register(0));
2351 CpuFeatureScope sse_scope(tasm(), SSE4_1);
2352 XMMRegister dst =
i.OutputSimd128Register();
2353 Operand src =
i.InputOperand(1);
2354 __ pmaxud(dst, src);
2355 __ pcmpeqd(dst, src);
2356 __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
2357 __ pxor(dst, kScratchDoubleReg);
2360 case kAVXI32x4GtU: {
2361 CpuFeatureScope avx_scope(tasm(), AVX);
2362 XMMRegister dst =
i.OutputSimd128Register();
2363 XMMRegister src1 =
i.InputSimd128Register(0);
2364 Operand src2 =
i.InputOperand(1);
2365 __ vpmaxud(kScratchDoubleReg, src1, src2);
2366 __ vpcmpeqd(dst, kScratchDoubleReg, src2);
2367 __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
2368 __ vpxor(dst, dst, kScratchDoubleReg);
2371 case kSSEI32x4GeU: {
2372 DCHECK_EQ(
i.OutputSimd128Register(),
i.InputSimd128Register(0));
2373 CpuFeatureScope sse_scope(tasm(), SSE4_1);
2374 XMMRegister dst =
i.OutputSimd128Register();
2375 Operand src =
i.InputOperand(1);
2376 __ pminud(dst, src);
2377 __ pcmpeqd(dst, src);
2380 case kAVXI32x4GeU: {
2381 CpuFeatureScope avx_scope(tasm(), AVX);
2382 XMMRegister src1 =
i.InputSimd128Register(0);
2383 Operand src2 =
i.InputOperand(1);
2384 __ vpminud(kScratchDoubleReg, src1, src2);
2385 __ vpcmpeqd(
i.OutputSimd128Register(), kScratchDoubleReg, src2);
2388 case kIA32I16x8Splat: {
2389 XMMRegister dst =
i.OutputSimd128Register();
2390 __ Movd(dst,
i.InputOperand(0));
2391 __ Pshuflw(dst, dst, 0x0);
2392 __ Pshufd(dst, dst, 0x0);
2395 case kIA32I16x8ExtractLane: {
2396 Register dst =
i.OutputRegister();
2397 __ Pextrw(dst,
i.InputSimd128Register(0),
i.InputInt8(1));
2398 __ movsx_w(dst, dst);
2401 case kSSEI16x8ReplaceLane: {
2402 DCHECK_EQ(
i.OutputSimd128Register(),
i.InputSimd128Register(0));
2403 __ pinsrw(
i.OutputSimd128Register(),
i.InputOperand(2),
i.InputInt8(1));
2406 case kAVXI16x8ReplaceLane: {
2407 CpuFeatureScope avx_scope(tasm(), AVX);
2408 __ vpinsrw(
i.OutputSimd128Register(),
i.InputSimd128Register(0),
2409 i.InputOperand(2),
i.InputInt8(1));
2412 case kIA32I16x8SConvertI8x16Low: {
2413 __ Pmovsxbw(
i.OutputSimd128Register(),
i.InputOperand(0));
2416 case kIA32I16x8SConvertI8x16High: {
2417 XMMRegister dst =
i.OutputSimd128Register();
2418 __ Palignr(dst,
i.InputOperand(0), 8);
2419 __ Pmovsxbw(dst, dst);
2422 case kIA32I16x8Neg: {
2423 XMMRegister dst =
i.OutputSimd128Register();
2424 Operand src =
i.InputOperand(0);
2425 if (src.is_reg(dst)) {
2426 __ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
2427 __ Psignw(dst, kScratchDoubleReg);
2434 case kSSEI16x8Shl: {
2435 DCHECK_EQ(
i.OutputSimd128Register(),
i.InputSimd128Register(0));
2436 __ psllw(
i.OutputSimd128Register(),
i.InputInt8(1));
2439 case kAVXI16x8Shl: {
2440 CpuFeatureScope avx_scope(tasm(), AVX);
2441 __ vpsllw(
i.OutputSimd128Register(),
i.InputSimd128Register(0),
2445 case kSSEI16x8ShrS: {
2446 DCHECK_EQ(
i.OutputSimd128Register(),
i.InputSimd128Register(0));
2447 __ psraw(
i.OutputSimd128Register(),
i.InputInt8(1));
2450 case kAVXI16x8ShrS: {
2451 CpuFeatureScope avx_scope(tasm(), AVX);
2452 __ vpsraw(
i.OutputSimd128Register(),
i.InputSimd128Register(0),
2456 case kSSEI16x8SConvertI32x4: {
2457 DCHECK_EQ(
i.OutputSimd128Register(),
i.InputSimd128Register(0));
2458 __ packssdw(
i.OutputSimd128Register(),
i.InputOperand(1));
2461 case kAVXI16x8SConvertI32x4: {
2462 CpuFeatureScope avx_scope(tasm(), AVX);
2463 __ vpackssdw(
i.OutputSimd128Register(),
i.InputSimd128Register(0),
2467 case kSSEI16x8Add: {
2468 DCHECK_EQ(
i.OutputSimd128Register(),
i.InputSimd128Register(0));
2469 __ paddw(
i.OutputSimd128Register(),
i.InputOperand(1));
2472 case kAVXI16x8Add: {
2473 CpuFeatureScope avx_scope(tasm(), AVX);
2474 __ vpaddw(
i.OutputSimd128Register(),
i.InputSimd128Register(0),
2478 case kSSEI16x8AddSaturateS: {
2479 DCHECK_EQ(
i.OutputSimd128Register(),
i.InputSimd128Register(0));
2480 __ paddsw(
i.OutputSimd128Register(),
i.InputOperand(1));
2483 case kAVXI16x8AddSaturateS: {
2484 CpuFeatureScope avx_scope(tasm(), AVX);
2485 __ vpaddsw(
i.OutputSimd128Register(),
i.InputSimd128Register(0),
2489 case kSSEI16x8AddHoriz: {
2490 DCHECK_EQ(
i.OutputSimd128Register(),
i.InputSimd128Register(0));
2491 CpuFeatureScope sse_scope(tasm(), SSSE3);
2492 __ phaddw(
i.OutputSimd128Register(),
i.InputOperand(1));
2495 case kAVXI16x8AddHoriz: {
2496 CpuFeatureScope avx_scope(tasm(), AVX);
2497 __ vphaddw(
i.OutputSimd128Register(),
i.InputSimd128Register(0),
2501 case kSSEI16x8Sub: {
2502 DCHECK_EQ(
i.OutputSimd128Register(),
i.InputSimd128Register(0));
2503 __ psubw(
i.OutputSimd128Register(),
i.InputOperand(1));
2506 case kAVXI16x8Sub: {
2507 CpuFeatureScope avx_scope(tasm(), AVX);
2508 __ vpsubw(
i.OutputSimd128Register(),
i.InputSimd128Register(0),
2512 case kSSEI16x8SubSaturateS: {
2513 DCHECK_EQ(
i.OutputSimd128Register(),
i.InputSimd128Register(0));
2514 __ psubsw(
i.OutputSimd128Register(),
i.InputOperand(1));
2517 case kAVXI16x8SubSaturateS: {
2518 CpuFeatureScope avx_scope(tasm(), AVX);
2519 __ vpsubsw(
i.OutputSimd128Register(),
i.InputSimd128Register(0),
2523 case kSSEI16x8Mul: {
2524 DCHECK_EQ(
i.OutputSimd128Register(),
i.InputSimd128Register(0));
2525 __ pmullw(
i.OutputSimd128Register(),
i.InputOperand(1));
2528 case kAVXI16x8Mul: {
2529 CpuFeatureScope avx_scope(tasm(), AVX);
2530 __ vpmullw(
i.OutputSimd128Register(),
i.InputSimd128Register(0),
2534 case kSSEI16x8MinS: {
2535 DCHECK_EQ(
i.OutputSimd128Register(),
i.InputSimd128Register(0));
2536 __ pminsw(
i.OutputSimd128Register(),
i.InputOperand(1));
2539 case kAVXI16x8MinS: {
2540 CpuFeatureScope avx_scope(tasm(), AVX);
2541 __ vpminsw(
i.OutputSimd128Register(),
i.InputSimd128Register(0),
2545 case kSSEI16x8MaxS: {
2546 DCHECK_EQ(
i.OutputSimd128Register(),
i.InputSimd128Register(0));
2547 __ pmaxsw(
i.OutputSimd128Register(),
i.InputOperand(1));
2550 case kAVXI16x8MaxS: {
2551 CpuFeatureScope avx_scope(tasm(), AVX);
2552 __ vpmaxsw(
i.OutputSimd128Register(),
i.InputSimd128Register(0),
2557 DCHECK_EQ(
i.OutputSimd128Register(),
i.InputSimd128Register(0));
2558 __ pcmpeqw(
i.OutputSimd128Register(),
i.InputOperand(1));
2562 CpuFeatureScope avx_scope(tasm(), AVX);
2563 __ vpcmpeqw(
i.OutputSimd128Register(),
i.InputSimd128Register(0),
2568 DCHECK_EQ(
i.OutputSimd128Register(),
i.InputSimd128Register(0));
2569 __ pcmpeqw(
i.OutputSimd128Register(),
i.InputOperand(1));
2570 __ pcmpeqw(kScratchDoubleReg, kScratchDoubleReg);
2571 __ pxor(
i.OutputSimd128Register(), kScratchDoubleReg);
2575 CpuFeatureScope avx_scope(tasm(), AVX);
2576 __ vpcmpeqw(
i.OutputSimd128Register(),
i.InputSimd128Register(0),
2578 __ vpcmpeqw(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
2579 __ vpxor(
i.OutputSimd128Register(),
i.OutputSimd128Register(),
2583 case kSSEI16x8GtS: {
2584 DCHECK_EQ(
i.OutputSimd128Register(),
i.InputSimd128Register(0));
2585 __ pcmpgtw(
i.OutputSimd128Register(),
i.InputOperand(1));
2588 case kAVXI16x8GtS: {
2589 CpuFeatureScope avx_scope(tasm(), AVX);
2590 __ vpcmpgtw(
i.OutputSimd128Register(),
i.InputSimd128Register(0),
2594 case kSSEI16x8GeS: {
2595 DCHECK_EQ(
i.OutputSimd128Register(),
i.InputSimd128Register(0));
2596 XMMRegister dst =
i.OutputSimd128Register();
2597 Operand src =
i.InputOperand(1);
2598 __ pminsw(dst, src);
2599 __ pcmpeqw(dst, src);
2602 case kAVXI16x8GeS: {
2603 CpuFeatureScope avx_scope(tasm(), AVX);
2604 XMMRegister src1 =
i.InputSimd128Register(0);
2605 Operand src2 =
i.InputOperand(1);
2606 __ vpminsw(kScratchDoubleReg, src1, src2);
2607 __ vpcmpeqw(
i.OutputSimd128Register(), kScratchDoubleReg, src2);
2610 case kIA32I16x8UConvertI8x16Low: {
2611 __ Pmovzxbw(
i.OutputSimd128Register(),
i.InputOperand(0));
2614 case kIA32I16x8UConvertI8x16High: {
2615 XMMRegister dst =
i.OutputSimd128Register();
2616 __ Palignr(dst,
i.InputOperand(0), 8);
2617 __ Pmovzxbw(dst, dst);
2620 case kSSEI16x8ShrU: {
2621 DCHECK_EQ(
i.OutputSimd128Register(),
i.InputSimd128Register(0));
2622 __ psrlw(
i.OutputSimd128Register(),
i.InputInt8(1));
2625 case kAVXI16x8ShrU: {
2626 CpuFeatureScope avx_scope(tasm(), AVX);
2627 __ vpsrlw(
i.OutputSimd128Register(),
i.InputSimd128Register(0),
2631 case kSSEI16x8UConvertI32x4: {
2632 DCHECK_EQ(
i.OutputSimd128Register(),
i.InputSimd128Register(0));
2633 CpuFeatureScope sse_scope(tasm(), SSE4_1);
2634 XMMRegister dst =
i.OutputSimd128Register();
2636 __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
2637 __ psrld(kScratchDoubleReg, 1);
2638 __ pminud(dst, kScratchDoubleReg);
2639 __ pminud(kScratchDoubleReg,
i.InputOperand(1));
2640 __ packusdw(dst, kScratchDoubleReg);
2643 case kAVXI16x8UConvertI32x4: {
2644 DCHECK_EQ(
i.OutputSimd128Register(),
i.InputSimd128Register(0));
2645 CpuFeatureScope avx_scope(tasm(), AVX);
2646 XMMRegister dst =
i.OutputSimd128Register();
2648 __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
2649 __ vpsrld(kScratchDoubleReg, kScratchDoubleReg, 1);
2650 __ vpminud(dst, kScratchDoubleReg,
i.InputSimd128Register(0));
2651 __ vpminud(kScratchDoubleReg, kScratchDoubleReg,
i.InputOperand(1));
2652 __ vpackusdw(dst, dst, kScratchDoubleReg);
2655 case kSSEI16x8AddSaturateU: {
2656 DCHECK_EQ(
i.OutputSimd128Register(),
i.InputSimd128Register(0));
2657 __ paddusw(
i.OutputSimd128Register(),
i.InputOperand(1));
2660 case kAVXI16x8AddSaturateU: {
2661 CpuFeatureScope avx_scope(tasm(), AVX);
2662 __ vpaddusw(
i.OutputSimd128Register(),
i.InputSimd128Register(0),
2666 case kSSEI16x8SubSaturateU: {
2667 DCHECK_EQ(
i.OutputSimd128Register(),
i.InputSimd128Register(0));
2668 __ psubusw(
i.OutputSimd128Register(),
i.InputOperand(1));
2671 case kAVXI16x8SubSaturateU: {
2672 CpuFeatureScope avx_scope(tasm(), AVX);
2673 __ vpsubusw(
i.OutputSimd128Register(),
i.InputSimd128Register(0),
2677 case kSSEI16x8MinU: {
2678 DCHECK_EQ(
i.OutputSimd128Register(),
i.InputSimd128Register(0));
2679 CpuFeatureScope sse_scope(tasm(), SSE4_1);
2680 __ pminuw(
i.OutputSimd128Register(),
i.InputOperand(1));
2683 case kAVXI16x8MinU: {
2684 CpuFeatureScope avx_scope(tasm(), AVX);
2685 __ vpminuw(
i.OutputSimd128Register(),
i.InputSimd128Register(0),
2689 case kSSEI16x8MaxU: {
2690 DCHECK_EQ(
i.OutputSimd128Register(),
i.InputSimd128Register(0));
2691 CpuFeatureScope sse_scope(tasm(), SSE4_1);
2692 __ pmaxuw(
i.OutputSimd128Register(),
i.InputOperand(1));
2695 case kAVXI16x8MaxU: {
2696 CpuFeatureScope avx_scope(tasm(), AVX);
2697 __ vpmaxuw(
i.OutputSimd128Register(),
i.InputSimd128Register(0),
2701 case kSSEI16x8GtU: {
2702 DCHECK_EQ(
i.OutputSimd128Register(),
i.InputSimd128Register(0));
2703 CpuFeatureScope sse_scope(tasm(), SSE4_1);
2704 XMMRegister dst =
i.OutputSimd128Register();
2705 Operand src =
i.InputOperand(1);
2706 __ pmaxuw(dst, src);
2707 __ pcmpeqw(dst, src);
2708 __ pcmpeqw(kScratchDoubleReg, kScratchDoubleReg);
2709 __ pxor(dst, kScratchDoubleReg);
2712 case kAVXI16x8GtU: {
2713 CpuFeatureScope avx_scope(tasm(), AVX);
2714 XMMRegister dst =
i.OutputSimd128Register();
2715 XMMRegister src1 =
i.InputSimd128Register(0);
2716 Operand src2 =
i.InputOperand(1);
2717 __ vpmaxuw(kScratchDoubleReg, src1, src2);
2718 __ vpcmpeqw(dst, kScratchDoubleReg, src2);
2719 __ vpcmpeqw(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
2720 __ vpxor(dst, dst, kScratchDoubleReg);
2723 case kSSEI16x8GeU: {
2724 DCHECK_EQ(
i.OutputSimd128Register(),
i.InputSimd128Register(0));
2725 CpuFeatureScope sse_scope(tasm(), SSE4_1);
2726 XMMRegister dst =
i.OutputSimd128Register();
2727 Operand src =
i.InputOperand(1);
2728 __ pminuw(dst, src);
2729 __ pcmpeqw(dst, src);
2732 case kAVXI16x8GeU: {
2733 CpuFeatureScope avx_scope(tasm(), AVX);
2734 XMMRegister src1 =
i.InputSimd128Register(0);
2735 Operand src2 =
i.InputOperand(1);
2736 __ vpminuw(kScratchDoubleReg, src1, src2);
2737 __ vpcmpeqw(
i.OutputSimd128Register(), kScratchDoubleReg, src2);
2740 case kIA32I8x16Splat: {
2741 XMMRegister dst =
i.OutputSimd128Register();
2742 __ Movd(dst,
i.InputOperand(0));
2743 __ Pxor(kScratchDoubleReg, kScratchDoubleReg);
2744 __ Pshufb(dst, kScratchDoubleReg);
2747 case kIA32I8x16ExtractLane: {
2748 Register dst =
i.OutputRegister();
2749 __ Pextrb(dst,
i.InputSimd128Register(0),
i.InputInt8(1));
2750 __ movsx_b(dst, dst);
2753 case kSSEI8x16ReplaceLane: {
2754 DCHECK_EQ(
i.OutputSimd128Register(),
i.InputSimd128Register(0));
2755 CpuFeatureScope sse_scope(tasm(), SSE4_1);
2756 __ pinsrb(
i.OutputSimd128Register(),
i.InputOperand(2),
i.InputInt8(1));
2759 case kAVXI8x16ReplaceLane: {
2760 CpuFeatureScope avx_scope(tasm(), AVX);
2761 __ vpinsrb(
i.OutputSimd128Register(),
i.InputSimd128Register(0),
2762 i.InputOperand(2),
i.InputInt8(1));
2765 case kSSEI8x16SConvertI16x8: {
2766 DCHECK_EQ(
i.OutputSimd128Register(),
i.InputSimd128Register(0));
2767 __ packsswb(
i.OutputSimd128Register(),
i.InputOperand(1));
2770 case kAVXI8x16SConvertI16x8: {
2771 CpuFeatureScope avx_scope(tasm(), AVX);
2772 __ vpacksswb(
i.OutputSimd128Register(),
i.InputSimd128Register(0),
2776 case kIA32I8x16Neg: {
2777 XMMRegister dst =
i.OutputSimd128Register();
2778 Operand src =
i.InputOperand(0);
2779 if (src.is_reg(dst)) {
2780 __ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
2781 __ Psignb(dst, kScratchDoubleReg);
2788 case kSSEI8x16Shl: {
2789 XMMRegister dst =
i.OutputSimd128Register();
2790 DCHECK_EQ(dst,
i.InputSimd128Register(0));
2791 int8_t shift =
i.InputInt8(1) & 0x7;
2794 for (
int i = 0;
i < shift; ++
i) {
2799 __ pcmpeqw(kScratchDoubleReg, kScratchDoubleReg);
2800 __ psrlw(kScratchDoubleReg, 8 + shift);
2801 __ packuswb(kScratchDoubleReg, kScratchDoubleReg);
2802 __ pand(dst, kScratchDoubleReg);
2803 __ psllw(dst, shift);
2807 case kAVXI8x16Shl: {
2808 CpuFeatureScope avx_scope(tasm(), AVX);
2809 XMMRegister dst =
i.OutputSimd128Register();
2810 XMMRegister src =
i.InputSimd128Register(0);
2811 int8_t shift =
i.InputInt8(1) & 0x7;
2814 for (
int i = 0;
i < shift; ++
i) {
2815 __ vpaddb(dst, src, src);
2820 __ vpcmpeqw(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
2821 __ vpsrlw(kScratchDoubleReg, kScratchDoubleReg, 8 + shift);
2822 __ vpackuswb(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
2823 __ vpand(dst, src, kScratchDoubleReg);
2824 __ vpsllw(dst, dst, shift);
2828 case kIA32I8x16ShrS: {
2829 XMMRegister dst =
i.OutputSimd128Register();
2830 XMMRegister src =
i.InputSimd128Register(0);
2831 int8_t shift =
i.InputInt8(1) & 0x7;
2833 __ Punpckhbw(kScratchDoubleReg, src);
2834 __ Punpcklbw(dst, src);
2835 __ Psraw(kScratchDoubleReg, 8 + shift);
2836 __ Psraw(dst, 8 + shift);
2837 __ Packsswb(dst, kScratchDoubleReg);
2840 case kSSEI8x16Add: {
2841 DCHECK_EQ(
i.OutputSimd128Register(),
i.InputSimd128Register(0));
2842 __ paddb(
i.OutputSimd128Register(),
i.InputOperand(1));
2845 case kAVXI8x16Add: {
2846 CpuFeatureScope avx_scope(tasm(), AVX);
2847 __ vpaddb(
i.OutputSimd128Register(),
i.InputSimd128Register(0),
2851 case kSSEI8x16AddSaturateS: {
2852 DCHECK_EQ(
i.OutputSimd128Register(),
i.InputSimd128Register(0));
2853 __ paddsb(
i.OutputSimd128Register(),
i.InputOperand(1));
2856 case kAVXI8x16AddSaturateS: {
2857 CpuFeatureScope avx_scope(tasm(), AVX);
2858 __ vpaddsb(
i.OutputSimd128Register(),
i.InputSimd128Register(0),
2862 case kSSEI8x16Sub: {
2863 DCHECK_EQ(
i.OutputSimd128Register(),
i.InputSimd128Register(0));
2864 __ psubb(
i.OutputSimd128Register(),
i.InputOperand(1));
2867 case kAVXI8x16Sub: {
2868 CpuFeatureScope avx_scope(tasm(), AVX);
2869 __ vpsubb(
i.OutputSimd128Register(),
i.InputSimd128Register(0),
2873 case kSSEI8x16SubSaturateS: {
2874 DCHECK_EQ(
i.OutputSimd128Register(),
i.InputSimd128Register(0));
2875 __ psubsb(
i.OutputSimd128Register(),
i.InputOperand(1));
2878 case kAVXI8x16SubSaturateS: {
2879 CpuFeatureScope avx_scope(tasm(), AVX);
2880 __ vpsubsb(
i.OutputSimd128Register(),
i.InputSimd128Register(0),
2884 case kSSEI8x16Mul: {
2885 XMMRegister dst =
i.OutputSimd128Register();
2886 DCHECK_EQ(dst,
i.InputSimd128Register(0));
2887 XMMRegister right =
i.InputSimd128Register(1);
2888 XMMRegister tmp =
i.ToSimd128Register(instr->TempAt(0));
2896 __ movaps(tmp, dst);
2897 __ movaps(kScratchDoubleReg, right);
2899 __ psrlw(kScratchDoubleReg, 8);
2905 __ pmullw(tmp, kScratchDoubleReg);
2908 __ pmullw(dst, right);
2923 case kAVXI8x16Mul: {
2924 CpuFeatureScope avx_scope(tasm(), AVX);
2925 XMMRegister dst =
i.OutputSimd128Register();
2926 XMMRegister left =
i.InputSimd128Register(0);
2927 XMMRegister right =
i.InputSimd128Register(1);
2928 XMMRegister tmp =
i.ToSimd128Register(instr->TempAt(0));
2936 __ vpsrlw(tmp, left, 8);
2937 __ vpsrlw(kScratchDoubleReg, right, 8);
2941 __ vpmullw(tmp, tmp, kScratchDoubleReg);
2944 __ vpsllw(kScratchDoubleReg, left, 8);
2948 __ vpmullw(dst, kScratchDoubleReg, right);
2952 __ vpsrlw(dst, dst, 8);
2956 __ vpsllw(tmp, tmp, 8);
2960 __ vpor(dst, dst, tmp);
2963 case kSSEI8x16MinS: {
2964 DCHECK_EQ(
i.OutputSimd128Register(),
i.InputSimd128Register(0));
2965 CpuFeatureScope sse_scope(tasm(), SSE4_1);
2966 __ pminsb(
i.OutputSimd128Register(),
i.InputOperand(1));
2969 case kAVXI8x16MinS: {
2970 CpuFeatureScope avx_scope(tasm(), AVX);
2971 __ vpminsb(
i.OutputSimd128Register(),
i.InputSimd128Register(0),
2975 case kSSEI8x16MaxS: {
2976 DCHECK_EQ(
i.OutputSimd128Register(),
i.InputSimd128Register(0));
2977 CpuFeatureScope sse_scope(tasm(), SSE4_1);
2978 __ pmaxsb(
i.OutputSimd128Register(),
i.InputOperand(1));
2981 case kAVXI8x16MaxS: {
2982 CpuFeatureScope avx_scope(tasm(), AVX);
2983 __ vpmaxsb(
i.OutputSimd128Register(),
i.InputSimd128Register(0),
2988 DCHECK_EQ(
i.OutputSimd128Register(),
i.InputSimd128Register(0));
2989 __ pcmpeqb(
i.OutputSimd128Register(),
i.InputOperand(1));
2993 CpuFeatureScope avx_scope(tasm(), AVX);
2994 __ vpcmpeqb(
i.OutputSimd128Register(),
i.InputSimd128Register(0),
2999 DCHECK_EQ(
i.OutputSimd128Register(),
i.InputSimd128Register(0));
3000 __ pcmpeqb(
i.OutputSimd128Register(),
i.InputOperand(1));
3001 __ pcmpeqb(kScratchDoubleReg, kScratchDoubleReg);
3002 __ pxor(
i.OutputSimd128Register(), kScratchDoubleReg);
3006 CpuFeatureScope avx_scope(tasm(), AVX);
3007 __ vpcmpeqb(
i.OutputSimd128Register(),
i.InputSimd128Register(0),
3009 __ vpcmpeqb(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
3010 __ vpxor(
i.OutputSimd128Register(),
i.OutputSimd128Register(),
3014 case kSSEI8x16GtS: {
3015 DCHECK_EQ(
i.OutputSimd128Register(),
i.InputSimd128Register(0));
3016 __ pcmpgtb(
i.OutputSimd128Register(),
i.InputOperand(1));
3019 case kAVXI8x16GtS: {
3020 CpuFeatureScope avx_scope(tasm(), AVX);
3021 __ vpcmpgtb(
i.OutputSimd128Register(),
i.InputSimd128Register(0),
3025 case kSSEI8x16GeS: {
3026 DCHECK_EQ(
i.OutputSimd128Register(),
i.InputSimd128Register(0));
3027 CpuFeatureScope sse_scope(tasm(), SSE4_1);
3028 XMMRegister dst =
i.OutputSimd128Register();
3029 Operand src =
i.InputOperand(1);
3030 __ pminsb(dst, src);
3031 __ pcmpeqb(dst, src);
3034 case kAVXI8x16GeS: {
3035 CpuFeatureScope avx_scope(tasm(), AVX);
3036 XMMRegister src1 =
i.InputSimd128Register(0);
3037 Operand src2 =
i.InputOperand(1);
3038 __ vpminsb(kScratchDoubleReg, src1, src2);
3039 __ vpcmpeqb(
i.OutputSimd128Register(), kScratchDoubleReg, src2);
3042 case kSSEI8x16UConvertI16x8: {
3043 DCHECK_EQ(
i.OutputSimd128Register(),
i.InputSimd128Register(0));
3044 CpuFeatureScope sse_scope(tasm(), SSE4_1);
3045 XMMRegister dst =
i.OutputSimd128Register();
3047 __ pcmpeqw(kScratchDoubleReg, kScratchDoubleReg);
3048 __ psrlw(kScratchDoubleReg, 1);
3049 __ pminuw(dst, kScratchDoubleReg);
3050 __ pminuw(kScratchDoubleReg,
i.InputOperand(1));
3051 __ packuswb(dst, kScratchDoubleReg);
3054 case kAVXI8x16UConvertI16x8: {
3055 DCHECK_EQ(
i.OutputSimd128Register(),
i.InputSimd128Register(0));
3056 CpuFeatureScope avx_scope(tasm(), AVX);
3057 XMMRegister dst =
i.OutputSimd128Register();
3059 __ vpcmpeqw(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
3060 __ vpsrlw(kScratchDoubleReg, kScratchDoubleReg, 1);
3061 __ vpminuw(dst, kScratchDoubleReg,
i.InputSimd128Register(0));
3062 __ vpminuw(kScratchDoubleReg, kScratchDoubleReg,
i.InputOperand(1));
3063 __ vpackuswb(dst, dst, kScratchDoubleReg);
3066 case kSSEI8x16AddSaturateU: {
3067 DCHECK_EQ(
i.OutputSimd128Register(),
i.InputSimd128Register(0));
3068 __ paddusb(
i.OutputSimd128Register(),
i.InputOperand(1));
3071 case kAVXI8x16AddSaturateU: {
3072 CpuFeatureScope avx_scope(tasm(), AVX);
3073 __ vpaddusb(
i.OutputSimd128Register(),
i.InputSimd128Register(0),
3077 case kSSEI8x16SubSaturateU: {
3078 DCHECK_EQ(
i.OutputSimd128Register(),
i.InputSimd128Register(0));
3079 __ psubusb(
i.OutputSimd128Register(),
i.InputOperand(1));
3082 case kAVXI8x16SubSaturateU: {
3083 CpuFeatureScope avx_scope(tasm(), AVX);
3084 __ vpsubusb(
i.OutputSimd128Register(),
i.InputSimd128Register(0),
3088 case kIA32I8x16ShrU: {
3089 XMMRegister dst =
i.OutputSimd128Register();
3090 XMMRegister src =
i.InputSimd128Register(0);
3091 int8_t shift =
i.InputInt8(1) & 0x7;
3093 __ Punpckhbw(kScratchDoubleReg, src);
3094 __ Punpcklbw(dst, src);
3095 __ Psrlw(kScratchDoubleReg, 8 + shift);
3096 __ Psrlw(dst, 8 + shift);
3097 __ Packuswb(dst, kScratchDoubleReg);
3100 case kSSEI8x16MinU: {
3101 XMMRegister dst =
i.OutputSimd128Register();
3102 DCHECK_EQ(dst,
i.InputSimd128Register(0));
3103 __ pminub(dst,
i.InputOperand(1));
3106 case kAVXI8x16MinU: {
3107 CpuFeatureScope avx_scope(tasm(), AVX);
3108 __ vpminub(
i.OutputSimd128Register(),
i.InputSimd128Register(0),
3112 case kSSEI8x16MaxU: {
3113 DCHECK_EQ(
i.OutputSimd128Register(),
i.InputSimd128Register(0));
3114 __ pmaxub(
i.OutputSimd128Register(),
i.InputOperand(1));
3117 case kAVXI8x16MaxU: {
3118 CpuFeatureScope avx_scope(tasm(), AVX);
3119 __ vpmaxub(
i.OutputSimd128Register(),
i.InputSimd128Register(0),
3123 case kSSEI8x16GtU: {
3124 DCHECK_EQ(
i.OutputSimd128Register(),
i.InputSimd128Register(0));
3125 XMMRegister dst =
i.OutputSimd128Register();
3126 Operand src =
i.InputOperand(1);
3127 __ pmaxub(dst, src);
3128 __ pcmpeqb(dst, src);
3129 __ pcmpeqb(kScratchDoubleReg, kScratchDoubleReg);
3130 __ pxor(dst, kScratchDoubleReg);
3133 case kAVXI8x16GtU: {
3134 CpuFeatureScope avx_scope(tasm(), AVX);
3135 XMMRegister dst =
i.OutputSimd128Register();
3136 XMMRegister src1 =
i.InputSimd128Register(0);
3137 Operand src2 =
i.InputOperand(1);
3138 __ vpmaxub(kScratchDoubleReg, src1, src2);
3139 __ vpcmpeqb(dst, kScratchDoubleReg, src2);
3140 __ vpcmpeqb(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
3141 __ vpxor(dst, dst, kScratchDoubleReg);
3144 case kSSEI8x16GeU: {
3145 DCHECK_EQ(
i.OutputSimd128Register(),
i.InputSimd128Register(0));
3146 XMMRegister dst =
i.OutputSimd128Register();
3147 Operand src =
i.InputOperand(1);
3148 __ pminub(dst, src);
3149 __ pcmpeqb(dst, src);
3152 case kAVXI8x16GeU: {
3153 CpuFeatureScope avx_scope(tasm(), AVX);
3154 XMMRegister src1 =
i.InputSimd128Register(0);
3155 Operand src2 =
i.InputOperand(1);
3156 __ vpminub(kScratchDoubleReg, src1, src2);
3157 __ vpcmpeqb(
i.OutputSimd128Register(), kScratchDoubleReg, src2);
3160 case kIA32S128Zero: {
3161 XMMRegister dst =
i.OutputSimd128Register();
3166 XMMRegister dst =
i.OutputSimd128Register();
3167 Operand src =
i.InputOperand(0);
3168 if (src.is_reg(dst)) {
3169 __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
3170 __ pxor(dst, kScratchDoubleReg);
3172 __ pcmpeqd(dst, dst);
3178 CpuFeatureScope avx_scope(tasm(), AVX);
3179 __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
3180 __ vpxor(
i.OutputSimd128Register(), kScratchDoubleReg,
i.InputOperand(0));
3184 DCHECK_EQ(
i.OutputSimd128Register(),
i.InputSimd128Register(0));
3185 __ pand(
i.OutputSimd128Register(),
i.InputOperand(1));
3189 CpuFeatureScope avx_scope(tasm(), AVX);
3190 __ vpand(
i.OutputSimd128Register(),
i.InputSimd128Register(0),
3195 DCHECK_EQ(
i.OutputSimd128Register(),
i.InputSimd128Register(0));
3196 __ por(
i.OutputSimd128Register(),
i.InputOperand(1));
3200 CpuFeatureScope avx_scope(tasm(), AVX);
3201 __ vpor(
i.OutputSimd128Register(),
i.InputSimd128Register(0),
3206 DCHECK_EQ(
i.OutputSimd128Register(),
i.InputSimd128Register(0));
3207 __ pxor(
i.OutputSimd128Register(),
i.InputOperand(1));
3211 CpuFeatureScope avx_scope(tasm(), AVX);
3212 __ vpxor(
i.OutputSimd128Register(),
i.InputSimd128Register(0),
3216 case kSSES128Select: {
3217 DCHECK_EQ(
i.OutputSimd128Register(),
i.InputSimd128Register(0));
3219 XMMRegister dst =
i.OutputSimd128Register();
3220 __ movaps(kScratchDoubleReg,
i.InputSimd128Register(1));
3221 __ xorps(kScratchDoubleReg,
i.InputSimd128Register(2));
3222 __ andps(dst, kScratchDoubleReg);
3223 __ xorps(dst,
i.InputSimd128Register(2));
3226 case kAVXS128Select: {
3227 CpuFeatureScope avx_scope(tasm(), AVX);
3228 XMMRegister dst =
i.OutputSimd128Register();
3229 __ vxorps(kScratchDoubleReg,
i.InputSimd128Register(2),
3231 __ vandps(dst, kScratchDoubleReg,
i.InputOperand(0));
3232 __ vxorps(dst, dst,
i.InputSimd128Register(2));
3235 case kIA32S8x16Shuffle: {
3236 XMMRegister dst =
i.OutputSimd128Register();
3237 Operand src0 =
i.InputOperand(0);
3238 Register tmp =
i.TempRegister(0);
3242 if (instr->InputCount() == 5) {
3243 DCHECK_EQ(
i.OutputSimd128Register(),
i.InputSimd128Register(0));
3244 for (
int j = 4; j > 0; j--) {
3246 __ push(Immediate(mask));
3248 __ Pshufb(dst, Operand(esp, 0));
3250 DCHECK_EQ(6, instr->InputCount());
3251 __ movups(kScratchDoubleReg, src0);
3252 for (
int j = 5; j > 1; j--) {
3255 for (
int k = 0; k < 32; k += 8) {
3256 uint8_t lane = lanes >> k;
3257 mask |= (lane < kSimd128Size ? lane : 0x80) << k;
3259 __ push(Immediate(mask));
3261 __ Pshufb(kScratchDoubleReg, Operand(esp, 0));
3262 Operand src1 =
i.InputOperand(1);
3263 if (!src1.is_reg(dst)) __ movups(dst, src1);
3264 for (
int j = 5; j > 1; j--) {
3267 for (
int k = 0; k < 32; k += 8) {
3268 uint8_t lane = lanes >> k;
3269 mask |= (lane >= kSimd128Size ? (lane & 0xF) : 0x80) << k;
3271 __ push(Immediate(mask));
3273 __ Pshufb(dst, Operand(esp, 0));
3274 __ por(dst, kScratchDoubleReg);
3279 case kIA32S32x4Swizzle: {
3280 DCHECK_EQ(2, instr->InputCount());
3281 __ Pshufd(
i.OutputSimd128Register(),
i.InputOperand(0),
i.InputInt8(1));
3284 case kIA32S32x4Shuffle: {
3285 DCHECK_EQ(4, instr->InputCount());
3286 int8_t shuffle =
i.InputInt8(2);
3287 DCHECK_NE(0xe4, shuffle);
3288 __ Pshufd(kScratchDoubleReg,
i.InputOperand(1), shuffle);
3289 __ Pshufd(
i.OutputSimd128Register(),
i.InputOperand(0), shuffle);
3290 __ Pblendw(
i.OutputSimd128Register(), kScratchDoubleReg,
i.InputInt8(3));
3293 case kIA32S16x8Blend:
3294 ASSEMBLE_SIMD_IMM_SHUFFLE(pblendw, SSE4_1,
i.InputInt8(2));
3296 case kIA32S16x8HalfShuffle1: {
3297 XMMRegister dst =
i.OutputSimd128Register();
3298 __ Pshuflw(dst,
i.InputOperand(0),
i.InputInt8(1));
3299 __ Pshufhw(dst, dst,
i.InputInt8(2));
3302 case kIA32S16x8HalfShuffle2: {
3303 XMMRegister dst =
i.OutputSimd128Register();
3304 __ Pshuflw(kScratchDoubleReg,
i.InputOperand(1),
i.InputInt8(2));
3305 __ Pshufhw(kScratchDoubleReg, kScratchDoubleReg,
i.InputInt8(3));
3306 __ Pshuflw(dst,
i.InputOperand(0),
i.InputInt8(2));
3307 __ Pshufhw(dst, dst,
i.InputInt8(3));
3308 __ Pblendw(dst, kScratchDoubleReg,
i.InputInt8(4));
3311 case kIA32S8x16Alignr:
3312 ASSEMBLE_SIMD_IMM_SHUFFLE(palignr, SSSE3,
i.InputInt8(2));
3314 case kIA32S16x8Dup: {
3315 XMMRegister dst =
i.OutputSimd128Register();
3316 Operand src =
i.InputOperand(0);
3317 int8_t lane =
i.InputInt8(1) & 0x7;
3318 int8_t lane4 = lane & 0x3;
3319 int8_t half_dup = lane4 | (lane4 << 2) | (lane4 << 4) | (lane4 << 6);
3321 __ Pshuflw(dst, src, half_dup);
3322 __ Pshufd(dst, dst, 0);
3324 __ Pshufhw(dst, src, half_dup);
3325 __ Pshufd(dst, dst, 0xaa);
3329 case kIA32S8x16Dup: {
3330 XMMRegister dst =
i.OutputSimd128Register();
3331 XMMRegister src =
i.InputSimd128Register(0);
3332 int8_t lane =
i.InputInt8(1) & 0xf;
3333 if (CpuFeatures::IsSupported(AVX)) {
3334 CpuFeatureScope avx_scope(tasm(), AVX);
3336 __ vpunpcklbw(dst, src, src);
3338 __ vpunpckhbw(dst, src, src);
3341 DCHECK_EQ(dst, src);
3343 __ punpcklbw(dst, dst);
3345 __ punpckhbw(dst, dst);
3349 int8_t lane4 = lane & 0x3;
3350 int8_t half_dup = lane4 | (lane4 << 2) | (lane4 << 4) | (lane4 << 6);
3352 __ Pshuflw(dst, dst, half_dup);
3353 __ Pshufd(dst, dst, 0);
3355 __ Pshufhw(dst, dst, half_dup);
3356 __ Pshufd(dst, dst, 0xaa);
3360 case kIA32S64x2UnpackHigh:
3361 ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckhqdq);
3363 case kIA32S32x4UnpackHigh:
3364 ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckhdq);
3366 case kIA32S16x8UnpackHigh:
3367 ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckhwd);
3369 case kIA32S8x16UnpackHigh:
3370 ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckhbw);
3372 case kIA32S64x2UnpackLow:
3373 ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpcklqdq);
3375 case kIA32S32x4UnpackLow:
3376 ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpckldq);
3378 case kIA32S16x8UnpackLow:
3379 ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpcklwd);
3381 case kIA32S8x16UnpackLow:
3382 ASSEMBLE_SIMD_PUNPCK_SHUFFLE(punpcklbw);
3384 case kSSES16x8UnzipHigh: {
3385 CpuFeatureScope sse_scope(tasm(), SSE4_1);
3386 XMMRegister dst =
i.OutputSimd128Register();
3387 XMMRegister src2 = dst;
3388 DCHECK_EQ(dst,
i.InputSimd128Register(0));
3389 if (instr->InputCount() == 2) {
3390 __ movups(kScratchDoubleReg,
i.InputOperand(1));
3391 __ psrld(kScratchDoubleReg, 16);
3392 src2 = kScratchDoubleReg;
3395 __ packusdw(dst, src2);
3398 case kAVXS16x8UnzipHigh: {
3399 CpuFeatureScope avx_scope(tasm(), AVX);
3400 XMMRegister dst =
i.OutputSimd128Register();
3401 XMMRegister src2 = dst;
3402 if (instr->InputCount() == 2) {
3403 __ vpsrld(kScratchDoubleReg,
i.InputSimd128Register(1), 16);
3404 src2 = kScratchDoubleReg;
3406 __ vpsrld(dst,
i.InputSimd128Register(0), 16);
3407 __ vpackusdw(dst, dst, src2);
3410 case kSSES16x8UnzipLow: {
3411 CpuFeatureScope sse_scope(tasm(), SSE4_1);
3412 XMMRegister dst =
i.OutputSimd128Register();
3413 XMMRegister src2 = dst;
3414 DCHECK_EQ(dst,
i.InputSimd128Register(0));
3415 __ pxor(kScratchDoubleReg, kScratchDoubleReg);
3416 if (instr->InputCount() == 2) {
3417 __ pblendw(kScratchDoubleReg,
i.InputOperand(1), 0x55);
3418 src2 = kScratchDoubleReg;
3420 __ pblendw(dst, kScratchDoubleReg, 0xaa);
3421 __ packusdw(dst, src2);
3424 case kAVXS16x8UnzipLow: {
3425 CpuFeatureScope avx_scope(tasm(), AVX);
3426 XMMRegister dst =
i.OutputSimd128Register();
3427 XMMRegister src2 = dst;
3428 __ vpxor(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
3429 if (instr->InputCount() == 2) {
3430 __ vpblendw(kScratchDoubleReg, kScratchDoubleReg,
i.InputOperand(1),
3432 src2 = kScratchDoubleReg;
3434 __ vpblendw(dst, kScratchDoubleReg,
i.InputSimd128Register(0), 0x55);
3435 __ vpackusdw(dst, dst, src2);
3438 case kSSES8x16UnzipHigh: {
3439 XMMRegister dst =
i.OutputSimd128Register();
3440 XMMRegister src2 = dst;
3441 DCHECK_EQ(dst,
i.InputSimd128Register(0));
3442 if (instr->InputCount() == 2) {
3443 __ movups(kScratchDoubleReg,
i.InputOperand(1));
3444 __ psrlw(kScratchDoubleReg, 8);
3445 src2 = kScratchDoubleReg;
3448 __ packuswb(dst, src2);
3451 case kAVXS8x16UnzipHigh: {
3452 CpuFeatureScope avx_scope(tasm(), AVX);
3453 XMMRegister dst =
i.OutputSimd128Register();
3454 XMMRegister src2 = dst;
3455 if (instr->InputCount() == 2) {
3456 __ vpsrlw(kScratchDoubleReg,
i.InputSimd128Register(1), 8);
3457 src2 = kScratchDoubleReg;
3459 __ vpsrlw(dst,
i.InputSimd128Register(0), 8);
3460 __ vpackuswb(dst, dst, src2);
3463 case kSSES8x16UnzipLow: {
3464 XMMRegister dst =
i.OutputSimd128Register();
3465 XMMRegister src2 = dst;
3466 DCHECK_EQ(dst,
i.InputSimd128Register(0));
3467 if (instr->InputCount() == 2) {
3468 __ movups(kScratchDoubleReg,
i.InputOperand(1));
3469 __ psllw(kScratchDoubleReg, 8);
3470 __ psrlw(kScratchDoubleReg, 8);
3471 src2 = kScratchDoubleReg;
3475 __ packuswb(dst, src2);
3478 case kAVXS8x16UnzipLow: {
3479 CpuFeatureScope avx_scope(tasm(), AVX);
3480 XMMRegister dst =
i.OutputSimd128Register();
3481 XMMRegister src2 = dst;
3482 if (instr->InputCount() == 2) {
3483 __ vpsllw(kScratchDoubleReg,
i.InputSimd128Register(1), 8);
3484 __ vpsrlw(kScratchDoubleReg, kScratchDoubleReg, 8);
3485 src2 = kScratchDoubleReg;
3487 __ vpsllw(dst,
i.InputSimd128Register(0), 8);
3488 __ vpsrlw(dst, dst, 8);
3489 __ vpackuswb(dst, dst, src2);
3492 case kSSES8x16TransposeLow: {
3493 XMMRegister dst =
i.OutputSimd128Register();
3494 DCHECK_EQ(dst,
i.InputSimd128Register(0));
3496 if (instr->InputCount() == 1) {
3497 __ movups(kScratchDoubleReg, dst);
3499 DCHECK_EQ(2, instr->InputCount());
3500 __ movups(kScratchDoubleReg,
i.InputOperand(1));
3501 __ psllw(kScratchDoubleReg, 8);
3504 __ por(dst, kScratchDoubleReg);
3507 case kAVXS8x16TransposeLow: {
3508 CpuFeatureScope avx_scope(tasm(), AVX);
3509 XMMRegister dst =
i.OutputSimd128Register();
3510 if (instr->InputCount() == 1) {
3511 __ vpsllw(kScratchDoubleReg,
i.InputSimd128Register(0), 8);
3512 __ vpsrlw(dst, kScratchDoubleReg, 8);
3514 DCHECK_EQ(2, instr->InputCount());
3515 __ vpsllw(kScratchDoubleReg,
i.InputSimd128Register(1), 8);
3516 __ vpsllw(dst,
i.InputSimd128Register(0), 8);
3517 __ vpsrlw(dst, dst, 8);
3519 __ vpor(dst, dst, kScratchDoubleReg);
3522 case kSSES8x16TransposeHigh: {
3523 XMMRegister dst =
i.OutputSimd128Register();
3524 DCHECK_EQ(dst,
i.InputSimd128Register(0));
3526 if (instr->InputCount() == 1) {
3527 __ movups(kScratchDoubleReg, dst);
3529 DCHECK_EQ(2, instr->InputCount());
3530 __ movups(kScratchDoubleReg,
i.InputOperand(1));
3531 __ psrlw(kScratchDoubleReg, 8);
3533 __ psllw(kScratchDoubleReg, 8);
3534 __ por(dst, kScratchDoubleReg);
3537 case kAVXS8x16TransposeHigh: {
3538 CpuFeatureScope avx_scope(tasm(), AVX);
3539 XMMRegister dst =
i.OutputSimd128Register();
3540 if (instr->InputCount() == 1) {
3541 __ vpsrlw(dst,
i.InputSimd128Register(0), 8);
3542 __ vpsllw(kScratchDoubleReg, dst, 8);
3544 DCHECK_EQ(2, instr->InputCount());
3545 __ vpsrlw(kScratchDoubleReg,
i.InputSimd128Register(1), 8);
3546 __ vpsrlw(dst,
i.InputSimd128Register(0), 8);
3547 __ vpsllw(kScratchDoubleReg, kScratchDoubleReg, 8);
3549 __ vpor(dst, dst, kScratchDoubleReg);
3552 case kSSES8x8Reverse:
3553 case kSSES8x4Reverse:
3554 case kSSES8x2Reverse: {
3555 DCHECK_EQ(1, instr->InputCount());
3556 XMMRegister dst =
i.OutputSimd128Register();
3557 DCHECK_EQ(dst,
i.InputSimd128Register(0));
3558 if (arch_opcode != kSSES8x2Reverse) {
3560 int8_t shuffle_mask = arch_opcode == kSSES8x4Reverse ? 0xB1 : 0x1B;
3561 __ pshuflw(dst, dst, shuffle_mask);
3562 __ pshufhw(dst, dst, shuffle_mask);
3564 __ movaps(kScratchDoubleReg, dst);
3565 __ psrlw(kScratchDoubleReg, 8);
3567 __ por(dst, kScratchDoubleReg);
3570 case kAVXS8x2Reverse:
3571 case kAVXS8x4Reverse:
3572 case kAVXS8x8Reverse: {
3573 DCHECK_EQ(1, instr->InputCount());
3574 CpuFeatureScope avx_scope(tasm(), AVX);
3575 XMMRegister dst =
i.OutputSimd128Register();
3576 XMMRegister src = dst;
3577 if (arch_opcode != kAVXS8x2Reverse) {
3579 int8_t shuffle_mask = arch_opcode == kAVXS8x4Reverse ? 0xB1 : 0x1B;
3580 __ vpshuflw(dst,
i.InputOperand(0), shuffle_mask);
3581 __ vpshufhw(dst, dst, shuffle_mask);
3583 src =
i.InputSimd128Register(0);
3586 __ vpsrlw(kScratchDoubleReg, src, 8);
3587 __ vpsllw(dst, src, 8);
3588 __ vpor(dst, dst, kScratchDoubleReg);
3591 case kIA32S1x4AnyTrue:
3592 case kIA32S1x8AnyTrue:
3593 case kIA32S1x16AnyTrue: {
3594 Register dst =
i.OutputRegister();
3595 XMMRegister src =
i.InputSimd128Register(0);
3596 Register tmp =
i.TempRegister(0);
3598 __ mov(dst, Immediate(-1));
3600 __ cmov(zero, dst, tmp);
3603 case kIA32S1x4AllTrue:
3604 case kIA32S1x8AllTrue:
3605 case kIA32S1x16AllTrue: {
3606 Register dst =
i.OutputRegister();
3607 Operand src =
i.InputOperand(0);
3608 Register tmp =
i.TempRegister(0);
3609 __ mov(tmp, Immediate(-1));
3612 __ Pxor(kScratchDoubleReg, kScratchDoubleReg);
3613 if (arch_opcode == kIA32S1x4AllTrue) {
3614 __ Pcmpeqd(kScratchDoubleReg, src);
3615 }
else if (arch_opcode == kIA32S1x8AllTrue) {
3616 __ Pcmpeqw(kScratchDoubleReg, src);
3618 __ Pcmpeqb(kScratchDoubleReg, src);
3621 __ Ptest(kScratchDoubleReg, kScratchDoubleReg);
3622 __ cmov(zero, dst, tmp);
3625 case kIA32StackCheck: {
3626 __ CompareStackLimit(esp);
3629 case kIA32Word32AtomicPairLoad: {
3630 XMMRegister tmp =
i.ToDoubleRegister(instr->TempAt(0));
3631 __ movq(tmp,
i.MemoryOperand());
3632 if (instr->OutputCount() == 2) {
3633 __ Pextrd(
i.OutputRegister(0), tmp, 0);
3634 __ Pextrd(
i.OutputRegister(1), tmp, 1);
3635 }
else if (instr->OutputCount() == 1) {
3636 __ Pextrd(
i.OutputRegister(0), tmp, 0);
3637 __ Pextrd(
i.TempRegister(1), tmp, 1);
3641 case kIA32Word32AtomicPairStore: {
3644 __ mov(
i.TempRegister(0),
i.MemoryOperand(2));
3645 __ mov(
i.TempRegister(1),
i.NextMemoryOperand(2));
3647 frame_access_state()->IncreaseSPDelta(1);
3648 i.MoveInstructionOperandToRegister(ebx, instr->InputAt(0));
3650 __ cmpxchg8b(
i.MemoryOperand(2));
3652 frame_access_state()->IncreaseSPDelta(-1);
3653 __ j(not_equal, &store);
3656 case kWord32AtomicExchangeInt8: {
3657 __ xchg_b(
i.InputRegister(0),
i.MemoryOperand(1));
3658 __ movsx_b(
i.InputRegister(0),
i.InputRegister(0));
3661 case kWord32AtomicExchangeUint8: {
3662 __ xchg_b(
i.InputRegister(0),
i.MemoryOperand(1));
3663 __ movzx_b(
i.InputRegister(0),
i.InputRegister(0));
3666 case kWord32AtomicExchangeInt16: {
3667 __ xchg_w(
i.InputRegister(0),
i.MemoryOperand(1));
3668 __ movsx_w(
i.InputRegister(0),
i.InputRegister(0));
3671 case kWord32AtomicExchangeUint16: {
3672 __ xchg_w(
i.InputRegister(0),
i.MemoryOperand(1));
3673 __ movzx_w(
i.InputRegister(0),
i.InputRegister(0));
3676 case kWord32AtomicExchangeWord32: {
3677 __ xchg(
i.InputRegister(0),
i.MemoryOperand(1));
3680 case kIA32Word32AtomicPairExchange: {
3681 DCHECK(VerifyOutputOfAtomicPairInstr(&
i, instr));
3684 __ mov(eax,
i.MemoryOperand(2));
3685 __ mov(edx,
i.NextMemoryOperand(2));
3687 frame_access_state()->IncreaseSPDelta(1);
3688 i.MoveInstructionOperandToRegister(ebx, instr->InputAt(0));
3690 __ cmpxchg8b(
i.MemoryOperand(2));
3692 frame_access_state()->IncreaseSPDelta(-1);
3693 __ j(not_equal, &exchange);
3696 case kWord32AtomicCompareExchangeInt8: {
3698 __ cmpxchg_b(
i.MemoryOperand(2),
i.InputRegister(1));
3699 __ movsx_b(eax, eax);
3702 case kWord32AtomicCompareExchangeUint8: {
3704 __ cmpxchg_b(
i.MemoryOperand(2),
i.InputRegister(1));
3705 __ movzx_b(eax, eax);
3708 case kWord32AtomicCompareExchangeInt16: {
3710 __ cmpxchg_w(
i.MemoryOperand(2),
i.InputRegister(1));
3711 __ movsx_w(eax, eax);
3714 case kWord32AtomicCompareExchangeUint16: {
3716 __ cmpxchg_w(
i.MemoryOperand(2),
i.InputRegister(1));
3717 __ movzx_w(eax, eax);
3720 case kWord32AtomicCompareExchangeWord32: {
3722 __ cmpxchg(
i.MemoryOperand(2),
i.InputRegister(1));
3725 case kIA32Word32AtomicPairCompareExchange: {
3727 frame_access_state()->IncreaseSPDelta(1);
3728 i.MoveInstructionOperandToRegister(ebx, instr->InputAt(2));
3730 __ cmpxchg8b(
i.MemoryOperand(4));
3732 frame_access_state()->IncreaseSPDelta(-1);
3735 #define ATOMIC_BINOP_CASE(op, inst) \ 3736 case kWord32Atomic##op##Int8: { \ 3737 ASSEMBLE_ATOMIC_BINOP(inst, mov_b, cmpxchg_b); \ 3738 __ movsx_b(eax, eax); \ 3741 case kWord32Atomic##op##Uint8: { \ 3742 ASSEMBLE_ATOMIC_BINOP(inst, mov_b, cmpxchg_b); \ 3743 __ movzx_b(eax, eax); \ 3746 case kWord32Atomic##op##Int16: { \ 3747 ASSEMBLE_ATOMIC_BINOP(inst, mov_w, cmpxchg_w); \ 3748 __ movsx_w(eax, eax); \ 3751 case kWord32Atomic##op##Uint16: { \ 3752 ASSEMBLE_ATOMIC_BINOP(inst, mov_w, cmpxchg_w); \ 3753 __ movzx_w(eax, eax); \ 3756 case kWord32Atomic##op##Word32: { \ 3757 ASSEMBLE_ATOMIC_BINOP(inst, mov, cmpxchg); \ 3760 ATOMIC_BINOP_CASE(Add, add)
3761 ATOMIC_BINOP_CASE(Sub, sub)
3762 ATOMIC_BINOP_CASE(And, and_)
3763 ATOMIC_BINOP_CASE(Or, or_)
3764 ATOMIC_BINOP_CASE(Xor, xor_)
3765 #undef ATOMIC_BINOP_CASE 3766 #define ATOMIC_BINOP_CASE(op, instr1, instr2) \ 3767 case kIA32Word32AtomicPair##op: { \ 3768 DCHECK(VerifyOutputOfAtomicPairInstr(&i, instr)); \ 3769 ASSEMBLE_I64ATOMIC_BINOP(instr1, instr2) \ 3772 ATOMIC_BINOP_CASE(Add, add, adc)
3773 ATOMIC_BINOP_CASE(And, and_, and_)
3774 ATOMIC_BINOP_CASE(Or, or_, or_)
3775 ATOMIC_BINOP_CASE(Xor, xor_, xor_)
3776 #undef ATOMIC_BINOP_CASE 3777 case kIA32Word32AtomicPairSub: {
3778 DCHECK(VerifyOutputOfAtomicPairInstr(&
i, instr));
3782 __ mov(eax,
i.MemoryOperand(2));
3783 __ mov(edx,
i.NextMemoryOperand(2));
3786 frame_access_state()->IncreaseSPDelta(1);
3787 i.MoveInstructionOperandToRegister(ebx, instr->InputAt(0));
3788 __ push(
i.InputRegister(1));
3791 __ adc(
i.InputRegister(1), 0);
3792 __ neg(
i.InputRegister(1));
3795 __ adc(
i.InputRegister(1), edx);
3797 __ cmpxchg8b(
i.MemoryOperand(2));
3799 __ pop(
i.InputRegister(1));
3801 frame_access_state()->IncreaseSPDelta(-1);
3802 __ j(not_equal, &binop);
3805 case kWord32AtomicLoadInt8:
3806 case kWord32AtomicLoadUint8:
3807 case kWord32AtomicLoadInt16:
3808 case kWord32AtomicLoadUint16:
3809 case kWord32AtomicLoadWord32:
3810 case kWord32AtomicStoreWord8:
3811 case kWord32AtomicStoreWord16:
3812 case kWord32AtomicStoreWord32:
// Maps a FlagsCondition to the Condition value that callers in this file pass
// to the assembler's conditional jump (`__ j(...)`).
// NOTE(review): only the kSignedGreaterThanOrEqual -> greater_equal mapping is
// visible in this excerpt; the return values of the other cases are not shown.
3819 static Condition FlagsConditionToCondition(FlagsCondition condition) {
3820 switch (condition) {
3821 case kUnorderedEqual:
3825 case kUnorderedNotEqual:
3829 case kSignedLessThan:
3832 case kSignedGreaterThanOrEqual:
3833 return greater_equal;
3835 case kSignedLessThanOrEqual:
3838 case kSignedGreaterThan:
3841 case kUnsignedLessThan:
3844 case kUnsignedGreaterThanOrEqual:
3847 case kUnsignedLessThanOrEqual:
3850 case kUnsignedGreaterThan:
// Emits the conditional jump(s) for a branch described by `branch`.
//   instr  - not referenced by the lines visible here.
//   branch - supplies the condition, the true/false labels and the `fallthru`
//            flag.
3866 void CodeGenerator::AssembleArchBranch(Instruction* instr, BranchInfo* branch) {
// The false label is reached with a near jump only when the branch falls
// through; otherwise a far jump is requested.
3867 Label::Distance flabel_distance =
3868 branch->fallthru ? Label::kNear : Label::kFar;
3869 Label* tlabel = branch->true_label;
3870 Label* flabel = branch->false_label;
// The two "unordered" conditions first test the parity flag: on parity_even,
// kUnorderedEqual goes to the false label and kUnorderedNotEqual goes to the
// true label.
// NOTE(review): presumably parity_even signals an unordered floating-point
// compare - confirm against the compare instruction emitted before this.
3871 if (branch->condition == kUnorderedEqual) {
3872 __ j(parity_even, flabel, flabel_distance);
3873 }
else if (branch->condition == kUnorderedNotEqual) {
3874 __ j(parity_even, tlabel);
// Jump to the true label when the translated condition holds.
3876 __ j(FlagsConditionToCondition(branch->condition), tlabel);
// Without fallthrough the false label needs an explicit jump.
3879 if (!branch->fallthru) __ jmp(flabel);
// Takes the branch condition and the instruction it belongs to.
// NOTE(review): the body is not visible in this excerpt, so what (if anything)
// is emitted here cannot be stated - confirm against the full file.
3882 void CodeGenerator::AssembleBranchPoisoning(FlagsCondition condition,
3883 Instruction* instr) {
// Emits a deoptimization branch. The visible code simply forwards both
// arguments to AssembleArchBranch.
3888 void CodeGenerator::AssembleArchDeoptBranch(Instruction* instr,
3889 BranchInfo* branch) {
3890 AssembleArchBranch(instr, branch);
// Emits an unconditional jump to the label of block `target`. No jump is
// emitted when `target` is the next block in assembly order.
3893 void CodeGenerator::AssembleArchJump(RpoNumber target) {
3894 if (!IsNextInAssemblyOrder(target)) __ jmp(GetLabel(target));
// Emits a conditional jump to out-of-line trap code for `instr`.
//   instr     - the trapping instruction; its last input holds the trap id.
//   condition - the flags condition under which the trap is taken.
// NOTE(review): several lines (label declarations, some statement heads,
// closing braces) are not visible in this excerpt.
3897 void CodeGenerator::AssembleArchTrap(Instruction* instr,
3898 FlagsCondition condition) {
// Out-of-line code object that generates the actual trap call.
3899 class OutOfLineTrap final :
public OutOfLineCode {
3901 OutOfLineTrap(CodeGenerator* gen, Instruction* instr)
3902 : OutOfLineCode(gen), instr_(instr), gen_(gen) {}
// Reads the trap id from the instruction's last input and emits the call.
3904 void Generate() final {
3905 IA32OperandConverter
i(gen_, instr_);
3907 static_cast<TrapId
>(
i.InputInt32(instr_->InputCount() - 1));
3908 GenerateCallToTrap(trap_id);
// Emits the trap call for `trap_id`. TrapId::kInvalid takes a separate path
// that uses the testing trap callback, leaves the WASM_COMPILED frame and
// returns, popping the stack parameters.
// NOTE(review): the boundary between the kInvalid path and the regular path
// is not visible here; the grouping of the statements below is inferred from
// the calls themselves.
3912 void GenerateCallToTrap(TrapId trap_id) {
3913 if (trap_id == TrapId::kInvalid) {
3916 __ PrepareCallCFunction(0, esi);
3918 ExternalReference::wasm_call_trap_callback_for_testing(), 0);
3919 __ LeaveFrame(StackFrame::WASM_COMPILED);
3920 auto call_descriptor = gen_->linkage()->GetIncomingDescriptor();
3921 size_t pop_size = call_descriptor->StackParameterCount() * kPointerSize;
3923 __ Ret(static_cast<int>(pop_size), ecx);
3925 gen_->AssembleSourcePosition(instr_);
// The trap id is passed as the call target with WASM_STUB_CALL relocation.
3929 __ wasm_call(static_cast<Address>(trap_id), RelocInfo::WASM_STUB_CALL);
// Record a safepoint with a fresh, empty reference map after the call.
3930 ReferenceMap* reference_map =
3931 new (gen_->zone()) ReferenceMap(gen_->zone());
3932 gen_->RecordSafepoint(reference_map, Safepoint::kSimple, 0,
3933 Safepoint::kNoLazyDeopt);
// The trap call is not expected to return.
3934 __ AssertUnreachable(AbortReason::kUnexpectedReturnFromWasmTrap);
3938 Instruction* instr_;
3939 CodeGenerator* gen_;
// Allocate the out-of-line code in the zone and branch to its entry label.
3941 auto ool =
new (zone()) OutOfLineTrap(
this, instr);
3942 Label* tlabel = ool->entry();
// Same parity handling as in AssembleArchBranch: for kUnorderedEqual,
// parity_even skips the trap (jumps to `end`); for kUnorderedNotEqual,
// parity_even takes the trap.
3944 if (condition == kUnorderedEqual) {
3945 __ j(parity_even, &end);
3946 }
else if (condition == kUnorderedNotEqual) {
3947 __ j(parity_even, tlabel);
3949 __ j(FlagsConditionToCondition(condition), tlabel);
// Materializes the result of `condition` as 0 or 1 in the last output register
// of `instr`.
// NOTE(review): the declarations and bindings of the labels `check`, `set` and
// `done`, and the byte-register fast path, are not fully visible here.
3954 void CodeGenerator::AssembleArchBoolean(Instruction* instr,
3955 FlagsCondition condition) {
3956 IA32OperandConverter
i(
this, instr);
// The boolean goes into the last output of the instruction.
3962 DCHECK_NE(0u, instr->OutputCount());
3963 Register reg =
i.OutputRegister(instr->OutputCount() - 1);
// For the "unordered" conditions: on parity_odd continue at `check`;
// otherwise the result is fixed (0 for kUnorderedEqual, 1 for
// kUnorderedNotEqual) and control jumps to `done`.
3964 if (condition == kUnorderedEqual) {
3965 __ j(parity_odd, &check, Label::kNear);
3966 __ Move(reg, Immediate(0));
3967 __ jmp(&done, Label::kNear);
3968 }
else if (condition == kUnorderedNotEqual) {
3969 __ j(parity_odd, &check, Label::kNear);
3970 __ mov(reg, Immediate(1));
3971 __ jmp(&done, Label::kNear);
3973 Condition cc = FlagsConditionToCondition(condition);
// Byte-addressable registers get a dedicated path that ends by zero-extending
// the low byte.
3976 if (reg.is_byte_register()) {
3979 __ movzx_b(reg, reg);
// Generic path: branch on `cc` to `set`; otherwise store 0 and skip to `done`.
3983 __ j(cc, &
set, Label::kNear);
3984 __ Move(reg, Immediate(0));
3985 __ jmp(&done, Label::kNear);
3987 __ mov(reg, Immediate(1));
// Emits a switch on input 0 of `instr` by delegating to
// AssembleArchBinarySearchSwitchRange.
// Input layout as read here: input 0 is the switch value, input 1 is the
// default target block, and from input 2 onwards the inputs come in
// (case value, target block) pairs.
3992 void CodeGenerator::AssembleArchBinarySearchSwitch(Instruction* instr) {
3993 IA32OperandConverter
i(
this, instr);
3994 Register input =
i.InputRegister(0);
// Collect every (value, label) pair.
3995 std::vector<std::pair<int32_t, Label*>> cases;
3996 for (
size_t index = 2; index < instr->InputCount(); index += 2) {
3997 cases.push_back({
i.InputInt32(index + 0), GetLabel(
i.InputRpo(index + 1))});
// Hand the whole [begin, end) range of cases to the range helper.
3999 AssembleArchBinarySearchSwitchRange(input,
i.InputRpo(1), cases.data(),
4000 cases.data() + cases.size());
// Emits a switch as a linear chain of compare-and-jump pairs.
// Input layout as read here: input 0 is the switch value, input 1 is the
// default target block, and from input 2 onwards the inputs come in
// (case value, target block) pairs.
4003 void CodeGenerator::AssembleArchLookupSwitch(Instruction* instr) {
4004 IA32OperandConverter
i(
this, instr);
4005 Register input =
i.InputRegister(0);
// One cmp + je per case, in input order.
4006 for (
size_t index = 2; index < instr->InputCount(); index += 2) {
4007 __ cmp(input, Immediate(
i.InputInt32(index + 0)));
4008 __ j(equal, GetLabel(
i.InputRpo(index + 1)));
// No case matched: go to the default block.
4010 AssembleArchJump(
i.InputRpo(1));
// Emits a switch as an indexed jump through a jump table.
// Input layout as read here: input 0 is the index value, input 1 is the
// default target block, and inputs 2.. are the per-index target blocks.
4013 void CodeGenerator::AssembleArchTableSwitch(Instruction* instr) {
4014 IA32OperandConverter
i(
this, instr);
4015 Register input =
i.InputRegister(0);
4016 size_t const case_count = instr->InputCount() - 2;
// Build the array of target labels in the zone and register it as a table.
4017 Label** cases = zone()->NewArray<Label*>(case_count);
4018 for (
size_t index = 0; index < case_count; ++index) {
4019 cases[index] = GetLabel(
i.InputRpo(index + 2));
4021 Label*
const table = AddJumpTable(cases, case_count);
// Indices >= case_count (unsigned compare) go to the default block.
4022 __ cmp(input, Immediate(case_count));
4023 __ j(above_equal, GetLabel(
i.InputRpo(1)));
// In-range index: jump through the table entry, scaled by 4 per entry.
4024 __ jmp(Operand::JumpTable(input, times_4, table));
// Reserves frame slots for the callee-saved registers named by the incoming
// call descriptor.
// NOTE(review): the declaration and update of `pushed` are not visible in this
// excerpt; presumably it counts the registers present in `saves` - confirm.
4153 void CodeGenerator::FinishFrame(Frame* frame) {
4154 auto call_descriptor = linkage()->GetIncomingDescriptor();
4155 const RegList saves = call_descriptor->CalleeSavedRegisters();
4157 DCHECK(!info()->is_osr());
// Walk register codes from highest to lowest, skipping those not in `saves`.
4159 for (
int i = Register::kNumRegisters - 1;
i >= 0;
i--) {
4160 if (!((1 <<
i) & saves))
continue;
4163 frame->AllocateSavedCalleeRegisterSlots(pushed);
// Emits the function prologue: sets up the frame according to the kind of the
// incoming call descriptor, optionally emits a Wasm stack-overflow check,
// reserves stack slots, pushes callee-saved registers and reserves return
// slots.
// NOTE(review): several lines (C-function and JS prologue calls, closing
// braces, label declarations) are not visible in this excerpt.
4167 void CodeGenerator::AssembleConstructFrame() {
4168 auto call_descriptor = linkage()->GetIncomingDescriptor();
4169 if (frame_access_state()->has_frame()) {
4170 if (call_descriptor->IsCFunctionCall()) {
4173 }
else if (call_descriptor->IsJSFunctionCall()) {
4175 if (call_descriptor->PushArgumentCount()) {
4176 __ push(kJavaScriptCallArgCountRegister);
4179 __ StubPrologue(info()->GetOutputStackFrameType());
// Wasm functions additionally push the instance register.
4180 if (call_descriptor->IsWasmFunctionCall()) {
4181 __ push(kWasmInstanceRegister);
4182 }
else if (call_descriptor->IsWasmImportWrapper()) {
// Import wrappers: kWasmInstanceRegister initially holds a Tuple2. Load its
// value2 field into kJSFunctionRegister first, then overwrite
// kWasmInstanceRegister with value1 and push it. The order matters because
// the second load clobbers the base register.
4187 __ mov(kJSFunctionRegister,
4188 Operand(kWasmInstanceRegister,
4189 Tuple2::kValue2Offset - kHeapObjectTag));
4190 __ mov(kWasmInstanceRegister,
4191 Operand(kWasmInstanceRegister,
4192 Tuple2::kValue1Offset - kHeapObjectTag));
4193 __ push(kWasmInstanceRegister);
// Slots still to be reserved beyond the fixed part of the frame.
4198 int shrink_slots = frame()->GetTotalFrameSlotCount() -
4199 call_descriptor->CalculateFixedFrameSize();
// OSR code: the normal entry aborts; the OSR entry point is recorded at the
// current pc offset and the unoptimized frame's slots are subtracted from the
// amount to reserve.
4201 if (info()->is_osr()) {
4203 __ Abort(AbortReason::kShouldNotDirectlyEnterOsrFunction);
4209 if (FLAG_code_comments) __ RecordComment(
"-- OSR entrypoint --");
4210 osr_pc_offset_ = __ pc_offset();
4211 shrink_slots -= osr_helper()->UnoptimizedFrameSlots();
4214 const RegList saves = call_descriptor->CalleeSavedRegisters();
4215 if (shrink_slots > 0) {
4216 DCHECK(frame_access_state()->has_frame());
// Wasm frames larger than 128 slots get an explicit stack-overflow check.
4217 if (info()->IsWasm() && shrink_slots > 128) {
// When the frame is smaller than the configured stack size, compare esp with
// (real stack limit + frame size) and jump to `done` if esp is above or
// equal.
4227 if (shrink_slots * kPointerSize < FLAG_stack_size * 1024) {
4228 Register scratch = esi;
4231 FieldOperand(kWasmInstanceRegister,
4232 WasmInstanceObject::kRealStackLimitAddressOffset));
4233 __ mov(scratch, Operand(scratch, 0));
4234 __ add(scratch, Immediate(shrink_slots * kPointerSize));
4235 __ cmp(esp, scratch);
4237 __ j(above_equal, &done);
// Overflow path: call Runtime::kThrowWasmStackOverflow through the C entry
// stub loaded from the instance; the call is not expected to return.
4239 __ mov(ecx, FieldOperand(kWasmInstanceRegister,
4240 WasmInstanceObject::kCEntryStubOffset));
4241 __ Move(esi, Smi::zero());
4242 __ CallRuntimeWithCEntry(Runtime::kThrowWasmStackOverflow, ecx);
4243 ReferenceMap* reference_map =
new (zone()) ReferenceMap(zone());
4244 RecordSafepoint(reference_map, Safepoint::kSimple, 0,
4245 Safepoint::kNoLazyDeopt);
4246 __ AssertUnreachable(AbortReason::kUnexpectedReturnFromWasmTrap);
// Callee-saved registers and return slots are allocated separately below, so
// they are excluded from the bulk esp adjustment.
4251 shrink_slots -= base::bits::CountPopulation(saves);
4252 shrink_slots -= frame()->GetReturnSlotCount();
4253 if (shrink_slots > 0) {
4254 __ sub(esp, Immediate(shrink_slots * kPointerSize));
// Push callee-saved registers from the highest register code to the lowest.
4259 DCHECK(!info()->is_osr());
4260 for (
int i = Register::kNumRegisters - 1;
i >= 0;
i--) {
4261 if (((1 <<
i) & saves)) __ push(Register::from_code(
i));
// Finally reserve the return slots.
4266 if (frame()->GetReturnSlotCount() > 0) {
4267 __ sub(esp, Immediate(frame()->GetReturnSlotCount() * kPointerSize));
// Emits the function epilogue and return.
//   pop - number of additional stack slots to pop on return, given either as
//         an int32 immediate or in a register.
// NOTE(review): some guarding conditions and closing braces are not visible in
// this excerpt.
4271 void CodeGenerator::AssembleReturn(InstructionOperand* pop) {
4272 auto call_descriptor = linkage()->GetIncomingDescriptor();
4274 const RegList saves = call_descriptor->CalleeSavedRegisters();
// Release the return slots, then pop callee-saved registers in ascending
// register-code order (the reverse of the push order in the prologue).
4277 const int returns = frame()->GetReturnSlotCount();
4279 __ add(esp, Immediate(returns * kPointerSize));
4281 for (
int i = 0;
i < Register::kNumRegisters;
i++) {
4282 if (!((1 <<
i) & saves))
continue;
4283 __ pop(Register::from_code(
i));
// ecx is used as a scratch register below, so it must not be callee-saved.
4289 DCHECK_EQ(0u, call_descriptor->CalleeSavedRegisters() & ecx.bit());
4290 size_t pop_size = call_descriptor->StackParameterCount() * kPointerSize;
4291 IA32OperandConverter g(
this,
nullptr);
4292 if (call_descriptor->IsCFunctionCall()) {
4293 AssembleDeconstructFrame();
4294 }
else if (frame_access_state()->has_frame()) {
// Returns that pop zero extra slots share one epilogue: if return_label_ is
// already bound, jump to it; otherwise bind it here and emit the frame
// teardown.
4297 if (pop->IsImmediate() && g.ToConstant(pop).ToInt32() == 0) {
4298 if (return_label_.is_bound()) {
4299 __ jmp(&return_label_);
4302 __ bind(&return_label_);
4303 AssembleDeconstructFrame();
4306 AssembleDeconstructFrame();
// edx and ecx serve as scratch registers for the return sequence.
4309 DCHECK_EQ(0u, call_descriptor->CalleeSavedRegisters() & edx.bit());
4310 DCHECK_EQ(0u, call_descriptor->CalleeSavedRegisters() & ecx.bit());
// Immediate pop count: fold it into pop_size and emit a single Ret.
4311 if (pop->IsImmediate()) {
4312 DCHECK_EQ(Constant::kInt32, g.ToConstant(pop).type());
4313 pop_size += g.ToConstant(pop).ToInt32() * kPointerSize;
4314 __ Ret(static_cast<int>(pop_size), ecx);
// Register pop count: pop the top of stack (presumably the return address -
// confirm) into a scratch register distinct from pop_reg, advance esp by
// pop_reg * 4 + pop_size, and jump to the popped address.
4316 Register pop_reg = g.ToRegister(pop);
4317 Register scratch_reg = pop_reg == ecx ? edx : ecx;
4318 __ pop(scratch_reg);
4319 __ lea(esp, Operand(esp, pop_reg, times_4, static_cast<int>(pop_size)));
4320 __ jmp(scratch_reg);
// Intentionally empty: this backend emits nothing when code is finished.
4324 void CodeGenerator::FinishCode() {}
// Emits code that copies `source` into `destination`. The strategy is chosen
// by MoveType::InferMove; floating-point/SIMD values are handled per
// MachineRepresentation (kFloat32, kFloat64, otherwise kSimd128).
// NOTE(review): a number of lines (else arms, breaks, some moves) are not
// visible in this excerpt.
4326 void CodeGenerator::AssembleMove(InstructionOperand* source,
4327 InstructionOperand* destination) {
4328 IA32OperandConverter g(
this,
nullptr);
4330 switch (MoveType::InferMove(source, destination)) {
// Register -> register: mov for general registers, movaps for FP registers.
4331 case MoveType::kRegisterToRegister:
4332 if (source->IsRegister()) {
4333 __ mov(g.ToRegister(destination), g.ToRegister(source));
4335 DCHECK(source->IsFPRegister());
4336 __ movaps(g.ToDoubleRegister(destination), g.ToDoubleRegister(source));
// Register -> stack slot.
4339 case MoveType::kRegisterToStack: {
4340 Operand dst = g.ToOperand(destination);
4341 if (source->IsRegister()) {
4342 __ mov(dst, g.ToRegister(source));
4344 DCHECK(source->IsFPRegister());
4345 XMMRegister src = g.ToDoubleRegister(source);
4346 MachineRepresentation rep =
4347 LocationOperand::cast(source)->representation();
4348 if (rep == MachineRepresentation::kFloat32) {
4350 }
else if (rep == MachineRepresentation::kFloat64) {
4353 DCHECK_EQ(MachineRepresentation::kSimd128, rep);
4354 __ movups(dst, src);
// Stack slot -> register.
4359 case MoveType::kStackToRegister: {
4360 Operand src = g.ToOperand(source);
4361 if (source->IsStackSlot()) {
4362 __ mov(g.ToRegister(destination), src);
4364 DCHECK(source->IsFPStackSlot());
4365 XMMRegister dst = g.ToDoubleRegister(destination);
4366 MachineRepresentation rep =
4367 LocationOperand::cast(source)->representation();
4368 if (rep == MachineRepresentation::kFloat32) {
4370 }
else if (rep == MachineRepresentation::kFloat64) {
4373 DCHECK_EQ(MachineRepresentation::kSimd128, rep);
4374 __ movups(dst, src);
// Stack slot -> stack slot: FP/SIMD values are staged through
// kScratchDoubleReg.
4379 case MoveType::kStackToStack: {
4380 Operand src = g.ToOperand(source);
4381 Operand dst = g.ToOperand(destination);
4382 if (source->IsStackSlot()) {
4386 MachineRepresentation rep =
4387 LocationOperand::cast(source)->representation();
4388 if (rep == MachineRepresentation::kFloat32) {
4389 __ movss(kScratchDoubleReg, src);
4390 __ movss(dst, kScratchDoubleReg);
4391 }
else if (rep == MachineRepresentation::kFloat64) {
4392 __ movsd(kScratchDoubleReg, src);
4393 __ movsd(dst, kScratchDoubleReg);
4395 DCHECK_EQ(MachineRepresentation::kSimd128, rep);
4396 __ movups(kScratchDoubleReg, src);
4397 __ movups(dst, kScratchDoubleReg);
// Constant -> register: heap objects are moved as handles, other constants
// as immediates; FP constants are moved as their raw bit patterns.
4402 case MoveType::kConstantToRegister: {
4403 Constant src = g.ToConstant(source);
4404 if (destination->IsRegister()) {
4405 Register dst = g.ToRegister(destination);
4406 if (src.type() == Constant::kHeapObject) {
4407 __ Move(dst, src.ToHeapObject());
4409 __ Move(dst, g.ToImmediate(source));
4412 DCHECK(destination->IsFPRegister());
4413 XMMRegister dst = g.ToDoubleRegister(destination);
4414 if (src.type() == Constant::kFloat32) {
4416 __ Move(dst, src.ToFloat32AsInt());
4418 DCHECK_EQ(src.type(), Constant::kFloat64);
4419 __ Move(dst, src.ToFloat64().AsUint64());
// Constant -> stack slot: a float64 constant is written as two word-sized
// immediates, at the slot and at slot + kPointerSize.
// NOTE(review): the definitions of `lower`, `upper` and `dst0` are not
// visible in this excerpt.
4424 case MoveType::kConstantToStack: {
4425 Constant src = g.ToConstant(source);
4426 Operand dst = g.ToOperand(destination);
4427 if (destination->IsStackSlot()) {
4428 __ Move(dst, g.ToImmediate(source));
4430 DCHECK(destination->IsFPStackSlot());
4431 if (src.type() == Constant::kFloat32) {
4432 __ Move(dst, Immediate(src.ToFloat32AsInt()));
4434 DCHECK_EQ(src.type(), Constant::kFloat64);
4435 uint64_t constant_value = src.ToFloat64().AsUint64();
4439 Operand dst1 = g.ToOperand(destination, kPointerSize);
4440 __ Move(dst0, Immediate(lower));
4441 __ Move(dst1, Immediate(upper));
// Emits code that exchanges the contents of `source` and `destination`. The
// strategy is chosen by MoveType::InferSwap; FP/SIMD values are staged through
// kScratchDoubleReg.
// NOTE(review): many lines (push/pop/xchg instructions, else arms, breaks) are
// not visible in this excerpt.
4450 void CodeGenerator::AssembleSwap(InstructionOperand* source,
4451 InstructionOperand* destination) {
4452 IA32OperandConverter g(
this,
nullptr);
4455 switch (MoveType::InferSwap(source, destination)) {
// Register <-> register.
4456 case MoveType::kRegisterToRegister: {
4457 if (source->IsRegister()) {
4458 Register src = g.ToRegister(source);
4459 Register dst = g.ToRegister(destination);
// FP registers: three-move swap via the scratch register.
4464 DCHECK(source->IsFPRegister());
4465 XMMRegister src = g.ToDoubleRegister(source);
4466 XMMRegister dst = g.ToDoubleRegister(destination);
4467 __ movaps(kScratchDoubleReg, src);
4468 __ movaps(src, dst);
4469 __ movaps(dst, kScratchDoubleReg);
// Register <-> stack slot.
4473 case MoveType::kRegisterToStack: {
4474 if (source->IsRegister()) {
4475 Register src = g.ToRegister(source);
// The destination operand is recomputed after each SP-delta change.
// NOTE(review): presumably a push/pop pair sits on the lines not shown, which
// shifts esp-relative slot offsets - confirm.
4477 frame_access_state()->IncreaseSPDelta(1);
4478 Operand dst = g.ToOperand(destination);
4480 frame_access_state()->IncreaseSPDelta(-1);
4481 dst = g.ToOperand(destination);
4484 DCHECK(source->IsFPRegister());
4485 XMMRegister src = g.ToDoubleRegister(source);
4486 Operand dst = g.ToOperand(destination);
4487 MachineRepresentation rep =
4488 LocationOperand::cast(source)->representation();
// Load the slot into scratch, (store the register to the slot), then move
// scratch into the register.
4489 if (rep == MachineRepresentation::kFloat32) {
4490 __ movss(kScratchDoubleReg, dst);
4492 __ movaps(src, kScratchDoubleReg);
4493 }
else if (rep == MachineRepresentation::kFloat64) {
4494 __ movsd(kScratchDoubleReg, dst);
4496 __ movaps(src, kScratchDoubleReg);
4498 DCHECK_EQ(MachineRepresentation::kSimd128, rep);
4499 __ movups(kScratchDoubleReg, dst);
4500 __ movups(dst, src);
4501 __ movups(src, kScratchDoubleReg);
// Stack slot <-> stack slot.
4506 case MoveType::kStackToStack: {
4507 if (source->IsStackSlot()) {
// Operands are recomputed around each SP-delta change, as above.
4508 Operand dst1 = g.ToOperand(destination);
4510 frame_access_state()->IncreaseSPDelta(1);
4511 Operand src1 = g.ToOperand(source);
4513 Operand dst2 = g.ToOperand(destination);
4515 frame_access_state()->IncreaseSPDelta(-1);
4516 Operand src2 = g.ToOperand(source);
// FP stack slots: the destination is saved in scratch, the source is copied
// to the destination one pointer-sized word at a time via push/pop, then
// scratch is written back to the source.
4519 DCHECK(source->IsFPStackSlot());
4520 Operand src0 = g.ToOperand(source);
4521 Operand dst0 = g.ToOperand(destination);
4522 MachineRepresentation rep =
4523 LocationOperand::cast(source)->representation();
4524 if (rep == MachineRepresentation::kFloat32) {
4525 __ movss(kScratchDoubleReg, dst0);
4528 __ movss(src0, kScratchDoubleReg);
4529 }
else if (rep == MachineRepresentation::kFloat64) {
4530 __ movsd(kScratchDoubleReg, dst0);
4533 __ push(g.ToOperand(source, kPointerSize));
4534 __ pop(g.ToOperand(destination, kPointerSize));
4535 __ movsd(src0, kScratchDoubleReg);
4537 DCHECK_EQ(MachineRepresentation::kSimd128, rep);
4538 __ movups(kScratchDoubleReg, dst0);
4541 __ push(g.ToOperand(source, kPointerSize));
4542 __ pop(g.ToOperand(destination, kPointerSize));
4543 __ push(g.ToOperand(source, 2 * kPointerSize));
4544 __ pop(g.ToOperand(destination, 2 * kPointerSize));
4545 __ push(g.ToOperand(source, 3 * kPointerSize));
4546 __ pop(g.ToOperand(destination, 3 * kPointerSize));
4547 __ movups(src0, kScratchDoubleReg);
// Emits the jump table body: one `dd` entry per target label, in order.
//   targets      - array of target labels.
//   target_count - number of entries in `targets`.
4558 void CodeGenerator::AssembleJumpTable(Label** targets,
size_t target_count) {
4559 for (
size_t index = 0; index < target_count; ++index) {
4560 __ dd(targets[index]);
4565 #undef kScratchDoubleReg 4566 #undef ASSEMBLE_COMPARE 4567 #undef ASSEMBLE_IEEE754_BINOP 4568 #undef ASSEMBLE_IEEE754_UNOP 4569 #undef ASSEMBLE_BINOP 4570 #undef ASSEMBLE_ATOMIC_BINOP 4571 #undef ASSEMBLE_I64ATOMIC_BINOP 4572 #undef ASSEMBLE_MOVX 4573 #undef ASSEMBLE_SIMD_PUNPCK_SHUFFLE 4574 #undef ASSEMBLE_SIMD_IMM_SHUFFLE