V8 API Reference, 7.2.502.16 (for Deno 0.2.4)
code-generator-arm.cc
1 // Copyright 2014 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "src/compiler/backend/code-generator.h"
6 
7 #include "src/assembler-inl.h"
8 #include "src/boxed-float.h"
9 #include "src/compiler/backend/code-generator-impl.h"
10 #include "src/compiler/backend/gap-resolver.h"
11 #include "src/compiler/node-matchers.h"
12 #include "src/compiler/osr.h"
13 #include "src/double.h"
14 #include "src/heap/heap-inl.h" // crbug.com/v8/8499
15 #include "src/macro-assembler.h"
16 #include "src/optimized-compilation-info.h"
17 #include "src/wasm/wasm-code-manager.h"
18 #include "src/wasm/wasm-objects.h"
19 
20 namespace v8 {
21 namespace internal {
22 namespace compiler {
23 
24 #define __ tasm()->
25 
26 // Adds Arm-specific methods to convert InstructionOperands.
27 class ArmOperandConverter final : public InstructionOperandConverter {
28  public:
29  ArmOperandConverter(CodeGenerator* gen, Instruction* instr)
30  : InstructionOperandConverter(gen, instr) {}
31 
32  SBit OutputSBit() const {
33  switch (instr_->flags_mode()) {
34  case kFlags_branch:
35  case kFlags_branch_and_poison:
36  case kFlags_deoptimize:
37  case kFlags_deoptimize_and_poison:
38  case kFlags_set:
39  case kFlags_trap:
40  return SetCC;
41  case kFlags_none:
42  return LeaveCC;
43  }
44  UNREACHABLE();
45  }
46 
47  Operand InputImmediate(size_t index) {
48  return ToImmediate(instr_->InputAt(index));
49  }
50 
51  Operand InputOperand2(size_t first_index) {
52  const size_t index = first_index;
53  switch (AddressingModeField::decode(instr_->opcode())) {
54  case kMode_None:
55  case kMode_Offset_RI:
56  case kMode_Offset_RR:
57  break;
58  case kMode_Operand2_I:
59  return InputImmediate(index + 0);
60  case kMode_Operand2_R:
61  return Operand(InputRegister(index + 0));
62  case kMode_Operand2_R_ASR_I:
63  return Operand(InputRegister(index + 0), ASR, InputInt5(index + 1));
64  case kMode_Operand2_R_ASR_R:
65  return Operand(InputRegister(index + 0), ASR, InputRegister(index + 1));
66  case kMode_Operand2_R_LSL_I:
67  return Operand(InputRegister(index + 0), LSL, InputInt5(index + 1));
68  case kMode_Operand2_R_LSL_R:
69  return Operand(InputRegister(index + 0), LSL, InputRegister(index + 1));
70  case kMode_Operand2_R_LSR_I:
71  return Operand(InputRegister(index + 0), LSR, InputInt5(index + 1));
72  case kMode_Operand2_R_LSR_R:
73  return Operand(InputRegister(index + 0), LSR, InputRegister(index + 1));
74  case kMode_Operand2_R_ROR_I:
75  return Operand(InputRegister(index + 0), ROR, InputInt5(index + 1));
76  case kMode_Operand2_R_ROR_R:
77  return Operand(InputRegister(index + 0), ROR, InputRegister(index + 1));
78  }
79  UNREACHABLE();
80  }
81 
82  MemOperand InputOffset(size_t* first_index) {
83  const size_t index = *first_index;
84  switch (AddressingModeField::decode(instr_->opcode())) {
85  case kMode_None:
86  case kMode_Operand2_I:
87  case kMode_Operand2_R:
88  case kMode_Operand2_R_ASR_I:
89  case kMode_Operand2_R_ASR_R:
90  case kMode_Operand2_R_LSL_R:
91  case kMode_Operand2_R_LSR_I:
92  case kMode_Operand2_R_LSR_R:
93  case kMode_Operand2_R_ROR_I:
94  case kMode_Operand2_R_ROR_R:
95  break;
96  case kMode_Operand2_R_LSL_I:
97  *first_index += 3;
98  return MemOperand(InputRegister(index + 0), InputRegister(index + 1),
99  LSL, InputInt32(index + 2));
100  case kMode_Offset_RI:
101  *first_index += 2;
102  return MemOperand(InputRegister(index + 0), InputInt32(index + 1));
103  case kMode_Offset_RR:
104  *first_index += 2;
105  return MemOperand(InputRegister(index + 0), InputRegister(index + 1));
106  }
107  UNREACHABLE();
108  }
109 
110  MemOperand InputOffset(size_t first_index = 0) {
111  return InputOffset(&first_index);
112  }
113 
114  Operand ToImmediate(InstructionOperand* operand) {
115  Constant constant = ToConstant(operand);
116  switch (constant.type()) {
117  case Constant::kInt32:
118  if (RelocInfo::IsWasmReference(constant.rmode())) {
119  return Operand(constant.ToInt32(), constant.rmode());
120  } else {
121  return Operand(constant.ToInt32());
122  }
123  case Constant::kFloat32:
124  return Operand::EmbeddedNumber(constant.ToFloat32());
125  case Constant::kFloat64:
126  return Operand::EmbeddedNumber(constant.ToFloat64().value());
127  case Constant::kExternalReference:
128  return Operand(constant.ToExternalReference());
129  case Constant::kDelayedStringConstant:
130  return Operand::EmbeddedStringConstant(
131  constant.ToDelayedStringConstant());
132  case Constant::kInt64:
133  case Constant::kHeapObject:
134  // TODO(dcarney): loading RPO constants on arm.
135  case Constant::kRpoNumber:
136  break;
137  }
138  UNREACHABLE();
139  }
140 
141  MemOperand ToMemOperand(InstructionOperand* op) const {
142  DCHECK_NOT_NULL(op);
143  DCHECK(op->IsStackSlot() || op->IsFPStackSlot());
144  return SlotToMemOperand(AllocatedOperand::cast(op)->index());
145  }
146 
147  MemOperand SlotToMemOperand(int slot) const {
148  FrameOffset offset = frame_access_state()->GetFrameOffset(slot);
149  return MemOperand(offset.from_stack_pointer() ? sp : fp, offset.offset());
150  }
151 
152  NeonMemOperand NeonInputOperand(size_t first_index) {
153  const size_t index = first_index;
154  switch (AddressingModeField::decode(instr_->opcode())) {
155  case kMode_Offset_RR:
156  return NeonMemOperand(InputRegister(index + 0),
157  InputRegister(index + 1));
158  case kMode_Operand2_R:
159  return NeonMemOperand(InputRegister(index + 0));
160  default:
161  break;
162  }
163  UNREACHABLE();
164  }
165 };
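// The converter is instantiated per instruction in AssembleArchInstruction.
// A rough usage sketch (the calls below mirror the kArmAdd case further
// down in this file):
//
//   ArmOperandConverter i(this, instr);
//   __ add(i.OutputRegister(), i.InputRegister(0), i.InputOperand2(1),
//          i.OutputSBit());
//
// InputOperand2() decodes the addressing-mode bits of the opcode into the
// matching ARM "flexible second operand": an immediate, a plain register, or
// a register with an ASR/LSL/LSR/ROR shift by immediate or register.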
166 
167 namespace {
168 
169 class OutOfLineRecordWrite final : public OutOfLineCode {
170  public:
171  OutOfLineRecordWrite(CodeGenerator* gen, Register object, Register index,
172  Register value, Register scratch0, Register scratch1,
173  RecordWriteMode mode, StubCallMode stub_mode,
174  UnwindingInfoWriter* unwinding_info_writer)
175  : OutOfLineCode(gen),
176  object_(object),
177  index_(index),
178  index_immediate_(0),
179  value_(value),
180  scratch0_(scratch0),
181  scratch1_(scratch1),
182  mode_(mode),
183  stub_mode_(stub_mode),
184  must_save_lr_(!gen->frame_access_state()->has_frame()),
185  unwinding_info_writer_(unwinding_info_writer),
186  zone_(gen->zone()) {}
187 
188  OutOfLineRecordWrite(CodeGenerator* gen, Register object, int32_t index,
189  Register value, Register scratch0, Register scratch1,
190  RecordWriteMode mode, StubCallMode stub_mode,
191  UnwindingInfoWriter* unwinding_info_writer)
192  : OutOfLineCode(gen),
193  object_(object),
194  index_(no_reg),
195  index_immediate_(index),
196  value_(value),
197  scratch0_(scratch0),
198  scratch1_(scratch1),
199  mode_(mode),
200  stub_mode_(stub_mode),
201  must_save_lr_(!gen->frame_access_state()->has_frame()),
202  unwinding_info_writer_(unwinding_info_writer),
203  zone_(gen->zone()) {}
204 
205  void Generate() final {
206  if (mode_ > RecordWriteMode::kValueIsPointer) {
207  __ JumpIfSmi(value_, exit());
208  }
209  __ CheckPageFlag(value_, scratch0_,
210  MemoryChunk::kPointersToHereAreInterestingMask, eq,
211  exit());
212  if (index_ == no_reg) {
213  __ add(scratch1_, object_, Operand(index_immediate_));
214  } else {
215  DCHECK_EQ(0, index_immediate_);
216  __ add(scratch1_, object_, Operand(index_));
217  }
218  RememberedSetAction const remembered_set_action =
219  mode_ > RecordWriteMode::kValueIsMap ? EMIT_REMEMBERED_SET
220  : OMIT_REMEMBERED_SET;
221  SaveFPRegsMode const save_fp_mode =
222  frame()->DidAllocateDoubleRegisters() ? kSaveFPRegs : kDontSaveFPRegs;
223  if (must_save_lr_) {
224  // We need to save and restore lr if the frame was elided.
225  __ Push(lr);
226  unwinding_info_writer_->MarkLinkRegisterOnTopOfStack(__ pc_offset());
227  }
228  if (stub_mode_ == StubCallMode::kCallWasmRuntimeStub) {
229  __ CallRecordWriteStub(object_, scratch1_, remembered_set_action,
230  save_fp_mode, wasm::WasmCode::kWasmRecordWrite);
231  } else {
232  __ CallRecordWriteStub(object_, scratch1_, remembered_set_action,
233  save_fp_mode);
234  }
235  if (must_save_lr_) {
236  __ Pop(lr);
237  unwinding_info_writer_->MarkPopLinkRegisterFromTopOfStack(__ pc_offset());
238  }
239  }
240 
241  private:
242  Register const object_;
243  Register const index_;
244  int32_t const index_immediate_; // Valid if index_==no_reg.
245  Register const value_;
246  Register const scratch0_;
247  Register const scratch1_;
248  RecordWriteMode const mode_;
249  StubCallMode stub_mode_;
250  bool must_save_lr_;
251  UnwindingInfoWriter* const unwinding_info_writer_;
252  Zone* zone_;
253 };
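// The write barrier is split this way so the common case stays short: the
// inline fast path (see kArchStoreWithWriteBarrier below) performs only the
// store plus a page-flag check on the object, branching here when a barrier
// might be needed. Generate() then skips Smi values, re-checks the stored
// value's page flags, materializes the slot address object + index into
// scratch1_, and calls the RecordWrite stub, manually preserving lr when the
// surrounding code has no frame.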
254 
255 template <typename T>
256 class OutOfLineFloatMin final : public OutOfLineCode {
257  public:
258  OutOfLineFloatMin(CodeGenerator* gen, T result, T left, T right)
259  : OutOfLineCode(gen), result_(result), left_(left), right_(right) {}
260 
261  void Generate() final { __ FloatMinOutOfLine(result_, left_, right_); }
262 
263  private:
264  T const result_;
265  T const left_;
266  T const right_;
267 };
268 typedef OutOfLineFloatMin<SwVfpRegister> OutOfLineFloat32Min;
269 typedef OutOfLineFloatMin<DwVfpRegister> OutOfLineFloat64Min;
270 
271 template <typename T>
272 class OutOfLineFloatMax final : public OutOfLineCode {
273  public:
274  OutOfLineFloatMax(CodeGenerator* gen, T result, T left, T right)
275  : OutOfLineCode(gen), result_(result), left_(left), right_(right) {}
276 
277  void Generate() final { __ FloatMaxOutOfLine(result_, left_, right_); }
278 
279  private:
280  T const result_;
281  T const left_;
282  T const right_;
283 };
284 typedef OutOfLineFloatMax<SwVfpRegister> OutOfLineFloat32Max;
285 typedef OutOfLineFloatMax<DwVfpRegister> OutOfLineFloat64Max;
286 
287 Condition FlagsConditionToCondition(FlagsCondition condition) {
288  switch (condition) {
289  case kEqual:
290  return eq;
291  case kNotEqual:
292  return ne;
293  case kSignedLessThan:
294  return lt;
295  case kSignedGreaterThanOrEqual:
296  return ge;
297  case kSignedLessThanOrEqual:
298  return le;
299  case kSignedGreaterThan:
300  return gt;
301  case kUnsignedLessThan:
302  return lo;
303  case kUnsignedGreaterThanOrEqual:
304  return hs;
305  case kUnsignedLessThanOrEqual:
306  return ls;
307  case kUnsignedGreaterThan:
308  return hi;
309  case kFloatLessThanOrUnordered:
310  return lt;
311  case kFloatGreaterThanOrEqual:
312  return ge;
313  case kFloatLessThanOrEqual:
314  return ls;
315  case kFloatGreaterThanOrUnordered:
316  return hi;
317  case kFloatLessThan:
318  return lo;
319  case kFloatGreaterThanOrEqualOrUnordered:
320  return hs;
321  case kFloatLessThanOrEqualOrUnordered:
322  return le;
323  case kFloatGreaterThan:
324  return gt;
325  case kOverflow:
326  return vs;
327  case kNotOverflow:
328  return vc;
329  case kPositiveOrZero:
330  return pl;
331  case kNegative:
332  return mi;
333  default:
334  break;
335  }
336  UNREACHABLE();
337 }
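// The floating-point cases above intentionally map to *unsigned* integer
// conditions. After vcmp/vmrs, an unordered (NaN) comparison sets C and V
// and clears N and Z, so e.g. kFloatLessThan uses lo (C clear), which is
// false for NaN, whereas lt (N != V) would incorrectly be taken. The
// ...OrUnordered variants pick the conditions that hold for NaN instead.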
338 
339 void EmitWordLoadPoisoningIfNeeded(CodeGenerator* codegen,
340  InstructionCode opcode,
341  ArmOperandConverter& i) {
342  const MemoryAccessMode access_mode =
343  static_cast<MemoryAccessMode>(MiscField::decode(opcode));
344  if (access_mode == kMemoryAccessPoisoned) {
345  Register value = i.OutputRegister();
346  codegen->tasm()->and_(value, value, Operand(kSpeculationPoisonRegister));
347  }
348 }
349 
350 void ComputePoisonedAddressForLoad(CodeGenerator* codegen,
351  InstructionCode opcode,
352  ArmOperandConverter& i, Register address) {
353  DCHECK_EQ(kMemoryAccessPoisoned,
354  static_cast<MemoryAccessMode>(MiscField::decode(opcode)));
355  switch (AddressingModeField::decode(opcode)) {
356  case kMode_Offset_RI:
357  codegen->tasm()->mov(address, i.InputImmediate(1));
358  codegen->tasm()->add(address, address, i.InputRegister(0));
359  break;
360  case kMode_Offset_RR:
361  codegen->tasm()->add(address, i.InputRegister(0), i.InputRegister(1));
362  break;
363  default:
364  UNREACHABLE();
365  }
366  codegen->tasm()->and_(address, address, Operand(kSpeculationPoisonRegister));
367 }
368 
369 } // namespace
370 
371 #define ASSEMBLE_ATOMIC_LOAD_INTEGER(asm_instr) \
372  do { \
373  __ asm_instr(i.OutputRegister(), \
374  MemOperand(i.InputRegister(0), i.InputRegister(1))); \
375  __ dmb(ISH); \
376  } while (0)
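// Barrier placement sketch: the dmb(ISH) after the load gives it acquire
// semantics, and the dmb(ISH) on both sides of the store (next macro) gives
// release semantics plus ordering against subsequent accesses. This is the
// usual "fence before and after the store" mapping of sequentially
// consistent atomics on ARMv7, which lacks load-acquire/store-release
// instructions.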
377 
378 #define ASSEMBLE_ATOMIC_STORE_INTEGER(asm_instr) \
379  do { \
380  __ dmb(ISH); \
381  __ asm_instr(i.InputRegister(2), \
382  MemOperand(i.InputRegister(0), i.InputRegister(1))); \
383  __ dmb(ISH); \
384  } while (0)
385 
386 #define ASSEMBLE_ATOMIC_EXCHANGE_INTEGER(load_instr, store_instr) \
387  do { \
388  Label exchange; \
389  __ add(i.TempRegister(1), i.InputRegister(0), i.InputRegister(1)); \
390  __ dmb(ISH); \
391  __ bind(&exchange); \
392  __ load_instr(i.OutputRegister(0), i.TempRegister(1)); \
393  __ store_instr(i.TempRegister(0), i.InputRegister(2), i.TempRegister(1)); \
394  __ teq(i.TempRegister(0), Operand(0)); \
395  __ b(ne, &exchange); \
396  __ dmb(ISH); \
397  } while (0)
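// This is the classic ARM load-exclusive/store-exclusive retry loop, run on
// the address base + index computed into TempRegister(1). In C-like
// pseudocode (names illustrative only):
//
//   do {
//     old = load_exclusive(addr);               // ldrex*, arms the monitor
//     failed = store_exclusive(new_val, addr);  // strex*, 0 = success
//   } while (failed != 0);
//
// teq ... Operand(0) tests the strex status result, and the ne branch
// retries if another core or an interrupt cleared the exclusive monitor.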
398 
399 #define ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER(load_instr, store_instr, \
400  cmp_reg) \
401  do { \
402  Label compareExchange; \
403  Label exit; \
404  __ dmb(ISH); \
405  __ bind(&compareExchange); \
406  __ load_instr(i.OutputRegister(0), i.TempRegister(1)); \
407  __ teq(cmp_reg, Operand(i.OutputRegister(0))); \
408  __ b(ne, &exit); \
409  __ store_instr(i.TempRegister(0), i.InputRegister(3), i.TempRegister(1)); \
410  __ teq(i.TempRegister(0), Operand(0)); \
411  __ b(ne, &compareExchange); \
412  __ bind(&exit); \
413  __ dmb(ISH); \
414  } while (0)
415 
416 #define ASSEMBLE_ATOMIC_BINOP(load_instr, store_instr, bin_instr) \
417  do { \
418  Label binop; \
419  __ add(i.TempRegister(1), i.InputRegister(0), i.InputRegister(1)); \
420  __ dmb(ISH); \
421  __ bind(&binop); \
422  __ load_instr(i.OutputRegister(0), i.TempRegister(1)); \
423  __ bin_instr(i.TempRegister(0), i.OutputRegister(0), \
424  Operand(i.InputRegister(2))); \
425  __ store_instr(i.TempRegister(2), i.TempRegister(0), i.TempRegister(1)); \
426  __ teq(i.TempRegister(2), Operand(0)); \
427  __ b(ne, &binop); \
428  __ dmb(ISH); \
429  } while (0)
430 
431 #define ASSEMBLE_ATOMIC64_ARITH_BINOP(instr1, instr2) \
432  do { \
433  Label binop; \
434  __ add(i.TempRegister(0), i.InputRegister(2), i.InputRegister(3)); \
435  __ dmb(ISH); \
436  __ bind(&binop); \
437  __ ldrexd(r2, r3, i.TempRegister(0)); \
438  __ instr1(i.TempRegister(1), r2, i.InputRegister(0), SBit::SetCC); \
439  __ instr2(i.TempRegister(2), r3, Operand(i.InputRegister(1))); \
440  DCHECK_EQ(LeaveCC, i.OutputSBit()); \
441  __ strexd(i.TempRegister(3), i.TempRegister(1), i.TempRegister(2), \
442  i.TempRegister(0)); \
443  __ teq(i.TempRegister(3), Operand(0)); \
444  __ b(ne, &binop); \
445  __ dmb(ISH); \
446  } while (0)
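// The 64-bit variants keep the loaded value in the fixed pair r2/r3 and use
// ldrexd/strexd, the doubleword exclusive accesses (which, in the Arm
// encoding, want an even/odd register pair). instr1 is passed SBit::SetCC so
// that instr2 can consume the resulting carry or borrow, e.g. for a 64-bit
// add:
//
//   lo = a_lo + b_lo       // adds: sets carry
//   hi = a_hi + b_hi + C   // adc:  folds the carry back in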
447 
448 #define ASSEMBLE_ATOMIC64_LOGIC_BINOP(instr) \
449  do { \
450  Label binop; \
451  __ add(i.TempRegister(0), i.InputRegister(2), i.InputRegister(3)); \
452  __ dmb(ISH); \
453  __ bind(&binop); \
454  __ ldrexd(r2, r3, i.TempRegister(0)); \
455  __ instr(i.TempRegister(1), r2, Operand(i.InputRegister(0))); \
456  __ instr(i.TempRegister(2), r3, Operand(i.InputRegister(1))); \
457  __ strexd(i.TempRegister(3), i.TempRegister(1), i.TempRegister(2), \
458  i.TempRegister(0)); \
459  __ teq(i.TempRegister(3), Operand(0)); \
460  __ b(ne, &binop); \
461  __ dmb(ISH); \
462  } while (0)
463 
464 #define ASSEMBLE_IEEE754_BINOP(name) \
465  do { \
466  /* TODO(bmeurer): We should really get rid of this special instruction, */ \
467  /* and generate a CallAddress instruction instead. */ \
468  FrameScope scope(tasm(), StackFrame::MANUAL); \
469  __ PrepareCallCFunction(0, 2); \
470  __ MovToFloatParameters(i.InputDoubleRegister(0), \
471  i.InputDoubleRegister(1)); \
472  __ CallCFunction(ExternalReference::ieee754_##name##_function(), 0, 2); \
473  /* Move the result into the double result register. */ \
474  __ MovFromFloatResult(i.OutputDoubleRegister()); \
475  DCHECK_EQ(LeaveCC, i.OutputSBit()); \
476  } while (0)
477 
478 #define ASSEMBLE_IEEE754_UNOP(name) \
479  do { \
480  /* TODO(bmeurer): We should really get rid of this special instruction, */ \
481  /* and generate a CallAddress instruction instead. */ \
482  FrameScope scope(tasm(), StackFrame::MANUAL); \
483  __ PrepareCallCFunction(0, 1); \
484  __ MovToFloatParameter(i.InputDoubleRegister(0)); \
485  __ CallCFunction(ExternalReference::ieee754_##name##_function(), 0, 1); \
486  /* Move the result into the double result register. */ \
487  __ MovFromFloatResult(i.OutputDoubleRegister()); \
488  DCHECK_EQ(LeaveCC, i.OutputSBit()); \
489  } while (0)
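// Both IEEE754 macros lower the operation to a plain C call:
// PrepareCallCFunction aligns the stack per the AAPCS, MovToFloatParameter(s)
// places the double argument(s) in the calling-convention location (core or
// VFP registers, depending on soft/hard float), CallCFunction jumps to the
// ieee754 runtime routine, and MovFromFloatResult retrieves the double
// result. kIeee754Float64Sin, for instance, becomes a call with one double
// in and one double out.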
490 
491 #define ASSEMBLE_NEON_NARROWING_OP(dt) \
492  do { \
493  Simd128Register dst = i.OutputSimd128Register(), \
494  src0 = i.InputSimd128Register(0), \
495  src1 = i.InputSimd128Register(1); \
496  if (dst == src0 && dst == src1) { \
497  __ vqmovn(dt, dst.low(), src0); \
498  __ vmov(dst.high(), dst.low()); \
499  } else if (dst == src0) { \
500  __ vqmovn(dt, dst.low(), src0); \
501  __ vqmovn(dt, dst.high(), src1); \
502  } else { \
503  __ vqmovn(dt, dst.high(), src1); \
504  __ vqmovn(dt, dst.low(), src0); \
505  } \
506  } while (0)
507 
508 #define ASSEMBLE_NEON_PAIRWISE_OP(op, size) \
509  do { \
510  Simd128Register dst = i.OutputSimd128Register(), \
511  src0 = i.InputSimd128Register(0), \
512  src1 = i.InputSimd128Register(1); \
513  if (dst == src0) { \
514  __ op(size, dst.low(), src0.low(), src0.high()); \
515  if (dst == src1) { \
516  __ vmov(dst.high(), dst.low()); \
517  } else { \
518  __ op(size, dst.high(), src1.low(), src1.high()); \
519  } \
520  } else { \
521  __ op(size, dst.high(), src1.low(), src1.high()); \
522  __ op(size, dst.low(), src0.low(), src0.high()); \
523  } \
524  } while (0)
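// The branching in both SIMD macros above exists purely to cope with
// register aliasing between dst and the sources: when dst aliases src0, the
// low half is produced first so src0 is consumed before dst's high half is
// overwritten, and in the general case src1 is consumed first in case dst
// aliases src1. The narrowing variant additionally duplicates the low half
// into the high half when dst aliases both inputs.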
525 
526 void CodeGenerator::AssembleDeconstructFrame() {
527  __ LeaveFrame(StackFrame::MANUAL);
528  unwinding_info_writer_.MarkFrameDeconstructed(__ pc_offset());
529 }
530 
531 void CodeGenerator::AssemblePrepareTailCall() {
532  if (frame_access_state()->has_frame()) {
533  __ ldr(lr, MemOperand(fp, StandardFrameConstants::kCallerPCOffset));
534  __ ldr(fp, MemOperand(fp, StandardFrameConstants::kCallerFPOffset));
535  }
536  frame_access_state()->SetFrameAccessToSP();
537 }
538 
539 void CodeGenerator::AssemblePopArgumentsAdaptorFrame(Register args_reg,
540  Register scratch1,
541  Register scratch2,
542  Register scratch3) {
543  DCHECK(!AreAliased(args_reg, scratch1, scratch2, scratch3));
544  Label done;
545 
546  // Check if current frame is an arguments adaptor frame.
547  __ ldr(scratch1, MemOperand(fp, StandardFrameConstants::kContextOffset));
548  __ cmp(scratch1,
549  Operand(StackFrame::TypeToMarker(StackFrame::ARGUMENTS_ADAPTOR)));
550  __ b(ne, &done);
551 
552  // Load the argument count from the current arguments adaptor frame
553  // (note that it does not include the receiver).
554  Register caller_args_count_reg = scratch1;
555  __ ldr(caller_args_count_reg,
556  MemOperand(fp, ArgumentsAdaptorFrameConstants::kLengthOffset));
557  __ SmiUntag(caller_args_count_reg);
558 
559  ParameterCount callee_args_count(args_reg);
560  __ PrepareForTailCall(callee_args_count, caller_args_count_reg, scratch2,
561  scratch3);
562  __ bind(&done);
563 }
564 
565 namespace {
566 
567 void FlushPendingPushRegisters(TurboAssembler* tasm,
568  FrameAccessState* frame_access_state,
569  ZoneVector<Register>* pending_pushes) {
570  switch (pending_pushes->size()) {
571  case 0:
572  break;
573  case 1:
574  tasm->push((*pending_pushes)[0]);
575  break;
576  case 2:
577  tasm->Push((*pending_pushes)[0], (*pending_pushes)[1]);
578  break;
579  case 3:
580  tasm->Push((*pending_pushes)[0], (*pending_pushes)[1],
581  (*pending_pushes)[2]);
582  break;
583  default:
584  UNREACHABLE();
585  break;
586  }
587  frame_access_state->IncreaseSPDelta(pending_pushes->size());
588  pending_pushes->clear();
589 }
590 
591 void AdjustStackPointerForTailCall(
592  TurboAssembler* tasm, FrameAccessState* state, int new_slot_above_sp,
593  ZoneVector<Register>* pending_pushes = nullptr,
594  bool allow_shrinkage = true) {
595  int current_sp_offset = state->GetSPToFPSlotCount() +
596  StandardFrameConstants::kFixedSlotCountAboveFp;
597  int stack_slot_delta = new_slot_above_sp - current_sp_offset;
598  if (stack_slot_delta > 0) {
599  if (pending_pushes != nullptr) {
600  FlushPendingPushRegisters(tasm, state, pending_pushes);
601  }
602  tasm->sub(sp, sp, Operand(stack_slot_delta * kPointerSize));
603  state->IncreaseSPDelta(stack_slot_delta);
604  } else if (allow_shrinkage && stack_slot_delta < 0) {
605  if (pending_pushes != nullptr) {
606  FlushPendingPushRegisters(tasm, state, pending_pushes);
607  }
608  tasm->add(sp, sp, Operand(-stack_slot_delta * kPointerSize));
609  state->IncreaseSPDelta(stack_slot_delta);
610  }
611 }
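// stack_slot_delta measures the gap between the current sp (derived from the
// tracked SP-to-FP slot count) and where the tail call expects it. A
// positive delta grows the stack, a negative one shrinks it when permitted,
// and in both cases any registers queued for pushing are flushed first so
// their slot indices are still computed against the un-adjusted sp.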
612 
613 #if DEBUG
614 bool VerifyOutputOfAtomicPairInstr(ArmOperandConverter* converter,
615  const Instruction* instr, Register low,
616  Register high) {
617  if (instr->OutputCount() > 0) {
618  if (converter->OutputRegister(0) != low) return false;
619  if (instr->OutputCount() == 2 && converter->OutputRegister(1) != high)
620  return false;
621  }
622  return true;
623 }
624 #endif
625 
626 } // namespace
627 
628 void CodeGenerator::AssembleTailCallBeforeGap(Instruction* instr,
629  int first_unused_stack_slot) {
630  ZoneVector<MoveOperands*> pushes(zone());
631  GetPushCompatibleMoves(instr, kRegisterPush, &pushes);
632 
633  if (!pushes.empty() &&
634  (LocationOperand::cast(pushes.back()->destination()).index() + 1 ==
635  first_unused_stack_slot)) {
636  ArmOperandConverter g(this, instr);
637  ZoneVector<Register> pending_pushes(zone());
638  for (auto move : pushes) {
639  LocationOperand destination_location(
640  LocationOperand::cast(move->destination()));
641  InstructionOperand source(move->source());
642  AdjustStackPointerForTailCall(
643  tasm(), frame_access_state(),
644  destination_location.index() - pending_pushes.size(),
645  &pending_pushes);
646  // Pushes of non-register data types are not supported.
647  DCHECK(source.IsRegister());
648  LocationOperand source_location(LocationOperand::cast(source));
649  pending_pushes.push_back(source_location.GetRegister());
650  // TODO(arm): We can push more than 3 registers at once. Add support in
651  // the macro-assembler for pushing a list of registers.
652  if (pending_pushes.size() == 3) {
653  FlushPendingPushRegisters(tasm(), frame_access_state(),
654  &pending_pushes);
655  }
656  move->Eliminate();
657  }
658  FlushPendingPushRegisters(tasm(), frame_access_state(), &pending_pushes);
659  }
660  AdjustStackPointerForTailCall(tasm(), frame_access_state(),
661  first_unused_stack_slot, nullptr, false);
662 }
663 
664 void CodeGenerator::AssembleTailCallAfterGap(Instruction* instr,
665  int first_unused_stack_slot) {
666  AdjustStackPointerForTailCall(tasm(), frame_access_state(),
667  first_unused_stack_slot);
668 }
669 
670 // Check that {kJavaScriptCallCodeStartRegister} is correct.
671 void CodeGenerator::AssembleCodeStartRegisterCheck() {
672  UseScratchRegisterScope temps(tasm());
673  Register scratch = temps.Acquire();
674  __ ComputeCodeStartAddress(scratch);
675  __ cmp(scratch, kJavaScriptCallCodeStartRegister);
676  __ Assert(eq, AbortReason::kWrongFunctionCodeStart);
677 }
678 
679 // Check if the code object is marked for deoptimization. If it is, then it
680 // jumps to the CompileLazyDeoptimizedCode builtin. In order to do this we need
681 // to:
682 // 1. read from memory the word that contains that bit, which can be found in
683 // the flags in the referenced {CodeDataContainer} object;
684 // 2. test kMarkedForDeoptimizationBit in those flags; and
685 // 3. if it is not zero then it jumps to the builtin.
686 void CodeGenerator::BailoutIfDeoptimized() {
687  UseScratchRegisterScope temps(tasm());
688  Register scratch = temps.Acquire();
689  int offset = Code::kCodeDataContainerOffset - Code::kHeaderSize;
690  __ ldr(scratch, MemOperand(kJavaScriptCallCodeStartRegister, offset));
691  __ ldr(scratch,
692  FieldMemOperand(scratch, CodeDataContainer::kKindSpecificFlagsOffset));
693  __ tst(scratch, Operand(1 << Code::kMarkedForDeoptimizationBit));
694  // Ensure we're not serializing (otherwise we'd need to use an indirection to
695  // access the builtin below).
696  DCHECK(!isolate()->ShouldLoadConstantsFromRootList());
697  Handle<Code> code = isolate()->builtins()->builtin_handle(
698  Builtins::kCompileLazyDeoptimizedCode);
699  __ Jump(code, RelocInfo::CODE_TARGET, ne);
700 }
701 
702 void CodeGenerator::GenerateSpeculationPoisonFromCodeStartRegister() {
703  UseScratchRegisterScope temps(tasm());
704  Register scratch = temps.Acquire();
705 
706  // Set a mask which has all bits set in the normal case, but has all
707  // bits cleared if we are speculatively executing the wrong PC.
708  __ ComputeCodeStartAddress(scratch);
709  __ cmp(kJavaScriptCallCodeStartRegister, scratch);
710  __ mov(kSpeculationPoisonRegister, Operand(-1), SBit::LeaveCC, eq);
711  __ mov(kSpeculationPoisonRegister, Operand(0), SBit::LeaveCC, ne);
712  __ csdb();
713 }
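// Sketch of the Spectre mitigation: cmp yields eq exactly when the expected
// code-start register matches the actual PC-derived code start, the two
// predicated movs then materialize either an all-ones or an all-zero mask
// without a branch, and csdb keeps the predicated movs from resolving off
// speculated flag values. Poisoned loads (EmitWordLoadPoisoningIfNeeded
// above) AND their result with this register, so a wrong-path speculative
// load reads as zero.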
714 
715 void CodeGenerator::AssembleRegisterArgumentPoisoning() {
716  __ and_(kJSFunctionRegister, kJSFunctionRegister, kSpeculationPoisonRegister);
717  __ and_(kContextRegister, kContextRegister, kSpeculationPoisonRegister);
718  __ and_(sp, sp, kSpeculationPoisonRegister);
719 }
720 
721 // Assembles an instruction after register allocation, producing machine code.
722 CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
723  Instruction* instr) {
724  ArmOperandConverter i(this, instr);
725 
726  __ MaybeCheckConstPool();
727  InstructionCode opcode = instr->opcode();
728  ArchOpcode arch_opcode = ArchOpcodeField::decode(opcode);
729  switch (arch_opcode) {
730  case kArchCallCodeObject: {
731  if (instr->InputAt(0)->IsImmediate()) {
732  __ Call(i.InputCode(0), RelocInfo::CODE_TARGET);
733  } else {
734  Register reg = i.InputRegister(0);
735  DCHECK_IMPLIES(
736  HasCallDescriptorFlag(instr, CallDescriptor::kFixedTargetRegister),
737  reg == kJavaScriptCallCodeStartRegister);
738  __ add(reg, reg, Operand(Code::kHeaderSize - kHeapObjectTag));
739  __ Call(reg);
740  }
741  RecordCallPosition(instr);
742  DCHECK_EQ(LeaveCC, i.OutputSBit());
743  frame_access_state()->ClearSPDelta();
744  break;
745  }
746  case kArchCallWasmFunction: {
747  if (instr->InputAt(0)->IsImmediate()) {
748  Constant constant = i.ToConstant(instr->InputAt(0));
749  Address wasm_code = static_cast<Address>(constant.ToInt32());
750  __ Call(wasm_code, constant.rmode());
751  } else {
752  __ Call(i.InputRegister(0));
753  }
754  RecordCallPosition(instr);
755  DCHECK_EQ(LeaveCC, i.OutputSBit());
756  frame_access_state()->ClearSPDelta();
757  break;
758  }
759  case kArchTailCallCodeObjectFromJSFunction:
760  case kArchTailCallCodeObject: {
761  if (arch_opcode == kArchTailCallCodeObjectFromJSFunction) {
762  AssemblePopArgumentsAdaptorFrame(kJavaScriptCallArgCountRegister,
763  i.TempRegister(0), i.TempRegister(1),
764  i.TempRegister(2));
765  }
766  if (instr->InputAt(0)->IsImmediate()) {
767  __ Jump(i.InputCode(0), RelocInfo::CODE_TARGET);
768  } else {
769  Register reg = i.InputRegister(0);
770  DCHECK_IMPLIES(
771  HasCallDescriptorFlag(instr, CallDescriptor::kFixedTargetRegister),
772  reg == kJavaScriptCallCodeStartRegister);
773  __ add(reg, reg, Operand(Code::kHeaderSize - kHeapObjectTag));
774  __ Jump(reg);
775  }
776  DCHECK_EQ(LeaveCC, i.OutputSBit());
777  unwinding_info_writer_.MarkBlockWillExit();
778  frame_access_state()->ClearSPDelta();
779  frame_access_state()->SetFrameAccessToDefault();
780  break;
781  }
782  case kArchTailCallWasm: {
783  if (instr->InputAt(0)->IsImmediate()) {
784  Constant constant = i.ToConstant(instr->InputAt(0));
785  Address wasm_code = static_cast<Address>(constant.ToInt32());
786  __ Jump(wasm_code, constant.rmode());
787  } else {
788  __ Jump(i.InputRegister(0));
789  }
790  DCHECK_EQ(LeaveCC, i.OutputSBit());
791  unwinding_info_writer_.MarkBlockWillExit();
792  frame_access_state()->ClearSPDelta();
793  frame_access_state()->SetFrameAccessToDefault();
794  break;
795  }
796  case kArchTailCallAddress: {
797  CHECK(!instr->InputAt(0)->IsImmediate());
798  Register reg = i.InputRegister(0);
799  DCHECK_IMPLIES(
800  HasCallDescriptorFlag(instr, CallDescriptor::kFixedTargetRegister),
801  reg == kJavaScriptCallCodeStartRegister);
802  __ Jump(reg);
803  unwinding_info_writer_.MarkBlockWillExit();
804  frame_access_state()->ClearSPDelta();
805  frame_access_state()->SetFrameAccessToDefault();
806  break;
807  }
808  case kArchCallJSFunction: {
809  Register func = i.InputRegister(0);
810  if (FLAG_debug_code) {
811  UseScratchRegisterScope temps(tasm());
812  Register scratch = temps.Acquire();
813  // Check the function's context matches the context argument.
814  __ ldr(scratch, FieldMemOperand(func, JSFunction::kContextOffset));
815  __ cmp(cp, scratch);
816  __ Assert(eq, AbortReason::kWrongFunctionContext);
817  }
818  static_assert(kJavaScriptCallCodeStartRegister == r2, "ABI mismatch");
819  __ ldr(r2, FieldMemOperand(func, JSFunction::kCodeOffset));
820  __ add(r2, r2, Operand(Code::kHeaderSize - kHeapObjectTag));
821  __ Call(r2);
822  RecordCallPosition(instr);
823  DCHECK_EQ(LeaveCC, i.OutputSBit());
824  frame_access_state()->ClearSPDelta();
825  break;
826  }
827  case kArchPrepareCallCFunction: {
828  int const num_parameters = MiscField::decode(instr->opcode());
829  __ PrepareCallCFunction(num_parameters);
830  // Frame alignment requires using FP-relative frame addressing.
831  frame_access_state()->SetFrameAccessToFP();
832  break;
833  }
834  case kArchSaveCallerRegisters: {
835  fp_mode_ =
836  static_cast<SaveFPRegsMode>(MiscField::decode(instr->opcode()));
837  DCHECK(fp_mode_ == kDontSaveFPRegs || fp_mode_ == kSaveFPRegs);
838  // kReturnRegister0 should have been saved before entering the stub.
839  int bytes = __ PushCallerSaved(fp_mode_, kReturnRegister0);
840  DCHECK_EQ(0, bytes % kPointerSize);
841  DCHECK_EQ(0, frame_access_state()->sp_delta());
842  frame_access_state()->IncreaseSPDelta(bytes / kPointerSize);
843  DCHECK(!caller_registers_saved_);
844  caller_registers_saved_ = true;
845  break;
846  }
847  case kArchRestoreCallerRegisters: {
848  DCHECK(fp_mode_ ==
849  static_cast<SaveFPRegsMode>(MiscField::decode(instr->opcode())));
850  DCHECK(fp_mode_ == kDontSaveFPRegs || fp_mode_ == kSaveFPRegs);
851  // Don't overwrite the returned value.
852  int bytes = __ PopCallerSaved(fp_mode_, kReturnRegister0);
853  frame_access_state()->IncreaseSPDelta(-(bytes / kPointerSize));
854  DCHECK_EQ(0, frame_access_state()->sp_delta());
855  DCHECK(caller_registers_saved_);
856  caller_registers_saved_ = false;
857  break;
858  }
859  case kArchPrepareTailCall:
860  AssemblePrepareTailCall();
861  break;
862  case kArchCallCFunction: {
863  int const num_parameters = MiscField::decode(instr->opcode());
864  if (instr->InputAt(0)->IsImmediate()) {
865  ExternalReference ref = i.InputExternalReference(0);
866  __ CallCFunction(ref, num_parameters);
867  } else {
868  Register func = i.InputRegister(0);
869  __ CallCFunction(func, num_parameters);
870  }
871  frame_access_state()->SetFrameAccessToDefault();
872  // Ideally, we should decrement SP delta to match the change of stack
873  // pointer in CallCFunction. However, for certain architectures (e.g.
874  // ARM), there may be a stricter alignment requirement, causing the old SP
875  // to be saved on the stack. In those cases, we cannot calculate the SP
876  // delta statically.
877  frame_access_state()->ClearSPDelta();
878  if (caller_registers_saved_) {
879  // Need to re-sync SP delta introduced in kArchSaveCallerRegisters.
880  // Here, we assume the sequence to be:
881  // kArchSaveCallerRegisters;
882  // kArchCallCFunction;
883  // kArchRestoreCallerRegisters;
884  int bytes =
885  __ RequiredStackSizeForCallerSaved(fp_mode_, kReturnRegister0);
886  frame_access_state()->IncreaseSPDelta(bytes / kPointerSize);
887  }
888  break;
889  }
890  case kArchJmp:
891  AssembleArchJump(i.InputRpo(0));
892  DCHECK_EQ(LeaveCC, i.OutputSBit());
893  break;
894  case kArchBinarySearchSwitch:
895  AssembleArchBinarySearchSwitch(instr);
896  break;
897  case kArchLookupSwitch:
898  AssembleArchLookupSwitch(instr);
899  DCHECK_EQ(LeaveCC, i.OutputSBit());
900  break;
901  case kArchTableSwitch:
902  AssembleArchTableSwitch(instr);
903  DCHECK_EQ(LeaveCC, i.OutputSBit());
904  break;
905  case kArchDebugAbort:
906  DCHECK(i.InputRegister(0) == r1);
907  if (!frame_access_state()->has_frame()) {
908  // We don't actually want to generate a pile of code for this, so just
909  // claim there is a stack frame, without generating one.
910  FrameScope scope(tasm(), StackFrame::NONE);
911  __ Call(isolate()->builtins()->builtin_handle(Builtins::kAbortJS),
912  RelocInfo::CODE_TARGET);
913  } else {
914  __ Call(isolate()->builtins()->builtin_handle(Builtins::kAbortJS),
915  RelocInfo::CODE_TARGET);
916  }
917  __ stop("kArchDebugAbort");
918  unwinding_info_writer_.MarkBlockWillExit();
919  break;
920  case kArchDebugBreak:
921  __ stop("kArchDebugBreak");
922  break;
923  case kArchComment:
924  __ RecordComment(reinterpret_cast<const char*>(i.InputInt32(0)));
925  break;
926  case kArchThrowTerminator:
927  DCHECK_EQ(LeaveCC, i.OutputSBit());
928  unwinding_info_writer_.MarkBlockWillExit();
929  break;
930  case kArchNop:
931  // don't emit code for nops.
932  DCHECK_EQ(LeaveCC, i.OutputSBit());
933  break;
934  case kArchDeoptimize: {
935  int deopt_state_id =
936  BuildTranslation(instr, -1, 0, OutputFrameStateCombine::Ignore());
937  CodeGenResult result =
938  AssembleDeoptimizerCall(deopt_state_id, current_source_position_);
939  if (result != kSuccess) return result;
940  unwinding_info_writer_.MarkBlockWillExit();
941  break;
942  }
943  case kArchRet:
944  AssembleReturn(instr->InputAt(0));
945  DCHECK_EQ(LeaveCC, i.OutputSBit());
946  break;
947  case kArchStackPointer:
948  __ mov(i.OutputRegister(), sp);
949  DCHECK_EQ(LeaveCC, i.OutputSBit());
950  break;
951  case kArchFramePointer:
952  __ mov(i.OutputRegister(), fp);
953  DCHECK_EQ(LeaveCC, i.OutputSBit());
954  break;
955  case kArchParentFramePointer:
956  if (frame_access_state()->has_frame()) {
957  __ ldr(i.OutputRegister(), MemOperand(fp, 0));
958  } else {
959  __ mov(i.OutputRegister(), fp);
960  }
961  break;
962  case kArchTruncateDoubleToI:
963  __ TruncateDoubleToI(isolate(), zone(), i.OutputRegister(),
964  i.InputDoubleRegister(0), DetermineStubCallMode());
965  DCHECK_EQ(LeaveCC, i.OutputSBit());
966  break;
967  case kArchStoreWithWriteBarrier: {
968  RecordWriteMode mode =
969  static_cast<RecordWriteMode>(MiscField::decode(instr->opcode()));
970  Register object = i.InputRegister(0);
971  Register value = i.InputRegister(2);
972  Register scratch0 = i.TempRegister(0);
973  Register scratch1 = i.TempRegister(1);
974  OutOfLineRecordWrite* ool;
975 
976  AddressingMode addressing_mode =
977  AddressingModeField::decode(instr->opcode());
978  if (addressing_mode == kMode_Offset_RI) {
979  int32_t index = i.InputInt32(1);
980  ool = new (zone()) OutOfLineRecordWrite(
981  this, object, index, value, scratch0, scratch1, mode,
982  DetermineStubCallMode(), &unwinding_info_writer_);
983  __ str(value, MemOperand(object, index));
984  } else {
985  DCHECK_EQ(kMode_Offset_RR, addressing_mode);
986  Register index(i.InputRegister(1));
987  ool = new (zone()) OutOfLineRecordWrite(
988  this, object, index, value, scratch0, scratch1, mode,
989  DetermineStubCallMode(), &unwinding_info_writer_);
990  __ str(value, MemOperand(object, index));
991  }
992  __ CheckPageFlag(object, scratch0,
993  MemoryChunk::kPointersFromHereAreInterestingMask, ne,
994  ool->entry());
995  __ bind(ool->exit());
996  break;
997  }
998  case kArchStackSlot: {
999  FrameOffset offset =
1000  frame_access_state()->GetFrameOffset(i.InputInt32(0));
1001  Register base = offset.from_stack_pointer() ? sp : fp;
1002  __ add(i.OutputRegister(0), base, Operand(offset.offset()));
1003  break;
1004  }
1005  case kIeee754Float64Acos:
1006  ASSEMBLE_IEEE754_UNOP(acos);
1007  break;
1008  case kIeee754Float64Acosh:
1009  ASSEMBLE_IEEE754_UNOP(acosh);
1010  break;
1011  case kIeee754Float64Asin:
1012  ASSEMBLE_IEEE754_UNOP(asin);
1013  break;
1014  case kIeee754Float64Asinh:
1015  ASSEMBLE_IEEE754_UNOP(asinh);
1016  break;
1017  case kIeee754Float64Atan:
1018  ASSEMBLE_IEEE754_UNOP(atan);
1019  break;
1020  case kIeee754Float64Atanh:
1021  ASSEMBLE_IEEE754_UNOP(atanh);
1022  break;
1023  case kIeee754Float64Atan2:
1024  ASSEMBLE_IEEE754_BINOP(atan2);
1025  break;
1026  case kIeee754Float64Cbrt:
1027  ASSEMBLE_IEEE754_UNOP(cbrt);
1028  break;
1029  case kIeee754Float64Cos:
1030  ASSEMBLE_IEEE754_UNOP(cos);
1031  break;
1032  case kIeee754Float64Cosh:
1033  ASSEMBLE_IEEE754_UNOP(cosh);
1034  break;
1035  case kIeee754Float64Exp:
1036  ASSEMBLE_IEEE754_UNOP(exp);
1037  break;
1038  case kIeee754Float64Expm1:
1039  ASSEMBLE_IEEE754_UNOP(expm1);
1040  break;
1041  case kIeee754Float64Log:
1042  ASSEMBLE_IEEE754_UNOP(log);
1043  break;
1044  case kIeee754Float64Log1p:
1045  ASSEMBLE_IEEE754_UNOP(log1p);
1046  break;
1047  case kIeee754Float64Log2:
1048  ASSEMBLE_IEEE754_UNOP(log2);
1049  break;
1050  case kIeee754Float64Log10:
1051  ASSEMBLE_IEEE754_UNOP(log10);
1052  break;
1053  case kIeee754Float64Pow: {
1054  __ Call(BUILTIN_CODE(isolate(), MathPowInternal), RelocInfo::CODE_TARGET);
1055  __ vmov(d0, d2);
1056  break;
1057  }
1058  case kIeee754Float64Sin:
1059  ASSEMBLE_IEEE754_UNOP(sin);
1060  break;
1061  case kIeee754Float64Sinh:
1062  ASSEMBLE_IEEE754_UNOP(sinh);
1063  break;
1064  case kIeee754Float64Tan:
1065  ASSEMBLE_IEEE754_UNOP(tan);
1066  break;
1067  case kIeee754Float64Tanh:
1068  ASSEMBLE_IEEE754_UNOP(tanh);
1069  break;
1070  case kArmAdd:
1071  __ add(i.OutputRegister(), i.InputRegister(0), i.InputOperand2(1),
1072  i.OutputSBit());
1073  break;
1074  case kArmAnd:
1075  __ and_(i.OutputRegister(), i.InputRegister(0), i.InputOperand2(1),
1076  i.OutputSBit());
1077  break;
1078  case kArmBic:
1079  __ bic(i.OutputRegister(), i.InputRegister(0), i.InputOperand2(1),
1080  i.OutputSBit());
1081  break;
1082  case kArmMul:
1083  __ mul(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1),
1084  i.OutputSBit());
1085  break;
1086  case kArmMla:
1087  __ mla(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1),
1088  i.InputRegister(2), i.OutputSBit());
1089  break;
1090  case kArmMls: {
1091  CpuFeatureScope scope(tasm(), ARMv7);
1092  __ mls(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1),
1093  i.InputRegister(2));
1094  DCHECK_EQ(LeaveCC, i.OutputSBit());
1095  break;
1096  }
1097  case kArmSmull:
1098  __ smull(i.OutputRegister(0), i.OutputRegister(1), i.InputRegister(0),
1099  i.InputRegister(1));
1100  break;
1101  case kArmSmmul:
1102  __ smmul(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1));
1103  DCHECK_EQ(LeaveCC, i.OutputSBit());
1104  break;
1105  case kArmSmmla:
1106  __ smmla(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1),
1107  i.InputRegister(2));
1108  DCHECK_EQ(LeaveCC, i.OutputSBit());
1109  break;
1110  case kArmUmull:
1111  __ umull(i.OutputRegister(0), i.OutputRegister(1), i.InputRegister(0),
1112  i.InputRegister(1), i.OutputSBit());
1113  break;
1114  case kArmSdiv: {
1115  CpuFeatureScope scope(tasm(), SUDIV);
1116  __ sdiv(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1));
1117  DCHECK_EQ(LeaveCC, i.OutputSBit());
1118  break;
1119  }
1120  case kArmUdiv: {
1121  CpuFeatureScope scope(tasm(), SUDIV);
1122  __ udiv(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1));
1123  DCHECK_EQ(LeaveCC, i.OutputSBit());
1124  break;
1125  }
1126  case kArmMov:
1127  __ Move(i.OutputRegister(), i.InputOperand2(0), i.OutputSBit());
1128  break;
1129  case kArmMvn:
1130  __ mvn(i.OutputRegister(), i.InputOperand2(0), i.OutputSBit());
1131  break;
1132  case kArmOrr:
1133  __ orr(i.OutputRegister(), i.InputRegister(0), i.InputOperand2(1),
1134  i.OutputSBit());
1135  break;
1136  case kArmEor:
1137  __ eor(i.OutputRegister(), i.InputRegister(0), i.InputOperand2(1),
1138  i.OutputSBit());
1139  break;
1140  case kArmSub:
1141  __ sub(i.OutputRegister(), i.InputRegister(0), i.InputOperand2(1),
1142  i.OutputSBit());
1143  break;
1144  case kArmRsb:
1145  __ rsb(i.OutputRegister(), i.InputRegister(0), i.InputOperand2(1),
1146  i.OutputSBit());
1147  break;
1148  case kArmBfc: {
1149  CpuFeatureScope scope(tasm(), ARMv7);
1150  __ bfc(i.OutputRegister(), i.InputInt8(1), i.InputInt8(2));
1151  DCHECK_EQ(LeaveCC, i.OutputSBit());
1152  break;
1153  }
1154  case kArmUbfx: {
1155  CpuFeatureScope scope(tasm(), ARMv7);
1156  __ ubfx(i.OutputRegister(), i.InputRegister(0), i.InputInt8(1),
1157  i.InputInt8(2));
1158  DCHECK_EQ(LeaveCC, i.OutputSBit());
1159  break;
1160  }
1161  case kArmSbfx: {
1162  CpuFeatureScope scope(tasm(), ARMv7);
1163  __ sbfx(i.OutputRegister(), i.InputRegister(0), i.InputInt8(1),
1164  i.InputInt8(2));
1165  DCHECK_EQ(LeaveCC, i.OutputSBit());
1166  break;
1167  }
1168  case kArmSxtb:
1169  __ sxtb(i.OutputRegister(), i.InputRegister(0), i.InputInt32(1));
1170  DCHECK_EQ(LeaveCC, i.OutputSBit());
1171  break;
1172  case kArmSxth:
1173  __ sxth(i.OutputRegister(), i.InputRegister(0), i.InputInt32(1));
1174  DCHECK_EQ(LeaveCC, i.OutputSBit());
1175  break;
1176  case kArmSxtab:
1177  __ sxtab(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1),
1178  i.InputInt32(2));
1179  DCHECK_EQ(LeaveCC, i.OutputSBit());
1180  break;
1181  case kArmSxtah:
1182  __ sxtah(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1),
1183  i.InputInt32(2));
1184  DCHECK_EQ(LeaveCC, i.OutputSBit());
1185  break;
1186  case kArmUxtb:
1187  __ uxtb(i.OutputRegister(), i.InputRegister(0), i.InputInt32(1));
1188  DCHECK_EQ(LeaveCC, i.OutputSBit());
1189  break;
1190  case kArmUxth:
1191  __ uxth(i.OutputRegister(), i.InputRegister(0), i.InputInt32(1));
1192  DCHECK_EQ(LeaveCC, i.OutputSBit());
1193  break;
1194  case kArmUxtab:
1195  __ uxtab(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1),
1196  i.InputInt32(2));
1197  DCHECK_EQ(LeaveCC, i.OutputSBit());
1198  break;
1199  case kArmUxtah:
1200  __ uxtah(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1),
1201  i.InputInt32(2));
1202  DCHECK_EQ(LeaveCC, i.OutputSBit());
1203  break;
1204  case kArmRbit: {
1205  CpuFeatureScope scope(tasm(), ARMv7);
1206  __ rbit(i.OutputRegister(), i.InputRegister(0));
1207  DCHECK_EQ(LeaveCC, i.OutputSBit());
1208  break;
1209  }
1210  case kArmRev:
1211  __ rev(i.OutputRegister(), i.InputRegister(0));
1212  DCHECK_EQ(LeaveCC, i.OutputSBit());
1213  break;
1214  case kArmClz:
1215  __ clz(i.OutputRegister(), i.InputRegister(0));
1216  DCHECK_EQ(LeaveCC, i.OutputSBit());
1217  break;
1218  case kArmCmp:
1219  __ cmp(i.InputRegister(0), i.InputOperand2(1));
1220  DCHECK_EQ(SetCC, i.OutputSBit());
1221  break;
1222  case kArmCmn:
1223  __ cmn(i.InputRegister(0), i.InputOperand2(1));
1224  DCHECK_EQ(SetCC, i.OutputSBit());
1225  break;
1226  case kArmTst:
1227  __ tst(i.InputRegister(0), i.InputOperand2(1));
1228  DCHECK_EQ(SetCC, i.OutputSBit());
1229  break;
1230  case kArmTeq:
1231  __ teq(i.InputRegister(0), i.InputOperand2(1));
1232  DCHECK_EQ(SetCC, i.OutputSBit());
1233  break;
1234  case kArmAddPair:
1235  // i.InputRegister(0) ... left low word.
1236  // i.InputRegister(1) ... left high word.
1237  // i.InputRegister(2) ... right low word.
1238  // i.InputRegister(3) ... right high word.
1239  __ add(i.OutputRegister(0), i.InputRegister(0), i.InputRegister(2),
1240  SBit::SetCC);
1241  __ adc(i.OutputRegister(1), i.InputRegister(1),
1242  Operand(i.InputRegister(3)));
1243  DCHECK_EQ(LeaveCC, i.OutputSBit());
1244  break;
1245  case kArmSubPair:
1246  // i.InputRegister(0) ... left low word.
1247  // i.InputRegister(1) ... left high word.
1248  // i.InputRegister(2) ... right low word.
1249  // i.InputRegister(3) ... right high word.
1250  __ sub(i.OutputRegister(0), i.InputRegister(0), i.InputRegister(2),
1251  SBit::SetCC);
1252  __ sbc(i.OutputRegister(1), i.InputRegister(1),
1253  Operand(i.InputRegister(3)));
1254  DCHECK_EQ(LeaveCC, i.OutputSBit());
1255  break;
1256  case kArmMulPair:
1257  // i.InputRegister(0) ... left low word.
1258  // i.InputRegister(1) ... left high word.
1259  // i.InputRegister(2) ... right low word.
1260  // i.InputRegister(3) ... right high word.
1261  __ umull(i.OutputRegister(0), i.OutputRegister(1), i.InputRegister(0),
1262  i.InputRegister(2));
1263  __ mla(i.OutputRegister(1), i.InputRegister(0), i.InputRegister(3),
1264  i.OutputRegister(1));
1265  __ mla(i.OutputRegister(1), i.InputRegister(2), i.InputRegister(1),
1266  i.OutputRegister(1));
1267  break;
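// These three instructions implement a 64-bit multiply built from 32-bit
// halves. With a = aH:aL and b = bH:bL,
//
//   a * b mod 2^64 = aL*bL + ((aL*bH + aH*bL) << 32)
//
// umull computes the full 64-bit product aL*bL; the two mla instructions
// accumulate aL*bH and aH*bL into the high output word. The aH*bH term only
// contributes to bits >= 64 and is discarded.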
1268  case kArmLslPair: {
1269  Register second_output =
1270  instr->OutputCount() >= 2 ? i.OutputRegister(1) : i.TempRegister(0);
1271  if (instr->InputAt(2)->IsImmediate()) {
1272  __ LslPair(i.OutputRegister(0), second_output, i.InputRegister(0),
1273  i.InputRegister(1), i.InputInt32(2));
1274  } else {
1275  __ LslPair(i.OutputRegister(0), second_output, i.InputRegister(0),
1276  i.InputRegister(1), i.InputRegister(2));
1277  }
1278  break;
1279  }
1280  case kArmLsrPair: {
1281  Register second_output =
1282  instr->OutputCount() >= 2 ? i.OutputRegister(1) : i.TempRegister(0);
1283  if (instr->InputAt(2)->IsImmediate()) {
1284  __ LsrPair(i.OutputRegister(0), second_output, i.InputRegister(0),
1285  i.InputRegister(1), i.InputInt32(2));
1286  } else {
1287  __ LsrPair(i.OutputRegister(0), second_output, i.InputRegister(0),
1288  i.InputRegister(1), i.InputRegister(2));
1289  }
1290  break;
1291  }
1292  case kArmAsrPair: {
1293  Register second_output =
1294  instr->OutputCount() >= 2 ? i.OutputRegister(1) : i.TempRegister(0);
1295  if (instr->InputAt(2)->IsImmediate()) {
1296  __ AsrPair(i.OutputRegister(0), second_output, i.InputRegister(0),
1297  i.InputRegister(1), i.InputInt32(2));
1298  } else {
1299  __ AsrPair(i.OutputRegister(0), second_output, i.InputRegister(0),
1300  i.InputRegister(1), i.InputRegister(2));
1301  }
1302  break;
1303  }
1304  case kArmVcmpF32:
1305  if (instr->InputAt(1)->IsFPRegister()) {
1306  __ VFPCompareAndSetFlags(i.InputFloatRegister(0),
1307  i.InputFloatRegister(1));
1308  } else {
1309  DCHECK(instr->InputAt(1)->IsImmediate());
1310  // 0.0 is the only immediate supported by vcmp instructions.
1311  DCHECK_EQ(0.0f, i.InputFloat32(1));
1312  __ VFPCompareAndSetFlags(i.InputFloatRegister(0), i.InputFloat32(1));
1313  }
1314  DCHECK_EQ(SetCC, i.OutputSBit());
1315  break;
1316  case kArmVaddF32:
1317  __ vadd(i.OutputFloatRegister(), i.InputFloatRegister(0),
1318  i.InputFloatRegister(1));
1319  DCHECK_EQ(LeaveCC, i.OutputSBit());
1320  break;
1321  case kArmVsubF32:
1322  __ vsub(i.OutputFloatRegister(), i.InputFloatRegister(0),
1323  i.InputFloatRegister(1));
1324  DCHECK_EQ(LeaveCC, i.OutputSBit());
1325  break;
1326  case kArmVmulF32:
1327  __ vmul(i.OutputFloatRegister(), i.InputFloatRegister(0),
1328  i.InputFloatRegister(1));
1329  DCHECK_EQ(LeaveCC, i.OutputSBit());
1330  break;
1331  case kArmVmlaF32:
1332  __ vmla(i.OutputFloatRegister(), i.InputFloatRegister(1),
1333  i.InputFloatRegister(2));
1334  DCHECK_EQ(LeaveCC, i.OutputSBit());
1335  break;
1336  case kArmVmlsF32:
1337  __ vmls(i.OutputFloatRegister(), i.InputFloatRegister(1),
1338  i.InputFloatRegister(2));
1339  DCHECK_EQ(LeaveCC, i.OutputSBit());
1340  break;
1341  case kArmVdivF32:
1342  __ vdiv(i.OutputFloatRegister(), i.InputFloatRegister(0),
1343  i.InputFloatRegister(1));
1344  DCHECK_EQ(LeaveCC, i.OutputSBit());
1345  break;
1346  case kArmVsqrtF32:
1347  __ vsqrt(i.OutputFloatRegister(), i.InputFloatRegister(0));
1348  break;
1349  case kArmVabsF32:
1350  __ vabs(i.OutputFloatRegister(), i.InputFloatRegister(0));
1351  break;
1352  case kArmVnegF32:
1353  __ vneg(i.OutputFloatRegister(), i.InputFloatRegister(0));
1354  break;
1355  case kArmVcmpF64:
1356  if (instr->InputAt(1)->IsFPRegister()) {
1357  __ VFPCompareAndSetFlags(i.InputDoubleRegister(0),
1358  i.InputDoubleRegister(1));
1359  } else {
1360  DCHECK(instr->InputAt(1)->IsImmediate());
1361  // 0.0 is the only immediate supported by vcmp instructions.
1362  DCHECK_EQ(0.0, i.InputDouble(1));
1363  __ VFPCompareAndSetFlags(i.InputDoubleRegister(0), i.InputDouble(1));
1364  }
1365  DCHECK_EQ(SetCC, i.OutputSBit());
1366  break;
1367  case kArmVaddF64:
1368  __ vadd(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
1369  i.InputDoubleRegister(1));
1370  DCHECK_EQ(LeaveCC, i.OutputSBit());
1371  break;
1372  case kArmVsubF64:
1373  __ vsub(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
1374  i.InputDoubleRegister(1));
1375  DCHECK_EQ(LeaveCC, i.OutputSBit());
1376  break;
1377  case kArmVmulF64:
1378  __ vmul(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
1379  i.InputDoubleRegister(1));
1380  DCHECK_EQ(LeaveCC, i.OutputSBit());
1381  break;
1382  case kArmVmlaF64:
1383  __ vmla(i.OutputDoubleRegister(), i.InputDoubleRegister(1),
1384  i.InputDoubleRegister(2));
1385  DCHECK_EQ(LeaveCC, i.OutputSBit());
1386  break;
1387  case kArmVmlsF64:
1388  __ vmls(i.OutputDoubleRegister(), i.InputDoubleRegister(1),
1389  i.InputDoubleRegister(2));
1390  DCHECK_EQ(LeaveCC, i.OutputSBit());
1391  break;
1392  case kArmVdivF64:
1393  __ vdiv(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
1394  i.InputDoubleRegister(1));
1395  DCHECK_EQ(LeaveCC, i.OutputSBit());
1396  break;
1397  case kArmVmodF64: {
1398  // TODO(bmeurer): We should really get rid of this special instruction,
1399  // and generate a CallAddress instruction instead.
1400  FrameScope scope(tasm(), StackFrame::MANUAL);
1401  __ PrepareCallCFunction(0, 2);
1402  __ MovToFloatParameters(i.InputDoubleRegister(0),
1403  i.InputDoubleRegister(1));
1404  __ CallCFunction(ExternalReference::mod_two_doubles_operation(), 0, 2);
1405  // Move the result into the double result register.
1406  __ MovFromFloatResult(i.OutputDoubleRegister());
1407  DCHECK_EQ(LeaveCC, i.OutputSBit());
1408  break;
1409  }
1410  case kArmVsqrtF64:
1411  __ vsqrt(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
1412  break;
1413  case kArmVabsF64:
1414  __ vabs(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
1415  break;
1416  case kArmVnegF64:
1417  __ vneg(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
1418  break;
1419  case kArmVrintmF32: {
1420  CpuFeatureScope scope(tasm(), ARMv8);
1421  __ vrintm(i.OutputFloatRegister(), i.InputFloatRegister(0));
1422  break;
1423  }
1424  case kArmVrintmF64: {
1425  CpuFeatureScope scope(tasm(), ARMv8);
1426  __ vrintm(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
1427  break;
1428  }
1429  case kArmVrintpF32: {
1430  CpuFeatureScope scope(tasm(), ARMv8);
1431  __ vrintp(i.OutputFloatRegister(), i.InputFloatRegister(0));
1432  break;
1433  }
1434  case kArmVrintpF64: {
1435  CpuFeatureScope scope(tasm(), ARMv8);
1436  __ vrintp(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
1437  break;
1438  }
1439  case kArmVrintzF32: {
1440  CpuFeatureScope scope(tasm(), ARMv8);
1441  __ vrintz(i.OutputFloatRegister(), i.InputFloatRegister(0));
1442  break;
1443  }
1444  case kArmVrintzF64: {
1445  CpuFeatureScope scope(tasm(), ARMv8);
1446  __ vrintz(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
1447  break;
1448  }
1449  case kArmVrintaF64: {
1450  CpuFeatureScope scope(tasm(), ARMv8);
1451  __ vrinta(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
1452  break;
1453  }
1454  case kArmVrintnF32: {
1455  CpuFeatureScope scope(tasm(), ARMv8);
1456  __ vrintn(i.OutputFloatRegister(), i.InputFloatRegister(0));
1457  break;
1458  }
1459  case kArmVrintnF64: {
1460  CpuFeatureScope scope(tasm(), ARMv8);
1461  __ vrintn(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
1462  break;
1463  }
1464  case kArmVcvtF32F64: {
1465  __ vcvt_f32_f64(i.OutputFloatRegister(), i.InputDoubleRegister(0));
1466  DCHECK_EQ(LeaveCC, i.OutputSBit());
1467  break;
1468  }
1469  case kArmVcvtF64F32: {
1470  __ vcvt_f64_f32(i.OutputDoubleRegister(), i.InputFloatRegister(0));
1471  DCHECK_EQ(LeaveCC, i.OutputSBit());
1472  break;
1473  }
1474  case kArmVcvtF32S32: {
1475  UseScratchRegisterScope temps(tasm());
1476  SwVfpRegister scratch = temps.AcquireS();
1477  __ vmov(scratch, i.InputRegister(0));
1478  __ vcvt_f32_s32(i.OutputFloatRegister(), scratch);
1479  DCHECK_EQ(LeaveCC, i.OutputSBit());
1480  break;
1481  }
1482  case kArmVcvtF32U32: {
1483  UseScratchRegisterScope temps(tasm());
1484  SwVfpRegister scratch = temps.AcquireS();
1485  __ vmov(scratch, i.InputRegister(0));
1486  __ vcvt_f32_u32(i.OutputFloatRegister(), scratch);
1487  DCHECK_EQ(LeaveCC, i.OutputSBit());
1488  break;
1489  }
1490  case kArmVcvtF64S32: {
1491  UseScratchRegisterScope temps(tasm());
1492  SwVfpRegister scratch = temps.AcquireS();
1493  __ vmov(scratch, i.InputRegister(0));
1494  __ vcvt_f64_s32(i.OutputDoubleRegister(), scratch);
1495  DCHECK_EQ(LeaveCC, i.OutputSBit());
1496  break;
1497  }
1498  case kArmVcvtF64U32: {
1499  UseScratchRegisterScope temps(tasm());
1500  SwVfpRegister scratch = temps.AcquireS();
1501  __ vmov(scratch, i.InputRegister(0));
1502  __ vcvt_f64_u32(i.OutputDoubleRegister(), scratch);
1503  DCHECK_EQ(LeaveCC, i.OutputSBit());
1504  break;
1505  }
1506  case kArmVcvtS32F32: {
1507  UseScratchRegisterScope temps(tasm());
1508  SwVfpRegister scratch = temps.AcquireS();
1509  __ vcvt_s32_f32(scratch, i.InputFloatRegister(0));
1510  __ vmov(i.OutputRegister(), scratch);
1511  // Avoid INT32_MAX as an overflow indicator and use INT32_MIN instead,
1512  // because INT32_MIN allows easier out-of-bounds detection.
1513  __ cmn(i.OutputRegister(), Operand(1));
1514  __ mov(i.OutputRegister(), Operand(INT32_MIN), SBit::LeaveCC, vs);
1515  DCHECK_EQ(LeaveCC, i.OutputSBit());
1516  break;
1517  }
1518  case kArmVcvtU32F32: {
1519  UseScratchRegisterScope temps(tasm());
1520  SwVfpRegister scratch = temps.AcquireS();
1521  __ vcvt_u32_f32(scratch, i.InputFloatRegister(0));
1522  __ vmov(i.OutputRegister(), scratch);
1523  // Avoid UINT32_MAX as an overflow indicator and use 0 instead,
1524  // because 0 allows easier out-of-bounds detection.
1525  __ cmn(i.OutputRegister(), Operand(1));
1526  __ adc(i.OutputRegister(), i.OutputRegister(), Operand::Zero());
1527  DCHECK_EQ(LeaveCC, i.OutputSBit());
1528  break;
1529  }
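// The cmn/adc pair is a branch-free saturation fixup: vcvt_u32 clamps an
// out-of-range input to UINT32_MAX, cmn(reg, 1) computes reg + 1 and sets
// the carry flag only when reg was UINT32_MAX, and adc folds that carry back
// in so the saturated value wraps to 0, the overflow marker named in the
// comment above. The signed case (kArmVcvtS32F32) uses the same idea with
// the overflow flag: reg + 1 overflows only for INT32_MAX, and the
// predicated mov then rewrites the result to INT32_MIN.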
1530  case kArmVcvtS32F64: {
1531  UseScratchRegisterScope temps(tasm());
1532  SwVfpRegister scratch = temps.AcquireS();
1533  __ vcvt_s32_f64(scratch, i.InputDoubleRegister(0));
1534  __ vmov(i.OutputRegister(), scratch);
1535  DCHECK_EQ(LeaveCC, i.OutputSBit());
1536  break;
1537  }
1538  case kArmVcvtU32F64: {
1539  UseScratchRegisterScope temps(tasm());
1540  SwVfpRegister scratch = temps.AcquireS();
1541  __ vcvt_u32_f64(scratch, i.InputDoubleRegister(0));
1542  __ vmov(i.OutputRegister(), scratch);
1543  DCHECK_EQ(LeaveCC, i.OutputSBit());
1544  break;
1545  }
1546  case kArmVmovU32F32:
1547  __ vmov(i.OutputRegister(), i.InputFloatRegister(0));
1548  DCHECK_EQ(LeaveCC, i.OutputSBit());
1549  break;
1550  case kArmVmovF32U32:
1551  __ vmov(i.OutputFloatRegister(), i.InputRegister(0));
1552  DCHECK_EQ(LeaveCC, i.OutputSBit());
1553  break;
1554  case kArmVmovLowU32F64:
1555  __ VmovLow(i.OutputRegister(), i.InputDoubleRegister(0));
1556  DCHECK_EQ(LeaveCC, i.OutputSBit());
1557  break;
1558  case kArmVmovLowF64U32:
1559  __ VmovLow(i.OutputDoubleRegister(), i.InputRegister(1));
1560  DCHECK_EQ(LeaveCC, i.OutputSBit());
1561  break;
1562  case kArmVmovHighU32F64:
1563  __ VmovHigh(i.OutputRegister(), i.InputDoubleRegister(0));
1564  DCHECK_EQ(LeaveCC, i.OutputSBit());
1565  break;
1566  case kArmVmovHighF64U32:
1567  __ VmovHigh(i.OutputDoubleRegister(), i.InputRegister(1));
1568  DCHECK_EQ(LeaveCC, i.OutputSBit());
1569  break;
1570  case kArmVmovF64U32U32:
1571  __ vmov(i.OutputDoubleRegister(), i.InputRegister(0), i.InputRegister(1));
1572  DCHECK_EQ(LeaveCC, i.OutputSBit());
1573  break;
1574  case kArmVmovU32U32F64:
1575  __ vmov(i.OutputRegister(0), i.OutputRegister(1),
1576  i.InputDoubleRegister(0));
1577  DCHECK_EQ(LeaveCC, i.OutputSBit());
1578  break;
1579  case kArmLdrb:
1580  __ ldrb(i.OutputRegister(), i.InputOffset());
1581  DCHECK_EQ(LeaveCC, i.OutputSBit());
1582  EmitWordLoadPoisoningIfNeeded(this, opcode, i);
1583  break;
1584  case kArmLdrsb:
1585  __ ldrsb(i.OutputRegister(), i.InputOffset());
1586  DCHECK_EQ(LeaveCC, i.OutputSBit());
1587  EmitWordLoadPoisoningIfNeeded(this, opcode, i);
1588  break;
1589  case kArmStrb:
1590  __ strb(i.InputRegister(0), i.InputOffset(1));
1591  DCHECK_EQ(LeaveCC, i.OutputSBit());
1592  break;
1593  case kArmLdrh:
1594  __ ldrh(i.OutputRegister(), i.InputOffset());
1595  EmitWordLoadPoisoningIfNeeded(this, opcode, i);
1596  break;
1597  case kArmLdrsh:
1598  __ ldrsh(i.OutputRegister(), i.InputOffset());
1599  EmitWordLoadPoisoningIfNeeded(this, opcode, i);
1600  break;
1601  case kArmStrh:
1602  __ strh(i.InputRegister(0), i.InputOffset(1));
1603  DCHECK_EQ(LeaveCC, i.OutputSBit());
1604  break;
1605  case kArmLdr:
1606  __ ldr(i.OutputRegister(), i.InputOffset());
1607  EmitWordLoadPoisoningIfNeeded(this, opcode, i);
1608  break;
1609  case kArmStr:
1610  __ str(i.InputRegister(0), i.InputOffset(1));
1611  DCHECK_EQ(LeaveCC, i.OutputSBit());
1612  break;
1613  case kArmVldrF32: {
1614  const MemoryAccessMode access_mode =
1615  static_cast<MemoryAccessMode>(MiscField::decode(opcode));
1616  if (access_mode == kMemoryAccessPoisoned) {
1617  UseScratchRegisterScope temps(tasm());
1618  Register address = temps.Acquire();
1619  ComputePoisonedAddressForLoad(this, opcode, i, address);
1620  __ vldr(i.OutputFloatRegister(), address, 0);
1621  } else {
1622  __ vldr(i.OutputFloatRegister(), i.InputOffset());
1623  }
1624  DCHECK_EQ(LeaveCC, i.OutputSBit());
1625  break;
1626  }
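  // A floating-point destination cannot simply be and-ed with the poison
  // register after the load, so poisoned FP loads mask the address instead
  // (via ComputePoisonedAddressForLoad, defined earlier in this file); a
  // misspeculated load then cannot target attacker-chosen memory.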
1627  case kArmVstrF32:
1628  __ vstr(i.InputFloatRegister(0), i.InputOffset(1));
1629  DCHECK_EQ(LeaveCC, i.OutputSBit());
1630  break;
1631  case kArmVld1F64: {
1632  __ vld1(Neon8, NeonListOperand(i.OutputDoubleRegister()),
1633  i.NeonInputOperand(0));
1634  break;
1635  }
1636  case kArmVst1F64: {
1637  __ vst1(Neon8, NeonListOperand(i.InputDoubleRegister(0)),
1638  i.NeonInputOperand(1));
1639  break;
1640  }
1641  case kArmVld1S128: {
1642  __ vld1(Neon8, NeonListOperand(i.OutputSimd128Register()),
1643  i.NeonInputOperand(0));
1644  break;
1645  }
1646  case kArmVst1S128: {
1647  __ vst1(Neon8, NeonListOperand(i.InputSimd128Register(0)),
1648  i.NeonInputOperand(1));
1649  break;
1650  }
1651  case kArmVldrF64: {
1652  const MemoryAccessMode access_mode =
1653  static_cast<MemoryAccessMode>(MiscField::decode(opcode));
1654  if (access_mode == kMemoryAccessPoisoned) {
1655  UseScratchRegisterScope temps(tasm());
1656  Register address = temps.Acquire();
1657  ComputePoisonedAddressForLoad(this, opcode, i, address);
1658  __ vldr(i.OutputDoubleRegister(), address, 0);
1659  } else {
1660  __ vldr(i.OutputDoubleRegister(), i.InputOffset());
1661  }
1662  DCHECK_EQ(LeaveCC, i.OutputSBit());
1663  break;
1664  }
1665  case kArmVstrF64:
1666  __ vstr(i.InputDoubleRegister(0), i.InputOffset(1));
1667  DCHECK_EQ(LeaveCC, i.OutputSBit());
1668  break;
1669  case kArmFloat32Max: {
1670  SwVfpRegister result = i.OutputFloatRegister();
1671  SwVfpRegister left = i.InputFloatRegister(0);
1672  SwVfpRegister right = i.InputFloatRegister(1);
1673  if (left == right) {
1674  __ Move(result, left);
1675  } else {
1676  auto ool = new (zone()) OutOfLineFloat32Max(this, result, left, right);
1677  __ FloatMax(result, left, right, ool->entry());
1678  __ bind(ool->exit());
1679  }
1680  DCHECK_EQ(LeaveCC, i.OutputSBit());
1681  break;
1682  }
1683  case kArmFloat64Max: {
1684  DwVfpRegister result = i.OutputDoubleRegister();
1685  DwVfpRegister left = i.InputDoubleRegister(0);
1686  DwVfpRegister right = i.InputDoubleRegister(1);
1687  if (left == right) {
1688  __ Move(result, left);
1689  } else {
1690  auto ool = new (zone()) OutOfLineFloat64Max(this, result, left, right);
1691  __ FloatMax(result, left, right, ool->entry());
1692  __ bind(ool->exit());
1693  }
1694  DCHECK_EQ(LeaveCC, i.OutputSBit());
1695  break;
1696  }
1697  case kArmFloat32Min: {
1698  SwVfpRegister result = i.OutputFloatRegister();
1699  SwVfpRegister left = i.InputFloatRegister(0);
1700  SwVfpRegister right = i.InputFloatRegister(1);
1701  if (left == right) {
1702  __ Move(result, left);
1703  } else {
1704  auto ool = new (zone()) OutOfLineFloat32Min(this, result, left, right);
1705  __ FloatMin(result, left, right, ool->entry());
1706  __ bind(ool->exit());
1707  }
1708  DCHECK_EQ(LeaveCC, i.OutputSBit());
1709  break;
1710  }
1711  case kArmFloat64Min: {
1712  DwVfpRegister result = i.OutputDoubleRegister();
1713  DwVfpRegister left = i.InputDoubleRegister(0);
1714  DwVfpRegister right = i.InputDoubleRegister(1);
1715  if (left == right) {
1716  __ Move(result, left);
1717  } else {
1718  auto ool = new (zone()) OutOfLineFloat64Min(this, result, left, right);
1719  __ FloatMin(result, left, right, ool->entry());
1720  __ bind(ool->exit());
1721  }
1722  DCHECK_EQ(LeaveCC, i.OutputSBit());
1723  break;
1724  }
1725  case kArmFloat64SilenceNaN: {
1726  DwVfpRegister value = i.InputDoubleRegister(0);
1727  DwVfpRegister result = i.OutputDoubleRegister();
1728  __ VFPCanonicalizeNaN(result, value);
1729  break;
1730  }
1731  case kArmPush:
1732  if (instr->InputAt(0)->IsFPRegister()) {
1733  LocationOperand* op = LocationOperand::cast(instr->InputAt(0));
1734  switch (op->representation()) {
1735  case MachineRepresentation::kFloat32:
1736  __ vpush(i.InputFloatRegister(0));
1737  frame_access_state()->IncreaseSPDelta(1);
1738  break;
1739  case MachineRepresentation::kFloat64:
1740  __ vpush(i.InputDoubleRegister(0));
1741  frame_access_state()->IncreaseSPDelta(kDoubleSize / kPointerSize);
1742  break;
1743  case MachineRepresentation::kSimd128: {
1744  __ vpush(i.InputSimd128Register(0));
1745  frame_access_state()->IncreaseSPDelta(kSimd128Size / kPointerSize);
1746  break;
1747  }
1748  default:
1749  UNREACHABLE();
1750  break;
1751  }
1752  } else {
1753  __ push(i.InputRegister(0));
1754  frame_access_state()->IncreaseSPDelta(1);
1755  }
1756  DCHECK_EQ(LeaveCC, i.OutputSBit());
1757  break;
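  // The SP delta is tracked in pointer-size slots, hence the divisions by
  // kPointerSize above: on 32-bit Arm a float64 push costs 2 slots and a
  // simd128 push costs 4.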
1758  case kArmPoke: {
1759  int const slot = MiscField::decode(instr->opcode());
1760  __ str(i.InputRegister(0), MemOperand(sp, slot * kPointerSize));
1761  DCHECK_EQ(LeaveCC, i.OutputSBit());
1762  break;
1763  }
1764  case kArmPeek: {
1765  // The incoming value is 0-based, but we need a 1-based value.
1766  int reverse_slot = i.InputInt32(0) + 1;
1767  int offset =
1768  FrameSlotToFPOffset(frame()->GetTotalFrameSlotCount() - reverse_slot);
1769  if (instr->OutputAt(0)->IsFPRegister()) {
1770  LocationOperand* op = LocationOperand::cast(instr->OutputAt(0));
1771  if (op->representation() == MachineRepresentation::kFloat64) {
1772  __ vldr(i.OutputDoubleRegister(), MemOperand(fp, offset));
1773  } else {
1774  DCHECK_EQ(MachineRepresentation::kFloat32, op->representation());
1775  __ vldr(i.OutputFloatRegister(), MemOperand(fp, offset));
1776  }
1777  } else {
1778  __ ldr(i.OutputRegister(), MemOperand(fp, offset));
1779  }
1780  break;
1781  }
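  // kArmPeek reads a value the callee left on the stack, e.g. a result
  // returned in a stack slot: the index is converted to a slot counted
  // back from the total frame size and then to an fp-relative offset.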
1782  case kArmDsbIsb: {
1783  __ dsb(SY);
1784  __ isb(SY);
1785  break;
1786  }
1787  case kArchWordPoisonOnSpeculation:
1788  __ and_(i.OutputRegister(0), i.InputRegister(0),
1789  Operand(kSpeculationPoisonRegister));
1790  break;
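  // kSpeculationPoisonRegister is all-ones on the architecturally correct
  // path and zero under misspeculation (see AssembleBranchPoisoning
  // below), so this and_ masks speculatively loaded words to zero.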
1791  case kArmF32x4Splat: {
1792  int src_code = i.InputFloatRegister(0).code();
1793  __ vdup(Neon32, i.OutputSimd128Register(),
1794  DwVfpRegister::from_code(src_code / 2), src_code % 2);
1795  break;
1796  }
1797  case kArmF32x4ExtractLane: {
1798  __ ExtractLane(i.OutputFloatRegister(), i.InputSimd128Register(0),
1799  i.InputInt8(1));
1800  break;
1801  }
1802  case kArmF32x4ReplaceLane: {
1803  __ ReplaceLane(i.OutputSimd128Register(), i.InputSimd128Register(0),
1804  i.InputFloatRegister(2), i.InputInt8(1));
1805  break;
1806  }
1807  case kArmF32x4SConvertI32x4: {
1808  __ vcvt_f32_s32(i.OutputSimd128Register(), i.InputSimd128Register(0));
1809  break;
1810  }
1811  case kArmF32x4UConvertI32x4: {
1812  __ vcvt_f32_u32(i.OutputSimd128Register(), i.InputSimd128Register(0));
1813  break;
1814  }
1815  case kArmF32x4Abs: {
1816  __ vabs(i.OutputSimd128Register(), i.InputSimd128Register(0));
1817  break;
1818  }
1819  case kArmF32x4Neg: {
1820  __ vneg(i.OutputSimd128Register(), i.InputSimd128Register(0));
1821  break;
1822  }
1823  case kArmF32x4RecipApprox: {
1824  __ vrecpe(i.OutputSimd128Register(), i.InputSimd128Register(0));
1825  break;
1826  }
1827  case kArmF32x4RecipSqrtApprox: {
1828  __ vrsqrte(i.OutputSimd128Register(), i.InputSimd128Register(0));
1829  break;
1830  }
1831  case kArmF32x4Add: {
1832  __ vadd(i.OutputSimd128Register(), i.InputSimd128Register(0),
1833  i.InputSimd128Register(1));
1834  break;
1835  }
1836  case kArmF32x4AddHoriz: {
1837  Simd128Register dst = i.OutputSimd128Register(),
1838  src0 = i.InputSimd128Register(0),
1839  src1 = i.InputSimd128Register(1);
1840  // Make sure we don't overwrite source data before it's used.
1841  if (dst == src0) {
1842  __ vpadd(dst.low(), src0.low(), src0.high());
1843  if (dst == src1) {
1844  __ vmov(dst.high(), dst.low());
1845  } else {
1846  __ vpadd(dst.high(), src1.low(), src1.high());
1847  }
1848  } else {
1849  __ vpadd(dst.high(), src1.low(), src1.high());
1850  __ vpadd(dst.low(), src0.low(), src0.high());
1851  }
1852  break;
1853  }
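  // vpadd adds adjacent lanes of its two d-register inputs, so the two
  // vpadds produce [s0[0]+s0[1], s0[2]+s0[3], s1[0]+s1[1], s1[2]+s1[3]],
  // i.e. the horizontal add; the aliasing checks merely order the writes
  // so that no source half is clobbered before it is read.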
1854  case kArmF32x4Sub: {
1855  __ vsub(i.OutputSimd128Register(), i.InputSimd128Register(0),
1856  i.InputSimd128Register(1));
1857  break;
1858  }
1859  case kArmF32x4Mul: {
1860  __ vmul(i.OutputSimd128Register(), i.InputSimd128Register(0),
1861  i.InputSimd128Register(1));
1862  break;
1863  }
1864  case kArmF32x4Min: {
1865  __ vmin(i.OutputSimd128Register(), i.InputSimd128Register(0),
1866  i.InputSimd128Register(1));
1867  break;
1868  }
1869  case kArmF32x4Max: {
1870  __ vmax(i.OutputSimd128Register(), i.InputSimd128Register(0),
1871  i.InputSimd128Register(1));
1872  break;
1873  }
1874  case kArmF32x4Eq: {
1875  __ vceq(i.OutputSimd128Register(), i.InputSimd128Register(0),
1876  i.InputSimd128Register(1));
1877  break;
1878  }
1879  case kArmF32x4Ne: {
1880  Simd128Register dst = i.OutputSimd128Register();
1881  __ vceq(dst, i.InputSimd128Register(0), i.InputSimd128Register(1));
1882  __ vmvn(dst, dst);
1883  break;
1884  }
1885  case kArmF32x4Lt: {
1886  __ vcgt(i.OutputSimd128Register(), i.InputSimd128Register(1),
1887  i.InputSimd128Register(0));
1888  break;
1889  }
1890  case kArmF32x4Le: {
1891  __ vcge(i.OutputSimd128Register(), i.InputSimd128Register(1),
1892  i.InputSimd128Register(0));
1893  break;
1894  }
1895  case kArmI32x4Splat: {
1896  __ vdup(Neon32, i.OutputSimd128Register(), i.InputRegister(0));
1897  break;
1898  }
1899  case kArmI32x4ExtractLane: {
1900  __ ExtractLane(i.OutputRegister(), i.InputSimd128Register(0), NeonS32,
1901  i.InputInt8(1));
1902  break;
1903  }
1904  case kArmI32x4ReplaceLane: {
1905  __ ReplaceLane(i.OutputSimd128Register(), i.InputSimd128Register(0),
1906  i.InputRegister(2), NeonS32, i.InputInt8(1));
1907  break;
1908  }
1909  case kArmI32x4SConvertF32x4: {
1910  __ vcvt_s32_f32(i.OutputSimd128Register(), i.InputSimd128Register(0));
1911  break;
1912  }
1913  case kArmI32x4SConvertI16x8Low: {
1914  __ vmovl(NeonS16, i.OutputSimd128Register(),
1915  i.InputSimd128Register(0).low());
1916  break;
1917  }
1918  case kArmI32x4SConvertI16x8High: {
1919  __ vmovl(NeonS16, i.OutputSimd128Register(),
1920  i.InputSimd128Register(0).high());
1921  break;
1922  }
1923  case kArmI32x4Neg: {
1924  __ vneg(Neon32, i.OutputSimd128Register(), i.InputSimd128Register(0));
1925  break;
1926  }
1927  case kArmI32x4Shl: {
1928  __ vshl(NeonS32, i.OutputSimd128Register(), i.InputSimd128Register(0),
1929  i.InputInt5(1));
1930  break;
1931  }
1932  case kArmI32x4ShrS: {
1933  __ vshr(NeonS32, i.OutputSimd128Register(), i.InputSimd128Register(0),
1934  i.InputInt5(1));
1935  break;
1936  }
1937  case kArmI32x4Add: {
1938  __ vadd(Neon32, i.OutputSimd128Register(), i.InputSimd128Register(0),
1939  i.InputSimd128Register(1));
1940  break;
1941  }
1942  case kArmI32x4AddHoriz:
1943  ASSEMBLE_NEON_PAIRWISE_OP(vpadd, Neon32);
1944  break;
1945  case kArmI32x4Sub: {
1946  __ vsub(Neon32, i.OutputSimd128Register(), i.InputSimd128Register(0),
1947  i.InputSimd128Register(1));
1948  break;
1949  }
1950  case kArmI32x4Mul: {
1951  __ vmul(Neon32, i.OutputSimd128Register(), i.InputSimd128Register(0),
1952  i.InputSimd128Register(1));
1953  break;
1954  }
1955  case kArmI32x4MinS: {
1956  __ vmin(NeonS32, i.OutputSimd128Register(), i.InputSimd128Register(0),
1957  i.InputSimd128Register(1));
1958  break;
1959  }
1960  case kArmI32x4MaxS: {
1961  __ vmax(NeonS32, i.OutputSimd128Register(), i.InputSimd128Register(0),
1962  i.InputSimd128Register(1));
1963  break;
1964  }
1965  case kArmI32x4Eq: {
1966  __ vceq(Neon32, i.OutputSimd128Register(), i.InputSimd128Register(0),
1967  i.InputSimd128Register(1));
1968  break;
1969  }
1970  case kArmI32x4Ne: {
1971  Simd128Register dst = i.OutputSimd128Register();
1972  __ vceq(Neon32, dst, i.InputSimd128Register(0),
1973  i.InputSimd128Register(1));
1974  __ vmvn(dst, dst);
1975  break;
1976  }
1977  case kArmI32x4GtS: {
1978  __ vcgt(NeonS32, i.OutputSimd128Register(), i.InputSimd128Register(0),
1979  i.InputSimd128Register(1));
1980  break;
1981  }
1982  case kArmI32x4GeS: {
1983  __ vcge(NeonS32, i.OutputSimd128Register(), i.InputSimd128Register(0),
1984  i.InputSimd128Register(1));
1985  break;
1986  }
1987  case kArmI32x4UConvertF32x4: {
1988  __ vcvt_u32_f32(i.OutputSimd128Register(), i.InputSimd128Register(0));
1989  break;
1990  }
1991  case kArmI32x4UConvertI16x8Low: {
1992  __ vmovl(NeonU16, i.OutputSimd128Register(),
1993  i.InputSimd128Register(0).low());
1994  break;
1995  }
1996  case kArmI32x4UConvertI16x8High: {
1997  __ vmovl(NeonU16, i.OutputSimd128Register(),
1998  i.InputSimd128Register(0).high());
1999  break;
2000  }
2001  case kArmI32x4ShrU: {
2002  __ vshr(NeonU32, i.OutputSimd128Register(), i.InputSimd128Register(0),
2003  i.InputInt5(1));
2004  break;
2005  }
2006  case kArmI32x4MinU: {
2007  __ vmin(NeonU32, i.OutputSimd128Register(), i.InputSimd128Register(0),
2008  i.InputSimd128Register(1));
2009  break;
2010  }
2011  case kArmI32x4MaxU: {
2012  __ vmax(NeonU32, i.OutputSimd128Register(), i.InputSimd128Register(0),
2013  i.InputSimd128Register(1));
2014  break;
2015  }
2016  case kArmI32x4GtU: {
2017  __ vcgt(NeonU32, i.OutputSimd128Register(), i.InputSimd128Register(0),
2018  i.InputSimd128Register(1));
2019  break;
2020  }
2021  case kArmI32x4GeU: {
2022  __ vcge(NeonU32, i.OutputSimd128Register(), i.InputSimd128Register(0),
2023  i.InputSimd128Register(1));
2024  break;
2025  }
2026  case kArmI16x8Splat: {
2027  __ vdup(Neon16, i.OutputSimd128Register(), i.InputRegister(0));
2028  break;
2029  }
2030  case kArmI16x8ExtractLane: {
2031  __ ExtractLane(i.OutputRegister(), i.InputSimd128Register(0), NeonS16,
2032  i.InputInt8(1));
2033  break;
2034  }
2035  case kArmI16x8ReplaceLane: {
2036  __ ReplaceLane(i.OutputSimd128Register(), i.InputSimd128Register(0),
2037  i.InputRegister(2), NeonS16, i.InputInt8(1));
2038  break;
2039  }
2040  case kArmI16x8SConvertI8x16Low: {
2041  __ vmovl(NeonS8, i.OutputSimd128Register(),
2042  i.InputSimd128Register(0).low());
2043  break;
2044  }
2045  case kArmI16x8SConvertI8x16High: {
2046  __ vmovl(NeonS8, i.OutputSimd128Register(),
2047  i.InputSimd128Register(0).high());
2048  break;
2049  }
2050  case kArmI16x8Neg: {
2051  __ vneg(Neon16, i.OutputSimd128Register(), i.InputSimd128Register(0));
2052  break;
2053  }
2054  case kArmI16x8Shl: {
2055  __ vshl(NeonS16, i.OutputSimd128Register(), i.InputSimd128Register(0),
2056  i.InputInt4(1));
2057  break;
2058  }
2059  case kArmI16x8ShrS: {
2060  __ vshr(NeonS16, i.OutputSimd128Register(), i.InputSimd128Register(0),
2061  i.InputInt4(1));
2062  break;
2063  }
2064  case kArmI16x8SConvertI32x4:
2065  ASSEMBLE_NEON_NARROWING_OP(NeonS16);
2066  break;
2067  case kArmI16x8Add: {
2068  __ vadd(Neon16, i.OutputSimd128Register(), i.InputSimd128Register(0),
2069  i.InputSimd128Register(1));
2070  break;
2071  }
2072  case kArmI16x8AddSaturateS: {
2073  __ vqadd(NeonS16, i.OutputSimd128Register(), i.InputSimd128Register(0),
2074  i.InputSimd128Register(1));
2075  break;
2076  }
2077  case kArmI16x8AddHoriz:
2078  ASSEMBLE_NEON_PAIRWISE_OP(vpadd, Neon16);
2079  break;
2080  case kArmI16x8Sub: {
2081  __ vsub(Neon16, i.OutputSimd128Register(), i.InputSimd128Register(0),
2082  i.InputSimd128Register(1));
2083  break;
2084  }
2085  case kArmI16x8SubSaturateS: {
2086  __ vqsub(NeonS16, i.OutputSimd128Register(), i.InputSimd128Register(0),
2087  i.InputSimd128Register(1));
2088  break;
2089  }
2090  case kArmI16x8Mul: {
2091  __ vmul(Neon16, i.OutputSimd128Register(), i.InputSimd128Register(0),
2092  i.InputSimd128Register(1));
2093  break;
2094  }
2095  case kArmI16x8MinS: {
2096  __ vmin(NeonS16, i.OutputSimd128Register(), i.InputSimd128Register(0),
2097  i.InputSimd128Register(1));
2098  break;
2099  }
2100  case kArmI16x8MaxS: {
2101  __ vmax(NeonS16, i.OutputSimd128Register(), i.InputSimd128Register(0),
2102  i.InputSimd128Register(1));
2103  break;
2104  }
2105  case kArmI16x8Eq: {
2106  __ vceq(Neon16, i.OutputSimd128Register(), i.InputSimd128Register(0),
2107  i.InputSimd128Register(1));
2108  break;
2109  }
2110  case kArmI16x8Ne: {
2111  Simd128Register dst = i.OutputSimd128Register();
2112  __ vceq(Neon16, dst, i.InputSimd128Register(0),
2113  i.InputSimd128Register(1));
2114  __ vmvn(dst, dst);
2115  break;
2116  }
2117  case kArmI16x8GtS: {
2118  __ vcgt(NeonS16, i.OutputSimd128Register(), i.InputSimd128Register(0),
2119  i.InputSimd128Register(1));
2120  break;
2121  }
2122  case kArmI16x8GeS: {
2123  __ vcge(NeonS16, i.OutputSimd128Register(), i.InputSimd128Register(0),
2124  i.InputSimd128Register(1));
2125  break;
2126  }
2127  case kArmI16x8UConvertI8x16Low: {
2128  __ vmovl(NeonU8, i.OutputSimd128Register(),
2129  i.InputSimd128Register(0).low());
2130  break;
2131  }
2132  case kArmI16x8UConvertI8x16High: {
2133  __ vmovl(NeonU8, i.OutputSimd128Register(),
2134  i.InputSimd128Register(0).high());
2135  break;
2136  }
2137  case kArmI16x8ShrU: {
2138  __ vshr(NeonU16, i.OutputSimd128Register(), i.InputSimd128Register(0),
2139  i.InputInt4(1));
2140  break;
2141  }
2142  case kArmI16x8UConvertI32x4:
2143  ASSEMBLE_NEON_NARROWING_OP(NeonU16);
2144  break;
2145  case kArmI16x8AddSaturateU: {
2146  __ vqadd(NeonU16, i.OutputSimd128Register(), i.InputSimd128Register(0),
2147  i.InputSimd128Register(1));
2148  break;
2149  }
2150  case kArmI16x8SubSaturateU: {
2151  __ vqsub(NeonU16, i.OutputSimd128Register(), i.InputSimd128Register(0),
2152  i.InputSimd128Register(1));
2153  break;
2154  }
2155  case kArmI16x8MinU: {
2156  __ vmin(NeonU16, i.OutputSimd128Register(), i.InputSimd128Register(0),
2157  i.InputSimd128Register(1));
2158  break;
2159  }
2160  case kArmI16x8MaxU: {
2161  __ vmax(NeonU16, i.OutputSimd128Register(), i.InputSimd128Register(0),
2162  i.InputSimd128Register(1));
2163  break;
2164  }
2165  case kArmI16x8GtU: {
2166  __ vcgt(NeonU16, i.OutputSimd128Register(), i.InputSimd128Register(0),
2167  i.InputSimd128Register(1));
2168  break;
2169  }
2170  case kArmI16x8GeU: {
2171  __ vcge(NeonU16, i.OutputSimd128Register(), i.InputSimd128Register(0),
2172  i.InputSimd128Register(1));
2173  break;
2174  }
2175  case kArmI8x16Splat: {
2176  __ vdup(Neon8, i.OutputSimd128Register(), i.InputRegister(0));
2177  break;
2178  }
2179  case kArmI8x16ExtractLane: {
2180  __ ExtractLane(i.OutputRegister(), i.InputSimd128Register(0), NeonS8,
2181  i.InputInt8(1));
2182  break;
2183  }
2184  case kArmI8x16ReplaceLane: {
2185  __ ReplaceLane(i.OutputSimd128Register(), i.InputSimd128Register(0),
2186  i.InputRegister(2), NeonS8, i.InputInt8(1));
2187  break;
2188  }
2189  case kArmI8x16Neg: {
2190  __ vneg(Neon8, i.OutputSimd128Register(), i.InputSimd128Register(0));
2191  break;
2192  }
2193  case kArmI8x16Shl: {
2194  __ vshl(NeonS8, i.OutputSimd128Register(), i.InputSimd128Register(0),
2195  i.InputInt3(1));
2196  break;
2197  }
2198  case kArmI8x16ShrS: {
2199  __ vshr(NeonS8, i.OutputSimd128Register(), i.InputSimd128Register(0),
2200  i.InputInt3(1));
2201  break;
2202  }
2203  case kArmI8x16SConvertI16x8:
2204  ASSEMBLE_NEON_NARROWING_OP(NeonS8);
2205  break;
2206  case kArmI8x16Add: {
2207  __ vadd(Neon8, i.OutputSimd128Register(), i.InputSimd128Register(0),
2208  i.InputSimd128Register(1));
2209  break;
2210  }
2211  case kArmI8x16AddSaturateS: {
2212  __ vqadd(NeonS8, i.OutputSimd128Register(), i.InputSimd128Register(0),
2213  i.InputSimd128Register(1));
2214  break;
2215  }
2216  case kArmI8x16Sub: {
2217  __ vsub(Neon8, i.OutputSimd128Register(), i.InputSimd128Register(0),
2218  i.InputSimd128Register(1));
2219  break;
2220  }
2221  case kArmI8x16SubSaturateS: {
2222  __ vqsub(NeonS8, i.OutputSimd128Register(), i.InputSimd128Register(0),
2223  i.InputSimd128Register(1));
2224  break;
2225  }
2226  case kArmI8x16Mul: {
2227  __ vmul(Neon8, i.OutputSimd128Register(), i.InputSimd128Register(0),
2228  i.InputSimd128Register(1));
2229  break;
2230  }
2231  case kArmI8x16MinS: {
2232  __ vmin(NeonS8, i.OutputSimd128Register(), i.InputSimd128Register(0),
2233  i.InputSimd128Register(1));
2234  break;
2235  }
2236  case kArmI8x16MaxS: {
2237  __ vmax(NeonS8, i.OutputSimd128Register(), i.InputSimd128Register(0),
2238  i.InputSimd128Register(1));
2239  break;
2240  }
2241  case kArmI8x16Eq: {
2242  __ vceq(Neon8, i.OutputSimd128Register(), i.InputSimd128Register(0),
2243  i.InputSimd128Register(1));
2244  break;
2245  }
2246  case kArmI8x16Ne: {
2247  Simd128Register dst = i.OutputSimd128Register();
2248  __ vceq(Neon8, dst, i.InputSimd128Register(0), i.InputSimd128Register(1));
2249  __ vmvn(dst, dst);
2250  break;
2251  }
2252  case kArmI8x16GtS: {
2253  __ vcgt(NeonS8, i.OutputSimd128Register(), i.InputSimd128Register(0),
2254  i.InputSimd128Register(1));
2255  break;
2256  }
2257  case kArmI8x16GeS: {
2258  __ vcge(NeonS8, i.OutputSimd128Register(), i.InputSimd128Register(0),
2259  i.InputSimd128Register(1));
2260  break;
2261  }
2262  case kArmI8x16ShrU: {
2263  __ vshr(NeonU8, i.OutputSimd128Register(), i.InputSimd128Register(0),
2264  i.InputInt3(1));
2265  break;
2266  }
2267  case kArmI8x16UConvertI16x8:
2268  ASSEMBLE_NEON_NARROWING_OP(NeonU8);
2269  break;
2270  case kArmI8x16AddSaturateU: {
2271  __ vqadd(NeonU8, i.OutputSimd128Register(), i.InputSimd128Register(0),
2272  i.InputSimd128Register(1));
2273  break;
2274  }
2275  case kArmI8x16SubSaturateU: {
2276  __ vqsub(NeonU8, i.OutputSimd128Register(), i.InputSimd128Register(0),
2277  i.InputSimd128Register(1));
2278  break;
2279  }
2280  case kArmI8x16MinU: {
2281  __ vmin(NeonU8, i.OutputSimd128Register(), i.InputSimd128Register(0),
2282  i.InputSimd128Register(1));
2283  break;
2284  }
2285  case kArmI8x16MaxU: {
2286  __ vmax(NeonU8, i.OutputSimd128Register(), i.InputSimd128Register(0),
2287  i.InputSimd128Register(1));
2288  break;
2289  }
2290  case kArmI8x16GtU: {
2291  __ vcgt(NeonU8, i.OutputSimd128Register(), i.InputSimd128Register(0),
2292  i.InputSimd128Register(1));
2293  break;
2294  }
2295  case kArmI8x16GeU: {
2296  __ vcge(NeonU8, i.OutputSimd128Register(), i.InputSimd128Register(0),
2297  i.InputSimd128Register(1));
2298  break;
2299  }
2300  case kArmS128Zero: {
2301  __ veor(i.OutputSimd128Register(), i.OutputSimd128Register(),
2302  i.OutputSimd128Register());
2303  break;
2304  }
2305  case kArmS128Dup: {
2306  NeonSize size = static_cast<NeonSize>(i.InputInt32(1));
2307  int lanes = kSimd128Size >> size;
2308  int index = i.InputInt32(2);
2309  DCHECK(index < lanes);
2310  int d_lanes = lanes / 2;
2311  int src_d_index = index & (d_lanes - 1);
2312  int src_d_code = i.InputSimd128Register(0).low().code() + index / d_lanes;
2313  __ vdup(size, i.OutputSimd128Register(),
2314  DwVfpRegister::from_code(src_d_code), src_d_index);
2315  break;
2316  }
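  // vdup broadcasts from a d-register lane, so the q-register lane index
  // is decomposed into a source d-register (index / d_lanes) and a lane
  // within it (index & (d_lanes - 1)).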
2317  case kArmS128And: {
2318  __ vand(i.OutputSimd128Register(), i.InputSimd128Register(0),
2319  i.InputSimd128Register(1));
2320  break;
2321  }
2322  case kArmS128Or: {
2323  __ vorr(i.OutputSimd128Register(), i.InputSimd128Register(0),
2324  i.InputSimd128Register(1));
2325  break;
2326  }
2327  case kArmS128Xor: {
2328  __ veor(i.OutputSimd128Register(), i.InputSimd128Register(0),
2329  i.InputSimd128Register(1));
2330  break;
2331  }
2332  case kArmS128Not: {
2333  __ vmvn(i.OutputSimd128Register(), i.InputSimd128Register(0));
2334  break;
2335  }
2336  case kArmS128Select: {
2337  Simd128Register dst = i.OutputSimd128Register();
2338  DCHECK(dst == i.InputSimd128Register(0));
2339  __ vbsl(dst, i.InputSimd128Register(1), i.InputSimd128Register(2));
2340  break;
2341  }
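  // vbsl is a bitwise select, dst = (dst & src1) | (~dst & src2): the mask
  // is the original destination contents, which is why the instruction
  // selector must tie dst to input 0 (the DCHECK above).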
2342  case kArmS32x4ZipLeft: {
2343  Simd128Register dst = i.OutputSimd128Register(),
2344  src1 = i.InputSimd128Register(1);
2345  DCHECK(dst == i.InputSimd128Register(0));
2346  // src0 = [0, 1, 2, 3], src1 = [4, 5, 6, 7]
2347  __ vmov(dst.high(), src1.low()); // dst = [0, 1, 4, 5]
2348  __ vtrn(Neon32, dst.low(), dst.high()); // dst = [0, 4, 1, 5]
2349  break;
2350  }
2351  case kArmS32x4ZipRight: {
2352  Simd128Register dst = i.OutputSimd128Register(),
2353  src1 = i.InputSimd128Register(1);
2354  DCHECK(dst == i.InputSimd128Register(0));
2355  // src0 = [4, 5, 6, 7], src1 = [0, 1, 2, 3] (flipped from ZipLeft).
2356  __ vmov(dst.low(), src1.high()); // dst = [2, 3, 6, 7]
2357  __ vtrn(Neon32, dst.low(), dst.high()); // dst = [2, 6, 3, 7]
2358  break;
2359  }
2360  case kArmS32x4UnzipLeft: {
2361  Simd128Register dst = i.OutputSimd128Register(),
2362  src1 = i.InputSimd128Register(1);
2363  DCHECK(dst == i.InputSimd128Register(0));
2364  UseScratchRegisterScope temps(tasm());
2365  Simd128Register scratch = temps.AcquireQ();
2366  // src0 = [0, 1, 2, 3], src1 = [4, 5, 6, 7]
2367  __ vmov(scratch, src1);
2368  __ vuzp(Neon32, dst, scratch); // dst = [0, 2, 4, 6]
2369  break;
2370  }
2371  case kArmS32x4UnzipRight: {
2372  Simd128Register dst = i.OutputSimd128Register(),
2373  src1 = i.InputSimd128Register(1);
2374  DCHECK(dst == i.InputSimd128Register(0));
2375  UseScratchRegisterScope temps(tasm());
2376  Simd128Register scratch = temps.AcquireQ();
2377  // src0 = [4, 5, 6, 7], src1 = [0, 1, 2, 3] (flipped from UnzipLeft).
2378  __ vmov(scratch, src1);
2379  __ vuzp(Neon32, scratch, dst); // dst = [1, 3, 5, 7]
2380  break;
2381  }
2382  case kArmS32x4TransposeLeft: {
2383  Simd128Register dst = i.OutputSimd128Register(),
2384  src1 = i.InputSimd128Register(1);
2385  DCHECK(dst == i.InputSimd128Register(0));
2386  UseScratchRegisterScope temps(tasm());
2387  Simd128Register scratch = temps.AcquireQ();
2388  // src0 = [0, 1, 2, 3], src1 = [4, 5, 6, 7]
2389  __ vmov(scratch, src1);
2390  __ vtrn(Neon32, dst, scratch); // dst = [0, 4, 2, 6]
2391  break;
2392  }
2393  case kArmS32x4Shuffle: {
2394  Simd128Register dst = i.OutputSimd128Register(),
2395  src0 = i.InputSimd128Register(0),
2396  src1 = i.InputSimd128Register(1);
2397  DCHECK_NE(dst, src0);
2398  DCHECK_NE(dst, src1);
2399  // Perform shuffle as a vmov per lane.
2400  int dst_code = dst.code() * 4;
2401  int src0_code = src0.code() * 4;
2402  int src1_code = src1.code() * 4;
2403  int32_t shuffle = i.InputInt32(2);
2404  for (int i = 0; i < 4; i++) {
2405  int lane = shuffle & 0x7;
2406  int src_code = src0_code;
2407  if (lane >= 4) {
2408  src_code = src1_code;
2409  lane &= 0x3;
2410  }
2411  __ VmovExtended(dst_code + i, src_code + lane);
2412  shuffle >>= 8;
2413  }
2414  break;
2415  }
2416  case kArmS32x4TransposeRight: {
2417  Simd128Register dst = i.OutputSimd128Register(),
2418  src1 = i.InputSimd128Register(1);
2419  UseScratchRegisterScope temps(tasm());
2420  Simd128Register scratch = temps.AcquireQ();
2421  DCHECK(dst == i.InputSimd128Register(0));
2422  // src0 = [4, 5, 6, 7], src1 = [0, 1, 2, 3] (flipped from TransposeLeft).
2423  __ vmov(scratch, src1);
2424  __ vtrn(Neon32, scratch, dst); // dst = [1, 5, 3, 7]
2425  break;
2426  }
2427  case kArmS16x8ZipLeft: {
2428  Simd128Register dst = i.OutputSimd128Register(),
2429  src1 = i.InputSimd128Register(1);
2430  // src0 = [0, 1, 2, 3, ... 7], src1 = [8, 9, 10, 11, ... 15]
2431  DCHECK(dst == i.InputSimd128Register(0));
2432  __ vmov(dst.high(), src1.low()); // dst = [0, 1, 2, 3, 8, ... 11]
2433  __ vzip(Neon16, dst.low(), dst.high()); // dst = [0, 8, 1, 9, ... 11]
2434  break;
2435  }
2436  case kArmS16x8ZipRight: {
2437  Simd128Register dst = i.OutputSimd128Register(),
2438  src1 = i.InputSimd128Register(1);
2439  DCHECK(dst == i.InputSimd128Register(0));
2440  // src0 = [8, 9, 10, 11, ... 15], src1 = [0, 1, 2, 3, ... 7] (flipped).
2441  __ vmov(dst.low(), src1.high());
2442  __ vzip(Neon16, dst.low(), dst.high()); // dst = [4, 12, 5, 13, ... 15]
2443  break;
2444  }
2445  case kArmS16x8UnzipLeft: {
2446  Simd128Register dst = i.OutputSimd128Register(),
2447  src1 = i.InputSimd128Register(1);
2448  UseScratchRegisterScope temps(tasm());
2449  Simd128Register scratch = temps.AcquireQ();
2450  DCHECK(dst == i.InputSimd128Register(0));
2451  // src0 = [0, 1, 2, 3, ... 7], src1 = [8, 9, 10, 11, ... 15]
2452  __ vmov(scratch, src1);
2453  __ vuzp(Neon16, dst, scratch); // dst = [0, 2, 4, 6, ... 14]
2454  break;
2455  }
2456  case kArmS16x8UnzipRight: {
2457  Simd128Register dst = i.OutputSimd128Register(),
2458  src1 = i.InputSimd128Register(1);
2459  UseScratchRegisterScope temps(tasm());
2460  Simd128Register scratch = temps.AcquireQ();
2461  DCHECK(dst == i.InputSimd128Register(0));
2462  // src0 = [8, 9, 10, 11, ... 15], src1 = [0, 1, 2, 3, ... 7] (flipped).
2463  __ vmov(scratch, src1);
2464  __ vuzp(Neon16, scratch, dst); // dst = [1, 3, 5, 7, ... 15]
2465  break;
2466  }
2467  case kArmS16x8TransposeLeft: {
2468  Simd128Register dst = i.OutputSimd128Register(),
2469  src1 = i.InputSimd128Register(1);
2470  UseScratchRegisterScope temps(tasm());
2471  Simd128Register scratch = temps.AcquireQ();
2472  DCHECK(dst == i.InputSimd128Register(0));
2473  // src0 = [0, 1, 2, 3, ... 7], src1 = [8, 9, 10, 11, ... 15]
2474  __ vmov(scratch, src1);
2475  __ vtrn(Neon16, dst, scratch); // dst = [0, 8, 2, 10, ... 14]
2476  break;
2477  }
2478  case kArmS16x8TransposeRight: {
2479  Simd128Register dst = i.OutputSimd128Register(),
2480  src1 = i.InputSimd128Register(1);
2481  UseScratchRegisterScope temps(tasm());
2482  Simd128Register scratch = temps.AcquireQ();
2483  DCHECK(dst == i.InputSimd128Register(0));
2484  // src0 = [8, 9, 10, 11, ... 15], src1 = [0, 1, 2, 3, ... 7] (flipped).
2485  __ vmov(scratch, src1);
2486  __ vtrn(Neon16, scratch, dst); // dst = [1, 9, 3, 11, ... 15]
2487  break;
2488  }
2489  case kArmS8x16ZipLeft: {
2490  Simd128Register dst = i.OutputSimd128Register(),
2491  src1 = i.InputSimd128Register(1);
2492  DCHECK(dst == i.InputSimd128Register(0));
2493  // src0 = [0, 1, 2, 3, ... 15], src1 = [16, 17, 18, 19, ... 31]
2494  __ vmov(dst.high(), src1.low());
2495  __ vzip(Neon8, dst.low(), dst.high()); // dst = [0, 16, 1, 17, ... 23]
2496  break;
2497  }
2498  case kArmS8x16ZipRight: {
2499  Simd128Register dst = i.OutputSimd128Register(),
2500  src1 = i.InputSimd128Register(1);
2501  DCHECK(dst == i.InputSimd128Register(0));
2502  // src0 = [16, 17, 18, 19, ... 31], src1 = [0, 1, 2, 3, ... 15] (flipped).
2503  __ vmov(dst.low(), src1.high());
2504  __ vzip(Neon8, dst.low(), dst.high()); // dst = [8, 24, 9, 25, ... 31]
2505  break;
2506  }
2507  case kArmS8x16UnzipLeft: {
2508  Simd128Register dst = i.OutputSimd128Register(),
2509  src1 = i.InputSimd128Register(1);
2510  UseScratchRegisterScope temps(tasm());
2511  Simd128Register scratch = temps.AcquireQ();
2512  DCHECK(dst == i.InputSimd128Register(0));
2513  // src0 = [0, 1, 2, 3, ... 15], src1 = [16, 17, 18, 19, ... 31]
2514  __ vmov(scratch, src1);
2515  __ vuzp(Neon8, dst, scratch); // dst = [0, 2, 4, 6, ... 30]
2516  break;
2517  }
2518  case kArmS8x16UnzipRight: {
2519  Simd128Register dst = i.OutputSimd128Register(),
2520  src1 = i.InputSimd128Register(1);
2521  UseScratchRegisterScope temps(tasm());
2522  Simd128Register scratch = temps.AcquireQ();
2523  DCHECK(dst == i.InputSimd128Register(0));
2524  // src0 = [16, 17, 18, 19, ... 31], src1 = [0, 1, 2, 3, ... 15] (flipped).
2525  __ vmov(scratch, src1);
2526  __ vuzp(Neon8, scratch, dst); // dst = [1, 3, 5, 7, ... 31]
2527  break;
2528  }
2529  case kArmS8x16TransposeLeft: {
2530  Simd128Register dst = i.OutputSimd128Register(),
2531  src1 = i.InputSimd128Register(1);
2532  UseScratchRegisterScope temps(tasm());
2533  Simd128Register scratch = temps.AcquireQ();
2534  DCHECK(dst == i.InputSimd128Register(0));
2535  // src0 = [0, 1, 2, 3, ... 15], src1 = [16, 17, 18, 19, ... 31]
2536  __ vmov(scratch, src1);
2537  __ vtrn(Neon8, dst, scratch); // dst = [0, 16, 2, 18, ... 30]
2538  break;
2539  }
2540  case kArmS8x16TransposeRight: {
2541  Simd128Register dst = i.OutputSimd128Register(),
2542  src1 = i.InputSimd128Register(1);
2543  UseScratchRegisterScope temps(tasm());
2544  Simd128Register scratch = temps.AcquireQ();
2545  DCHECK(dst == i.InputSimd128Register(0));
2546  // src0 = [16, 17, 18, 19, ... 31], src1 = [0, 1, 2, 3, ... 15] (flipped).
2547  __ vmov(scratch, src1);
2548  __ vtrn(Neon8, scratch, dst); // dst = [1, 17, 3, 19, ... 31]
2549  break;
2550  }
2551  case kArmS8x16Concat: {
2552  __ vext(i.OutputSimd128Register(), i.InputSimd128Register(0),
2553  i.InputSimd128Register(1), i.InputInt4(2));
2554  break;
2555  }
2556  case kArmS8x16Shuffle: {
2557  Simd128Register dst = i.OutputSimd128Register(),
2558  src0 = i.InputSimd128Register(0),
2559  src1 = i.InputSimd128Register(1);
2560  DwVfpRegister table_base = src0.low();
2561  UseScratchRegisterScope temps(tasm());
2562  Simd128Register scratch = temps.AcquireQ();
2563  // If this is a unary shuffle, the table is src0 (2 d-registers);
2564  // otherwise it is src0 and src1 (4 d-registers), which must be consecutive.
2565  int table_size = src0 == src1 ? 2 : 4;
2566  DCHECK_IMPLIES(src0 != src1, src0.code() + 1 == src1.code());
2567  // The shuffle lane mask is a byte mask, materialize in scratch.
2568  int scratch_s_base = scratch.code() * 4;
2569  for (int j = 0; j < 4; j++) {
2570  uint32_t four_lanes = i.InputUint32(2 + j);
2571  // Ensure byte indices are in [0, 31] so masks are never NaNs.
2572  four_lanes &= 0x1F1F1F1F;
2573  __ vmov(SwVfpRegister::from_code(scratch_s_base + j),
2574  Float32::FromBits(four_lanes));
2575  }
2576  NeonListOperand table(table_base, table_size);
2577  if (dst != src0 && dst != src1) {
2578  __ vtbl(dst.low(), table, scratch.low());
2579  __ vtbl(dst.high(), table, scratch.high());
2580  } else {
2581  __ vtbl(scratch.low(), table, scratch.low());
2582  __ vtbl(scratch.high(), table, scratch.high());
2583  __ vmov(dst, scratch);
2584  }
2585  break;
2586  }
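  // vtbl is a bytewise table lookup: each index byte selects a byte from
  // the 16- or 32-byte table, and out-of-range indices produce 0. The
  // scratch detour in the else branch keeps the table registers intact
  // while dst aliases one of them.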
2587  case kArmS32x2Reverse: {
2588  __ vrev64(Neon32, i.OutputSimd128Register(), i.InputSimd128Register(0));
2589  break;
2590  }
2591  case kArmS16x4Reverse: {
2592  __ vrev64(Neon16, i.OutputSimd128Register(), i.InputSimd128Register(0));
2593  break;
2594  }
2595  case kArmS16x2Reverse: {
2596  __ vrev32(Neon16, i.OutputSimd128Register(), i.InputSimd128Register(0));
2597  break;
2598  }
2599  case kArmS8x8Reverse: {
2600  __ vrev64(Neon8, i.OutputSimd128Register(), i.InputSimd128Register(0));
2601  break;
2602  }
2603  case kArmS8x4Reverse: {
2604  __ vrev32(Neon8, i.OutputSimd128Register(), i.InputSimd128Register(0));
2605  break;
2606  }
2607  case kArmS8x2Reverse: {
2608  __ vrev16(Neon8, i.OutputSimd128Register(), i.InputSimd128Register(0));
2609  break;
2610  }
2611  case kArmS1x4AnyTrue: {
2612  const QwNeonRegister& src = i.InputSimd128Register(0);
2613  UseScratchRegisterScope temps(tasm());
2614  DwVfpRegister scratch = temps.AcquireD();
2615  __ vpmax(NeonU32, scratch, src.low(), src.high());
2616  __ vpmax(NeonU32, scratch, scratch, scratch);
2617  __ ExtractLane(i.OutputRegister(), scratch, NeonS32, 0);
2618  break;
2619  }
2620  case kArmS1x4AllTrue: {
2621  const QwNeonRegister& src = i.InputSimd128Register(0);
2622  UseScratchRegisterScope temps(tasm());
2623  DwVfpRegister scratch = temps.AcquireD();
2624  __ vpmin(NeonU32, scratch, src.low(), src.high());
2625  __ vpmin(NeonU32, scratch, scratch, scratch);
2626  __ ExtractLane(i.OutputRegister(), scratch, NeonS32, 0);
2627  break;
2628  }
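  // The Any/AllTrue reductions work pairwise: each vpmax (vpmin) step
  // halves the number of live lanes, so after log2(lanes) steps lane 0 is
  // nonzero iff any (respectively every) input lane was nonzero.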
2629  case kArmS1x8AnyTrue: {
2630  const QwNeonRegister& src = i.InputSimd128Register(0);
2631  UseScratchRegisterScope temps(tasm());
2632  DwVfpRegister scratch = temps.AcquireD();
2633  __ vpmax(NeonU16, scratch, src.low(), src.high());
2634  __ vpmax(NeonU16, scratch, scratch, scratch);
2635  __ vpmax(NeonU16, scratch, scratch, scratch);
2636  __ ExtractLane(i.OutputRegister(), scratch, NeonS16, 0);
2637  break;
2638  }
2639  case kArmS1x8AllTrue: {
2640  const QwNeonRegister& src = i.InputSimd128Register(0);
2641  UseScratchRegisterScope temps(tasm());
2642  DwVfpRegister scratch = temps.AcquireD();
2643  __ vpmin(NeonU16, scratch, src.low(), src.high());
2644  __ vpmin(NeonU16, scratch, scratch, scratch);
2645  __ vpmin(NeonU16, scratch, scratch, scratch);
2646  __ ExtractLane(i.OutputRegister(), scratch, NeonS16, 0);
2647  break;
2648  }
2649  case kArmS1x16AnyTrue: {
2650  const QwNeonRegister& src = i.InputSimd128Register(0);
2651  UseScratchRegisterScope temps(tasm());
2652  QwNeonRegister q_scratch = temps.AcquireQ();
2653  DwVfpRegister d_scratch = q_scratch.low();
2654  __ vpmax(NeonU8, d_scratch, src.low(), src.high());
2655  __ vpmax(NeonU8, d_scratch, d_scratch, d_scratch);
2656  // vtst to detect any set bits in the bottom 32 bits of d_scratch.
2657  // This saves an instruction vs. a third vpmax. Only lane 0 of the
2658  // result is read, so the contents of q_scratch's upper half are irrelevant.
2659  __ vtst(Neon32, q_scratch, q_scratch, q_scratch);
2660  __ ExtractLane(i.OutputRegister(), d_scratch, NeonS32, 0);
2661  break;
2662  }
2663  case kArmS1x16AllTrue: {
2664  const QwNeonRegister& src = i.InputSimd128Register(0);
2665  UseScratchRegisterScope temps(tasm());
2666  DwVfpRegister scratch = temps.AcquireD();
2667  __ vpmin(NeonU8, scratch, src.low(), src.high());
2668  __ vpmin(NeonU8, scratch, scratch, scratch);
2669  __ vpmin(NeonU8, scratch, scratch, scratch);
2670  __ vpmin(NeonU8, scratch, scratch, scratch);
2671  __ ExtractLane(i.OutputRegister(), scratch, NeonS8, 0);
2672  break;
2673  }
2674  case kWord32AtomicLoadInt8:
2675  ASSEMBLE_ATOMIC_LOAD_INTEGER(ldrsb);
2676  break;
2677  case kWord32AtomicLoadUint8:
2678  ASSEMBLE_ATOMIC_LOAD_INTEGER(ldrb);
2679  break;
2680  case kWord32AtomicLoadInt16:
2681  ASSEMBLE_ATOMIC_LOAD_INTEGER(ldrsh);
2682  break;
2683  case kWord32AtomicLoadUint16:
2684  ASSEMBLE_ATOMIC_LOAD_INTEGER(ldrh);
2685  break;
2686  case kWord32AtomicLoadWord32:
2687  ASSEMBLE_ATOMIC_LOAD_INTEGER(ldr);
2688  break;
2689  case kWord32AtomicStoreWord8:
2690  ASSEMBLE_ATOMIC_STORE_INTEGER(strb);
2691  break;
2692  case kWord32AtomicStoreWord16:
2693  ASSEMBLE_ATOMIC_STORE_INTEGER(strh);
2694  break;
2695  case kWord32AtomicStoreWord32:
2696  ASSEMBLE_ATOMIC_STORE_INTEGER(str);
2697  break;
2698  case kWord32AtomicExchangeInt8:
2699  ASSEMBLE_ATOMIC_EXCHANGE_INTEGER(ldrexb, strexb);
2700  __ sxtb(i.OutputRegister(0), i.OutputRegister(0));
2701  break;
2702  case kWord32AtomicExchangeUint8:
2703  ASSEMBLE_ATOMIC_EXCHANGE_INTEGER(ldrexb, strexb);
2704  break;
2705  case kWord32AtomicExchangeInt16:
2706  ASSEMBLE_ATOMIC_EXCHANGE_INTEGER(ldrexh, strexh);
2707  __ sxth(i.OutputRegister(0), i.OutputRegister(0));
2708  break;
2709  case kWord32AtomicExchangeUint16:
2710  ASSEMBLE_ATOMIC_EXCHANGE_INTEGER(ldrexh, strexh);
2711  break;
2712  case kWord32AtomicExchangeWord32:
2713  ASSEMBLE_ATOMIC_EXCHANGE_INTEGER(ldrex, strex);
2714  break;
2715  case kWord32AtomicCompareExchangeInt8:
2716  __ add(i.TempRegister(1), i.InputRegister(0), i.InputRegister(1));
2717  __ uxtb(i.TempRegister(2), i.InputRegister(2));
2718  ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER(ldrexb, strexb,
2719  i.TempRegister(2));
2720  __ sxtb(i.OutputRegister(0), i.OutputRegister(0));
2721  break;
2722  case kWord32AtomicCompareExchangeUint8:
2723  __ add(i.TempRegister(1), i.InputRegister(0), i.InputRegister(1));
2724  __ uxtb(i.TempRegister(2), i.InputRegister(2));
2725  ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER(ldrexb, strexb,
2726  i.TempRegister(2));
2727  break;
2728  case kWord32AtomicCompareExchangeInt16:
2729  __ add(i.TempRegister(1), i.InputRegister(0), i.InputRegister(1));
2730  __ uxth(i.TempRegister(2), i.InputRegister(2));
2731  ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER(ldrexh, strexh,
2732  i.TempRegister(2));
2733  __ sxth(i.OutputRegister(0), i.OutputRegister(0));
2734  break;
2735  case kWord32AtomicCompareExchangeUint16:
2736  __ add(i.TempRegister(1), i.InputRegister(0), i.InputRegister(1));
2737  __ uxth(i.TempRegister(2), i.InputRegister(2));
2738  ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER(ldrexh, strexh,
2739  i.TempRegister(2));
2740  break;
2741  case kWord32AtomicCompareExchangeWord32:
2742  __ add(i.TempRegister(1), i.InputRegister(0), i.InputRegister(1));
2743  ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER(ldrex, strex,
2744  i.InputRegister(2));
2745  break;
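  // In the narrow cases above, the expected value is zero-extended with
  // uxtb/uxth because ldrexb/ldrexh zero-extend the loaded value, making
  // the comparison inside the exchange loop compare like with like.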
2746 #define ATOMIC_BINOP_CASE(op, inst) \
2747  case kWord32Atomic##op##Int8: \
2748  ASSEMBLE_ATOMIC_BINOP(ldrexb, strexb, inst); \
2749  __ sxtb(i.OutputRegister(0), i.OutputRegister(0)); \
2750  break; \
2751  case kWord32Atomic##op##Uint8: \
2752  ASSEMBLE_ATOMIC_BINOP(ldrexb, strexb, inst); \
2753  break; \
2754  case kWord32Atomic##op##Int16: \
2755  ASSEMBLE_ATOMIC_BINOP(ldrexh, strexh, inst); \
2756  __ sxth(i.OutputRegister(0), i.OutputRegister(0)); \
2757  break; \
2758  case kWord32Atomic##op##Uint16: \
2759  ASSEMBLE_ATOMIC_BINOP(ldrexh, strexh, inst); \
2760  break; \
2761  case kWord32Atomic##op##Word32: \
2762  ASSEMBLE_ATOMIC_BINOP(ldrex, strex, inst); \
2763  break;
2764  ATOMIC_BINOP_CASE(Add, add)
2765  ATOMIC_BINOP_CASE(Sub, sub)
2766  ATOMIC_BINOP_CASE(And, and_)
2767  ATOMIC_BINOP_CASE(Or, orr)
2768  ATOMIC_BINOP_CASE(Xor, eor)
2769 #undef ATOMIC_BINOP_CASE
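  // All the atomic sequences above rely on Arm's exclusive monitor: ldrex*
  // acquires a reservation on the address, strex* only stores if the
  // reservation still holds (writing 0 to its status register on success),
  // and the generated loop retries until the store lands.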
2770  case kArmWord32AtomicPairLoad: {
2771  DCHECK(VerifyOutputOfAtomicPairInstr(&i, instr, r0, r1));
2772  __ add(i.TempRegister(0), i.InputRegister(0), i.InputRegister(1));
2773  __ ldrexd(r0, r1, i.TempRegister(0));
2774  __ dmb(ISH);
2775  break;
2776  }
2777  case kArmWord32AtomicPairStore: {
2778  Label store;
2779  __ add(i.TempRegister(0), i.InputRegister(0), i.InputRegister(1));
2780  __ dmb(ISH);
2781  __ bind(&store);
2782  __ ldrexd(i.TempRegister(1), i.TempRegister(2), i.TempRegister(0));
2783  __ strexd(i.TempRegister(1), i.InputRegister(2), i.InputRegister(3),
2784  i.TempRegister(0));
2785  __ teq(i.TempRegister(1), Operand(0));
2786  __ b(ne, &store);
2787  __ dmb(ISH);
2788  break;
2789  }
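  // The ldrexd inside the store loop is not a stray load: strexd can only
  // succeed while the core holds an exclusive reservation, and ldrexd is
  // what (re)acquires it. The loaded values are discarded.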
2790 #define ATOMIC_ARITH_BINOP_CASE(op, instr1, instr2) \
2791  case kArmWord32AtomicPair##op: { \
2792  DCHECK(VerifyOutputOfAtomicPairInstr(&i, instr, r2, r3)); \
2793  ASSEMBLE_ATOMIC64_ARITH_BINOP(instr1, instr2); \
2794  break; \
2795  }
2796  ATOMIC_ARITH_BINOP_CASE(Add, add, adc)
2797  ATOMIC_ARITH_BINOP_CASE(Sub, sub, sbc)
2798 #undef ATOMIC_ARITH_BINOP_CASE
2799 #define ATOMIC_LOGIC_BINOP_CASE(op, instr1) \
2800  case kArmWord32AtomicPair##op: { \
2801  DCHECK(VerifyOutputOfAtomicPairInstr(&i, instr, r2, r3)); \
2802  ASSEMBLE_ATOMIC64_LOGIC_BINOP(instr1); \
2803  break; \
2804  }
2805  ATOMIC_LOGIC_BINOP_CASE(And, and_)
2806  ATOMIC_LOGIC_BINOP_CASE(Or, orr)
2807  ATOMIC_LOGIC_BINOP_CASE(Xor, eor)
2808 #undef ATOMIC_LOGIC_BINOP_CASE
2809  case kArmWord32AtomicPairExchange: {
2810  DCHECK(VerifyOutputOfAtomicPairInstr(&i, instr, r6, r7));
2811  Label exchange;
2812  __ add(i.TempRegister(0), i.InputRegister(2), i.InputRegister(3));
2813  __ dmb(ISH);
2814  __ bind(&exchange);
2815  __ ldrexd(r6, r7, i.TempRegister(0));
2816  __ strexd(i.TempRegister(1), i.InputRegister(0), i.InputRegister(1),
2817  i.TempRegister(0));
2818  __ teq(i.TempRegister(1), Operand(0));
2819  __ b(ne, &exchange);
2820  __ dmb(ISH);
2821  break;
2822  }
2823  case kArmWord32AtomicPairCompareExchange: {
2824  DCHECK(VerifyOutputOfAtomicPairInstr(&i, instr, r2, r3));
2825  __ add(i.TempRegister(0), i.InputRegister(4), i.InputRegister(5));
2826  Label compareExchange;
2827  Label exit;
2828  __ dmb(ISH);
2829  __ bind(&compareExchange);
2830  __ ldrexd(r2, r3, i.TempRegister(0));
2831  __ teq(i.InputRegister(0), Operand(r2));
2832  __ b(ne, &exit);
2833  __ teq(i.InputRegister(1), Operand(r3));
2834  __ b(ne, &exit);
2835  __ strexd(i.TempRegister(1), i.InputRegister(2), i.InputRegister(3),
2836  i.TempRegister(0));
2837  __ teq(i.TempRegister(1), Operand(0));
2838  __ b(ne, &compareExchange);
2839  __ bind(&exit);
2840  __ dmb(ISH);
2841  break;
2842  }
2843 #undef ASSEMBLE_ATOMIC_LOAD_INTEGER
2844 #undef ASSEMBLE_ATOMIC_STORE_INTEGER
2845 #undef ASSEMBLE_ATOMIC_EXCHANGE_INTEGER
2846 #undef ASSEMBLE_ATOMIC_COMPARE_EXCHANGE_INTEGER
2847 #undef ASSEMBLE_ATOMIC_BINOP
2848 #undef ASSEMBLE_ATOMIC64_ARITH_BINOP
2849 #undef ASSEMBLE_ATOMIC64_LOGIC_BINOP
2850 #undef ASSEMBLE_IEEE754_BINOP
2851 #undef ASSEMBLE_IEEE754_UNOP
2852 #undef ASSEMBLE_NEON_NARROWING_OP
2853 #undef ASSEMBLE_NEON_PAIRWISE_OP
2854  }
2855  return kSuccess;
2856 } // NOLINT(readability/fn_size)
2857 
2858 // Assembles branches after an instruction.
2859 void CodeGenerator::AssembleArchBranch(Instruction* instr, BranchInfo* branch) {
2860  ArmOperandConverter i(this, instr);
2861  Label* tlabel = branch->true_label;
2862  Label* flabel = branch->false_label;
2863  Condition cc = FlagsConditionToCondition(branch->condition);
2864  __ b(cc, tlabel);
2865  if (!branch->fallthru) __ b(flabel); // no fallthru to flabel.
2866 }
2867 
2868 void CodeGenerator::AssembleBranchPoisoning(FlagsCondition condition,
2869  Instruction* instr) {
2870  // TODO(jarin) Handle float comparisons (kUnordered[Not]Equal).
2871  if (condition == kUnorderedEqual || condition == kUnorderedNotEqual) {
2872  return;
2873  }
2874 
2875  condition = NegateFlagsCondition(condition);
2876  __ eor(kSpeculationPoisonRegister, kSpeculationPoisonRegister,
2877  Operand(kSpeculationPoisonRegister), SBit::LeaveCC,
2878  FlagsConditionToCondition(condition));
2879  __ csdb();
2880 }
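// The eor executes under the negated condition, i.e. exactly when this
// code is reached down a mispredicted path, and zeroes
// kSpeculationPoisonRegister there (x ^ x == 0); csdb then acts as a
// barrier so later instructions observe the zeroed poison value.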
2881 
2882 void CodeGenerator::AssembleArchDeoptBranch(Instruction* instr,
2883  BranchInfo* branch) {
2884  AssembleArchBranch(instr, branch);
2885 }
2886 
2887 void CodeGenerator::AssembleArchJump(RpoNumber target) {
2888  if (!IsNextInAssemblyOrder(target)) __ b(GetLabel(target));
2889 }
2890 
2891 void CodeGenerator::AssembleArchTrap(Instruction* instr,
2892  FlagsCondition condition) {
2893  class OutOfLineTrap final : public OutOfLineCode {
2894  public:
2895  OutOfLineTrap(CodeGenerator* gen, Instruction* instr)
2896  : OutOfLineCode(gen), instr_(instr), gen_(gen) {}
2897 
2898  void Generate() final {
2899  ArmOperandConverter i(gen_, instr_);
2900  TrapId trap_id =
2901  static_cast<TrapId>(i.InputInt32(instr_->InputCount() - 1));
2902  GenerateCallToTrap(trap_id);
2903  }
2904 
2905  private:
2906  void GenerateCallToTrap(TrapId trap_id) {
2907  if (trap_id == TrapId::kInvalid) {
2908  // We cannot test calls to the runtime in cctest/test-run-wasm.
2909  // Therefore we emit a call to C here instead of a call to the runtime.
2910  // We use the context register as the scratch register, because we do
2911  // not have a context here.
2912  __ PrepareCallCFunction(0, 0);
2913  __ CallCFunction(
2914  ExternalReference::wasm_call_trap_callback_for_testing(), 0);
2915  __ LeaveFrame(StackFrame::WASM_COMPILED);
2916  auto call_descriptor = gen_->linkage()->GetIncomingDescriptor();
2917  int pop_count =
2918  static_cast<int>(call_descriptor->StackParameterCount());
2919  __ Drop(pop_count);
2920  __ Ret();
2921  } else {
2922  gen_->AssembleSourcePosition(instr_);
2923  // A direct call to a wasm runtime stub defined in this module.
2924  // Just encode the stub index. This will be patched when the code
2925  // is added to the native module and copied into wasm code space.
2926  __ Call(static_cast<Address>(trap_id), RelocInfo::WASM_STUB_CALL);
2927  ReferenceMap* reference_map =
2928  new (gen_->zone()) ReferenceMap(gen_->zone());
2929  gen_->RecordSafepoint(reference_map, Safepoint::kSimple, 0,
2930  Safepoint::kNoLazyDeopt);
2931  if (FLAG_debug_code) {
2932  __ stop(GetAbortReason(AbortReason::kUnexpectedReturnFromWasmTrap));
2933  }
2934  }
2935  }
2936 
2937  Instruction* instr_;
2938  CodeGenerator* gen_;
2939  };
2940  auto ool = new (zone()) OutOfLineTrap(this, instr);
2941  Label* tlabel = ool->entry();
2942  Condition cc = FlagsConditionToCondition(condition);
2943  __ b(cc, tlabel);
2944 }
2945 
2946 // Assembles boolean materializations after an instruction.
2947 void CodeGenerator::AssembleArchBoolean(Instruction* instr,
2948  FlagsCondition condition) {
2949  ArmOperandConverter i(this, instr);
2950 
2951  // Materialize a full 32-bit 1 or 0 value. The result register is always the
2952  // last output of the instruction.
2953  DCHECK_NE(0u, instr->OutputCount());
2954  Register reg = i.OutputRegister(instr->OutputCount() - 1);
2955  Condition cc = FlagsConditionToCondition(condition);
2956  __ mov(reg, Operand(0));
2957  __ mov(reg, Operand(1), LeaveCC, cc);
2958 }
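// Both movs leave the flags alone (LeaveCC), so materializing the boolean
// cannot disturb the very condition it is testing.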
2959 
2960 void CodeGenerator::AssembleArchBinarySearchSwitch(Instruction* instr) {
2961  ArmOperandConverter i(this, instr);
2962  Register input = i.InputRegister(0);
2963  std::vector<std::pair<int32_t, Label*>> cases;
2964  for (size_t index = 2; index < instr->InputCount(); index += 2) {
2965  cases.push_back({i.InputInt32(index + 0), GetLabel(i.InputRpo(index + 1))});
2966  }
2967  AssembleArchBinarySearchSwitchRange(input, i.InputRpo(1), cases.data(),
2968  cases.data() + cases.size());
2969 }
2970 
2971 void CodeGenerator::AssembleArchLookupSwitch(Instruction* instr) {
2972  ArmOperandConverter i(this, instr);
2973  Register input = i.InputRegister(0);
2974  for (size_t index = 2; index < instr->InputCount(); index += 2) {
2975  __ cmp(input, Operand(i.InputInt32(index + 0)));
2976  __ b(eq, GetLabel(i.InputRpo(index + 1)));
2977  }
2978  AssembleArchJump(i.InputRpo(1));
2979 }
2980 
2981 void CodeGenerator::AssembleArchTableSwitch(Instruction* instr) {
2982  ArmOperandConverter i(this, instr);
2983  Register input = i.InputRegister(0);
2984  size_t const case_count = instr->InputCount() - 2;
2985  // Emit the constant pool first, if one is pending.
2986  __ CheckConstPool(true, true);
2987  __ cmp(input, Operand(case_count));
2988  __ BlockConstPoolFor(case_count + 2);
2989  __ add(pc, pc, Operand(input, LSL, 2), LeaveCC, lo);
2990  __ b(GetLabel(i.InputRpo(1)));
2991  for (size_t index = 0; index < case_count; ++index) {
2992  __ b(GetLabel(i.InputRpo(index + 2)));
2993  }
2994 }
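// This relies on pc reading as "address of current instruction + 8" on
// Arm: the conditional "add pc, pc, input << 2" jumps into the branch
// table that starts two instructions later, and BlockConstPoolFor keeps
// the assembler from dropping a constant pool into that table.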
2995 
2996 void CodeGenerator::FinishFrame(Frame* frame) {
2997  auto call_descriptor = linkage()->GetIncomingDescriptor();
2998 
2999  const RegList saves_fp = call_descriptor->CalleeSavedFPRegisters();
3000  if (saves_fp != 0) {
3001  frame->AlignSavedCalleeRegisterSlots();
3002  }
3003 
3004  if (saves_fp != 0) {
3005  // Save callee-saved FP registers.
3006  STATIC_ASSERT(DwVfpRegister::kNumRegisters == 32);
3007  uint32_t last = base::bits::CountLeadingZeros32(saves_fp) - 1;
3008  uint32_t first = base::bits::CountTrailingZeros32(saves_fp);
3009  DCHECK_EQ((last - first + 1), base::bits::CountPopulation(saves_fp));
3010  frame->AllocateSavedCalleeRegisterSlots((last - first + 1) *
3011  (kDoubleSize / kPointerSize));
3012  }
3013  const RegList saves = call_descriptor->CalleeSavedRegisters();
3014  if (saves != 0) {
3015  // Save callee-saved registers.
3016  frame->AllocateSavedCalleeRegisterSlots(base::bits::CountPopulation(saves));
3017  }
3018 }
3019 
3020 void CodeGenerator::AssembleConstructFrame() {
3021  auto call_descriptor = linkage()->GetIncomingDescriptor();
3022  if (frame_access_state()->has_frame()) {
3023  if (call_descriptor->IsCFunctionCall()) {
3024  __ Push(lr, fp);
3025  __ mov(fp, sp);
3026  } else if (call_descriptor->IsJSFunctionCall()) {
3027  __ Prologue();
3028  if (call_descriptor->PushArgumentCount()) {
3029  __ Push(kJavaScriptCallArgCountRegister);
3030  }
3031  } else {
3032  __ StubPrologue(info()->GetOutputStackFrameType());
3033  if (call_descriptor->IsWasmFunctionCall()) {
3034  __ Push(kWasmInstanceRegister);
3035  } else if (call_descriptor->IsWasmImportWrapper()) {
3036  // WASM import wrappers are passed a tuple in the place of the instance.
3037  // Unpack the tuple into the instance and the target callable.
3038  // This must be done here in the codegen because it cannot be expressed
3039  // properly in the graph.
3040  __ ldr(kJSFunctionRegister,
3041  FieldMemOperand(kWasmInstanceRegister, Tuple2::kValue2Offset));
3042  __ ldr(kWasmInstanceRegister,
3043  FieldMemOperand(kWasmInstanceRegister, Tuple2::kValue1Offset));
3044  __ Push(kWasmInstanceRegister);
3045  }
3046  }
3047 
3048  unwinding_info_writer_.MarkFrameConstructed(__ pc_offset());
3049  }
3050 
3051  int shrink_slots = frame()->GetTotalFrameSlotCount() -
3052  call_descriptor->CalculateFixedFrameSize();
3053 
3054  if (info()->is_osr()) {
3055  // TurboFan OSR-compiled functions cannot be entered directly.
3056  __ Abort(AbortReason::kShouldNotDirectlyEnterOsrFunction);
3057 
3058  // Unoptimized code jumps directly to this entrypoint while the unoptimized
3059  // frame is still on the stack. Optimized code uses OSR values directly from
3060  // the unoptimized frame. Thus, all that needs to be done is to allocate the
3061  // remaining stack slots.
3062  if (FLAG_code_comments) __ RecordComment("-- OSR entrypoint --");
3063  osr_pc_offset_ = __ pc_offset();
3064  shrink_slots -= osr_helper()->UnoptimizedFrameSlots();
3065  ResetSpeculationPoison();
3066  }
3067 
3068  const RegList saves = call_descriptor->CalleeSavedRegisters();
3069  const RegList saves_fp = call_descriptor->CalleeSavedFPRegisters();
3070 
3071  if (shrink_slots > 0) {
3072  DCHECK(frame_access_state()->has_frame());
3073  if (info()->IsWasm() && shrink_slots > 128) {
3074  // For WebAssembly functions with big frames we have to do the stack
3075  // overflow check before we construct the frame. Otherwise we may not
3076  // have enough space on the stack to call the runtime to handle the
3077  // stack overflow.
3078  Label done;
3079 
3080  // If the frame is bigger than the stack, we throw the stack overflow
3081  // exception unconditionally. Thereby we can avoid the integer overflow
3082  // check in the condition code.
3083  if ((shrink_slots * kPointerSize) < (FLAG_stack_size * 1024)) {
3084  UseScratchRegisterScope temps(tasm());
3085  Register scratch = temps.Acquire();
3086  __ ldr(scratch, FieldMemOperand(
3087  kWasmInstanceRegister,
3088  WasmInstanceObject::kRealStackLimitAddressOffset));
3089  __ ldr(scratch, MemOperand(scratch));
3090  __ add(scratch, scratch, Operand(shrink_slots * kPointerSize));
3091  __ cmp(sp, scratch);
3092  __ b(cs, &done);
3093  }
3094 
3095  __ ldr(r2, FieldMemOperand(kWasmInstanceRegister,
3096  WasmInstanceObject::kCEntryStubOffset));
3097  __ Move(cp, Smi::zero());
3098  __ CallRuntimeWithCEntry(Runtime::kThrowWasmStackOverflow, r2);
3099  // We come from WebAssembly; there are no references for the GC.
3100  ReferenceMap* reference_map = new (zone()) ReferenceMap(zone());
3101  RecordSafepoint(reference_map, Safepoint::kSimple, 0,
3102  Safepoint::kNoLazyDeopt);
3103  if (FLAG_debug_code) {
3104  __ stop(GetAbortReason(AbortReason::kUnexpectedReturnFromThrow));
3105  }
3106 
3107  __ bind(&done);
3108  }
3109 
3110  // Skip callee-saved and return slots, which are pushed below.
3111  shrink_slots -= base::bits::CountPopulation(saves);
3112  shrink_slots -= frame()->GetReturnSlotCount();
3113  shrink_slots -= 2 * base::bits::CountPopulation(saves_fp);
3114  if (shrink_slots > 0) {
3115  __ sub(sp, sp, Operand(shrink_slots * kPointerSize));
3116  }
3117  }
3118 
3119  if (saves_fp != 0) {
3120  // Save callee-saved FP registers.
3121  STATIC_ASSERT(DwVfpRegister::kNumRegisters == 32);
3122  uint32_t last = base::bits::CountLeadingZeros32(saves_fp) - 1;
3123  uint32_t first = base::bits::CountTrailingZeros32(saves_fp);
3124  DCHECK_EQ((last - first + 1), base::bits::CountPopulation(saves_fp));
3125  __ vstm(db_w, sp, DwVfpRegister::from_code(first),
3126  DwVfpRegister::from_code(last));
3127  }
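  // The callee-saved FP set is a contiguous range of d-registers (verified
  // by the DCHECK above), which is what allows a single vstm here and a
  // single vldm in AssembleReturn.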
3128 
3129  if (saves != 0) {
3130  // Save callee-saved registers.
3131  __ stm(db_w, sp, saves);
3132  }
3133 
3134  const int returns = frame()->GetReturnSlotCount();
3135  if (returns != 0) {
3136  // Create space for returns.
3137  __ sub(sp, sp, Operand(returns * kPointerSize));
3138  }
3139 }
3140 
3141 void CodeGenerator::AssembleReturn(InstructionOperand* pop) {
3142  auto call_descriptor = linkage()->GetIncomingDescriptor();
3143  int pop_count = static_cast<int>(call_descriptor->StackParameterCount());
3144 
3145  const int returns = frame()->GetReturnSlotCount();
3146  if (returns != 0) {
3147  // Free the space reserved for returns.
3148  __ add(sp, sp, Operand(returns * kPointerSize));
3149  }
3150 
3151  // Restore registers.
3152  const RegList saves = call_descriptor->CalleeSavedRegisters();
3153  if (saves != 0) {
3154  __ ldm(ia_w, sp, saves);
3155  }
3156 
3157  // Restore FP registers.
3158  const RegList saves_fp = call_descriptor->CalleeSavedFPRegisters();
3159  if (saves_fp != 0) {
3160  STATIC_ASSERT(DwVfpRegister::kNumRegisters == 32);
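  // Mirror of the prologue: vldm pops the same contiguous block of
  // callee-saved D registers.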
3161  uint32_t last = base::bits::CountLeadingZeros32(saves_fp) - 1;
3162  uint32_t first = base::bits::CountTrailingZeros32(saves_fp);
3163  __ vldm(ia_w, sp, DwVfpRegister::from_code(first),
3164  DwVfpRegister::from_code(last));
3165  }
3166 
3167  unwinding_info_writer_.MarkBlockWillExit();
3168 
3169  ArmOperandConverter g(this, nullptr);
3170  if (call_descriptor->IsCFunctionCall()) {
3171  AssembleDeconstructFrame();
3172  } else if (frame_access_state()->has_frame()) {
3173  // Canonicalize JSFunction return sites for now unless they have a
3174  // variable number of stack slot pops.
3175  if (pop->IsImmediate() && g.ToConstant(pop).ToInt32() == 0) {
3176  if (return_label_.is_bound()) {
3177  __ b(&return_label_);
3178  return;
3179  } else {
3180  __ bind(&return_label_);
3181  AssembleDeconstructFrame();
3182  }
3183  } else {
3184  AssembleDeconstructFrame();
3185  }
3186  }
3187 
3188  if (pop->IsImmediate()) {
3189  DCHECK_EQ(Constant::kInt32, g.ToConstant(pop).type());
3190  pop_count += g.ToConstant(pop).ToInt32();
3191  } else {
3192  __ Drop(g.ToRegister(pop));
3193  }
3194  __ Drop(pop_count);
3195  __ Ret();
3196 }
3197 
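// CheckConstPool(true, false) force-emits any pending constant pool entries;
// no jump over the pool is required since this runs at the end of code
// generation.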
3198 void CodeGenerator::FinishCode() { __ CheckConstPool(true, false); }
3199 
3200 void CodeGenerator::AssembleMove(InstructionOperand* source,
3201  InstructionOperand* destination) {
3202  ArmOperandConverter g(this, nullptr);
3203  // Helper function to write the given constant to the dst register.
3204  auto MoveConstantToRegister = [&](Register dst, Constant src) {
3205  if (src.type() == Constant::kHeapObject) {
3206  Handle<HeapObject> src_object = src.ToHeapObject();
3207  RootIndex index;
3208  if (IsMaterializableFromRoot(src_object, &index)) {
3209  __ LoadRoot(dst, index);
3210  } else {
3211  __ Move(dst, src_object);
3212  }
3213  } else if (src.type() == Constant::kExternalReference) {
3214  __ Move(dst, src.ToExternalReference());
3215  } else {
3216  __ mov(dst, g.ToImmediate(source));
3217  }
3218  };
3219  switch (MoveType::InferMove(source, destination)) {
3220  case MoveType::kRegisterToRegister:
3221  if (source->IsRegister()) {
3222  __ mov(g.ToRegister(destination), g.ToRegister(source));
3223  } else if (source->IsFloatRegister()) {
3224  DCHECK(destination->IsFloatRegister());
3225  // GapResolver may give us reg codes that don't map to actual
3226  // s-registers. Generate code to work around those cases.
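  // (The problematic codes presumably alias halves of d16-d31, which have
  // no S-register view; VmovExtended handles those codes.)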
3227  int src_code = LocationOperand::cast(source)->register_code();
3228  int dst_code = LocationOperand::cast(destination)->register_code();
3229  __ VmovExtended(dst_code, src_code);
3230  } else if (source->IsDoubleRegister()) {
3231  __ Move(g.ToDoubleRegister(destination), g.ToDoubleRegister(source));
3232  } else {
3233  __ Move(g.ToSimd128Register(destination), g.ToSimd128Register(source));
3234  }
3235  return;
3236  case MoveType::kRegisterToStack: {
3237  MemOperand dst = g.ToMemOperand(destination);
3238  if (source->IsRegister()) {
3239  __ str(g.ToRegister(source), dst);
3240  } else if (source->IsFloatRegister()) {
3241  // GapResolver may give us reg codes that don't map to actual
3242  // s-registers. Generate code to work around those cases.
3243  int src_code = LocationOperand::cast(source)->register_code();
3244  __ VmovExtended(dst, src_code);
3245  } else if (source->IsDoubleRegister()) {
3246  __ vstr(g.ToDoubleRegister(source), dst);
3247  } else {
3248  UseScratchRegisterScope temps(tasm());
3249  Register temp = temps.Acquire();
3250  QwNeonRegister src = g.ToSimd128Register(source);
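  // vst1 takes its address from a register with no immediate offset
  // (NeonMemOperand has no offset form), so compute the address explicitly.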
3251  __ add(temp, dst.rn(), Operand(dst.offset()));
3252  __ vst1(Neon8, NeonListOperand(src.low(), 2), NeonMemOperand(temp));
3253  }
3254  return;
3255  }
3256  case MoveType::kStackToRegister: {
3257  MemOperand src = g.ToMemOperand(source);
3258  if (source->IsStackSlot()) {
3259  __ ldr(g.ToRegister(destination), src);
3260  } else if (source->IsFloatStackSlot()) {
3261  DCHECK(destination->IsFloatRegister());
3262  // GapResolver may give us reg codes that don't map to actual
3263  // s-registers. Generate code to work around those cases.
3264  int dst_code = LocationOperand::cast(destination)->register_code();
3265  __ VmovExtended(dst_code, src);
3266  } else if (source->IsDoubleStackSlot()) {
3267  __ vldr(g.ToDoubleRegister(destination), src);
3268  } else {
3269  UseScratchRegisterScope temps(tasm());
3270  Register temp = temps.Acquire();
3271  QwNeonRegister dst = g.ToSimd128Register(destination);
3272  __ add(temp, src.rn(), Operand(src.offset()));
3273  __ vld1(Neon8, NeonListOperand(dst.low(), 2), NeonMemOperand(temp));
3274  }
3275  return;
3276  }
3277  case MoveType::kStackToStack: {
3278  MemOperand src = g.ToMemOperand(source);
3279  MemOperand dst = g.ToMemOperand(destination);
3280  UseScratchRegisterScope temps(tasm());
3281  if (source->IsStackSlot() || source->IsFloatStackSlot()) {
3282  SwVfpRegister temp = temps.AcquireS();
3283  __ vldr(temp, src);
3284  __ vstr(temp, dst);
3285  } else if (source->IsDoubleStackSlot()) {
3286  DwVfpRegister temp = temps.AcquireD();
3287  __ vldr(temp, src);
3288  __ vstr(temp, dst);
3289  } else {
3290  DCHECK(source->IsSimd128StackSlot());
3291  Register temp = temps.Acquire();
3292  QwNeonRegister temp_q = temps.AcquireQ();
3293  __ add(temp, src.rn(), Operand(src.offset()));
3294  __ vld1(Neon8, NeonListOperand(temp_q.low(), 2), NeonMemOperand(temp));
3295  __ add(temp, dst.rn(), Operand(dst.offset()));
3296  __ vst1(Neon8, NeonListOperand(temp_q.low(), 2), NeonMemOperand(temp));
3297  }
3298  return;
3299  }
3300  case MoveType::kConstantToRegister: {
3301  Constant src = g.ToConstant(source);
3302  if (destination->IsRegister()) {
3303  MoveConstantToRegister(g.ToRegister(destination), src);
3304  } else if (destination->IsFloatRegister()) {
3305  __ vmov(g.ToFloatRegister(destination),
3306  Float32::FromBits(src.ToFloat32AsInt()));
3307  } else {
3308  // TODO(arm): Look into optimizing this further if possible. Supporting
3309  // the NEON version of VMOV may help.
3310  __ vmov(g.ToDoubleRegister(destination), src.ToFloat64());
3311  }
3312  return;
3313  }
3314  case MoveType::kConstantToStack: {
3315  Constant src = g.ToConstant(source);
3316  MemOperand dst = g.ToMemOperand(destination);
3317  if (destination->IsStackSlot()) {
3318  UseScratchRegisterScope temps(tasm());
3319  // Acquire an S register instead of a general purpose register in case
3320  // `vstr` needs one to compute the address of `dst`.
3321  SwVfpRegister s_temp = temps.AcquireS();
3322  {
3323  // TODO(arm): This sequence could be optimized further if necessary by
3324  // writing the constant directly into `s_temp`.
3325  UseScratchRegisterScope temps(tasm());
3326  Register temp = temps.Acquire();
3327  MoveConstantToRegister(temp, src);
3328  __ vmov(s_temp, temp);
3329  }
3330  __ vstr(s_temp, dst);
3331  } else if (destination->IsFloatStackSlot()) {
3332  UseScratchRegisterScope temps(tasm());
3333  SwVfpRegister temp = temps.AcquireS();
3334  __ vmov(temp, Float32::FromBits(src.ToFloat32AsInt()));
3335  __ vstr(temp, dst);
3336  } else {
3337  DCHECK(destination->IsDoubleStackSlot());
3338  UseScratchRegisterScope temps(tasm());
3339  DwVfpRegister temp = temps.AcquireD();
3340  // TODO(arm): Look into optimizing this further if possible. Supporting
3341  // the NEON version of VMOV may help.
3342  __ vmov(temp, src.ToFloat64());
3343  __ vstr(temp, g.ToMemOperand(destination));
3344  }
3345  return;
3346  }
3347  }
3348  UNREACHABLE();
3349 }
3350 
3351 void CodeGenerator::AssembleSwap(InstructionOperand* source,
3352  InstructionOperand* destination) {
3353  ArmOperandConverter g(this, nullptr);
3354  switch (MoveType::InferSwap(source, destination)) {
3355  case MoveType::kRegisterToRegister:
3356  if (source->IsRegister()) {
3357  __ Swap(g.ToRegister(source), g.ToRegister(destination));
3358  } else if (source->IsFloatRegister()) {
3359  DCHECK(destination->IsFloatRegister());
3360  // GapResolver may give us reg codes that don't map to actual
3361  // s-registers. Generate code to work around those cases.
3362  UseScratchRegisterScope temps(tasm());
3363  LowDwVfpRegister temp = temps.AcquireLowD();
3364  int src_code = LocationOperand::cast(source)->register_code();
3365  int dst_code = LocationOperand::cast(destination)->register_code();
3366  __ VmovExtended(temp.low().code(), src_code);
3367  __ VmovExtended(src_code, dst_code);
3368  __ VmovExtended(dst_code, temp.low().code());
3369  } else if (source->IsDoubleRegister()) {
3370  __ Swap(g.ToDoubleRegister(source), g.ToDoubleRegister(destination));
3371  } else {
3372  __ Swap(g.ToSimd128Register(source), g.ToSimd128Register(destination));
3373  }
3374  return;
3375  case MoveType::kRegisterToStack: {
3376  MemOperand dst = g.ToMemOperand(destination);
3377  if (source->IsRegister()) {
3378  Register src = g.ToRegister(source);
3379  UseScratchRegisterScope temps(tasm());
3380  SwVfpRegister temp = temps.AcquireS();
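  // Park the GPR value in an S register so that no second core scratch
  // register is needed for the swap.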
3381  __ vmov(temp, src);
3382  __ ldr(src, dst);
3383  __ vstr(temp, dst);
3384  } else if (source->IsFloatRegister()) {
3385  int src_code = LocationOperand::cast(source)->register_code();
3386  UseScratchRegisterScope temps(tasm());
3387  LowDwVfpRegister temp = temps.AcquireLowD();
3388  __ VmovExtended(temp.low().code(), src_code);
3389  __ VmovExtended(src_code, dst);
3390  __ vstr(temp.low(), dst);
3391  } else if (source->IsDoubleRegister()) {
3392  UseScratchRegisterScope temps(tasm());
3393  DwVfpRegister temp = temps.AcquireD();
3394  DwVfpRegister src = g.ToDoubleRegister(source);
3395  __ Move(temp, src);
3396  __ vldr(src, dst);
3397  __ vstr(temp, dst);
3398  } else {
3399  QwNeonRegister src = g.ToSimd128Register(source);
3400  UseScratchRegisterScope temps(tasm());
3401  Register temp = temps.Acquire();
3402  QwNeonRegister temp_q = temps.AcquireQ();
3403  __ Move(temp_q, src);
3404  __ add(temp, dst.rn(), Operand(dst.offset()));
3405  __ vld1(Neon8, NeonListOperand(src.low(), 2), NeonMemOperand(temp));
3406  __ vst1(Neon8, NeonListOperand(temp_q.low(), 2), NeonMemOperand(temp));
3407  }
3408  return;
3409  }
3410  case MoveType::kStackToStack: {
3411  MemOperand src = g.ToMemOperand(source);
3412  MemOperand dst = g.ToMemOperand(destination);
3413  if (source->IsStackSlot() || source->IsFloatStackSlot()) {
3414  UseScratchRegisterScope temps(tasm());
3415  SwVfpRegister temp_0 = temps.AcquireS();
3416  SwVfpRegister temp_1 = temps.AcquireS();
3417  __ vldr(temp_0, dst);
3418  __ vldr(temp_1, src);
3419  __ vstr(temp_0, src);
3420  __ vstr(temp_1, dst);
3421  } else if (source->IsDoubleStackSlot()) {
3422  UseScratchRegisterScope temps(tasm());
3423  LowDwVfpRegister temp = temps.AcquireLowD();
3424  if (temps.CanAcquireD()) {
3425  DwVfpRegister temp_0 = temp;
3426  DwVfpRegister temp_1 = temps.AcquireD();
3427  __ vldr(temp_0, dst);
3428  __ vldr(temp_1, src);
3429  __ vstr(temp_0, src);
3430  __ vstr(temp_1, dst);
3431  } else {
3432  // We only have a single D register available. However, we can split
3433  // it into 2 S registers and swap the slots 32 bits at a time.
3434  MemOperand src0 = src;
3435  MemOperand dst0 = dst;
3436  MemOperand src1(src.rn(), src.offset() + kFloatSize);
3437  MemOperand dst1(dst.rn(), dst.offset() + kFloatSize);
3438  SwVfpRegister temp_0 = temp.low();
3439  SwVfpRegister temp_1 = temp.high();
3440  __ vldr(temp_0, dst0);
3441  __ vldr(temp_1, src0);
3442  __ vstr(temp_0, src0);
3443  __ vstr(temp_1, dst0);
3444  __ vldr(temp_0, dst1);
3445  __ vldr(temp_1, src1);
3446  __ vstr(temp_0, src1);
3447  __ vstr(temp_1, dst1);
3448  }
3449  } else {
3450  DCHECK(source->IsSimd128StackSlot());
3451  MemOperand src0 = src;
3452  MemOperand dst0 = dst;
3453  MemOperand src1(src.rn(), src.offset() + kDoubleSize);
3454  MemOperand dst1(dst.rn(), dst.offset() + kDoubleSize);
3455  UseScratchRegisterScope temps(tasm());
3456  DwVfpRegister temp_0 = temps.AcquireD();
3457  DwVfpRegister temp_1 = temps.AcquireD();
3458  __ vldr(temp_0, dst0);
3459  __ vldr(temp_1, src0);
3460  __ vstr(temp_0, src0);
3461  __ vstr(temp_1, dst0);
3462  __ vldr(temp_0, dst1);
3463  __ vldr(temp_1, src1);
3464  __ vstr(temp_0, src1);
3465  __ vstr(temp_1, dst1);
3466  }
3467  return;
3468  }
3469  default:
3470  UNREACHABLE();
3471  break;
3472  }
3473 }
3474 
3475 void CodeGenerator::AssembleJumpTable(Label** targets, size_t target_count) {
3476  // On 32-bit ARM we emit the jump tables inline.
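  // (AssembleArchTableSwitch emits the targets directly into the
  // instruction stream, so this hook should never be reached.)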
3477  UNREACHABLE();
3478 }
3479 
3480 #undef __
3481 
3482 } // namespace compiler
3483 } // namespace internal
3484 } // namespace v8