V8 API Reference, 7.2.502.16 (for Deno 0.2.4)
assembler-x64.h
1 // Copyright (c) 1994-2006 Sun Microsystems Inc.
2 // All Rights Reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are
6 // met:
7 //
8 // - Redistributions of source code must retain the above copyright notice,
9 // this list of conditions and the following disclaimer.
10 //
11 // - Redistribution in binary form must reproduce the above copyright
12 // notice, this list of conditions and the following disclaimer in the
13 // documentation and/or other materials provided with the distribution.
14 //
15 // - Neither the name of Sun Microsystems or the names of contributors may
16 // be used to endorse or promote products derived from this software without
17 // specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
20 // IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
21 // THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
23 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
24 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
25 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
26 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
27 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
28 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
29 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 
31 // The original source code covered by the license above has been
32 // modified significantly by Google Inc.
33 // Copyright 2012 the V8 project authors. All rights reserved.
34 
35 // A lightweight X64 Assembler.
36 
37 #ifndef V8_X64_ASSEMBLER_X64_H_
38 #define V8_X64_ASSEMBLER_X64_H_
39 
40 #include <deque>
41 #include <map>
42 #include <vector>
43 
44 #include "src/assembler.h"
45 #include "src/label.h"
46 #include "src/objects/smi.h"
47 #include "src/x64/constants-x64.h"
48 #include "src/x64/sse-instr.h"
49 
50 namespace v8 {
51 namespace internal {
52 
53 // Utility functions
54 
// X-macro enumerating all 16 x64 general-purpose registers: invokes V(name)
// once per register. The order is significant: enum RegisterCode is generated
// from this list, so the position of each entry becomes its numeric register
// code (rax = 0 ... r15 = 15). Do not reorder.
55 #define GENERAL_REGISTERS(V) \
56  V(rax) \
57  V(rcx) \
58  V(rdx) \
59  V(rbx) \
60  V(rsp) \
61  V(rbp) \
62  V(rsi) \
63  V(rdi) \
64  V(r8) \
65  V(r9) \
66  V(r10) \
67  V(r11) \
68  V(r12) \
69  V(r13) \
70  V(r14) \
71  V(r15)
72 
// X-macro listing a 12-register subset of GENERAL_REGISTERS. Compared with the
// full list it omits rsp, rbp, r10 and r13, and it lists rbx before rcx.
// NOTE(review): the omitted registers are presumably reserved (stack pointer,
// frame pointer, scratch/root registers) and so unavailable to the register
// allocator -- confirm against the users of this macro.
73 #define ALLOCATABLE_GENERAL_REGISTERS(V) \
74  V(rax) \
75  V(rbx) \
76  V(rdx) \
77  V(rcx) \
78  V(rsi) \
79  V(rdi) \
80  V(r8) \
81  V(r9) \
82  V(r11) \
83  V(r12) \
84  V(r14) \
85  V(r15)
86 
// Numeric codes of the general-purpose registers. One kRegCode_<name>
// enumerator is produced per GENERAL_REGISTERS entry, numbered from 0 in list
// order; kRegAfterLast therefore equals the number of entries in that list.
87 enum RegisterCode {
88 #define REGISTER_CODE(R) kRegCode_##R,
89  GENERAL_REGISTERS(REGISTER_CODE)
90 #undef REGISTER_CODE
91  kRegAfterLast
92 };
93 
// A general-purpose register, identified by its RegisterCode. The reg_code_
// member and the from_code<>() / no_reg() factories used elsewhere in this
// header are not declared here; they come from the RegisterBase base class.
94 class Register : public RegisterBase<Register, kRegAfterLast> {
95  public:
  // True for register codes 0-3, i.e. rax, rcx, rdx and rbx in
  // GENERAL_REGISTERS order.
  // NOTE(review): presumably these are the registers whose low byte can be
  // encoded without a REX prefix -- confirm against the byte-op emitters.
96  bool is_byte_register() const { return reg_code_ <= 3; }
97  // Return the high bit of the register code as a 0 or 1. Used often
98  // when constructing the REX prefix byte. (Codes are 0-15, so this is 1
  // exactly for r8-r15.)
99  int high_bit() const { return reg_code_ >> 3; }
100  // Return the 3 low bits of the register code. Used when encoding registers
101  // in modR/M, SIB, and opcode bytes.
102  int low_bits() const { return reg_code_ & 0x7; }
103 
104  private:
  // The constructor is private; RegisterBase is befriended so that it can
  // create Register values on behalf of callers.
105  friend class RegisterBase<Register, kRegAfterLast>;
106  explicit constexpr Register(int code) : RegisterBase(code) {}
107 };
108 
// Compile-time checks that Register remains a trivially copyable value type of
// exactly int size, so that passing it by value is cheap.
109 ASSERT_TRIVIALLY_COPYABLE(Register);
110 static_assert(sizeof(Register) == sizeof(int),
111  "Register can efficiently be passed by value");
112 
// Defines one constexpr Register constant per GENERAL_REGISTERS entry (rax,
// rcx, ... r15), each created from its kRegCode_<name> code. The helper macro
// is undefined again right after use.
113 #define DECLARE_REGISTER(R) \
114  constexpr Register R = Register::from_code<kRegCode_##R>();
115 GENERAL_REGISTERS(DECLARE_REGISTER)
116 #undef DECLARE_REGISTER
// Sentinel value returned by Register::no_reg().
117 constexpr Register no_reg = Register::no_reg();
118 
// Number of general-purpose registers; matches the 16 entries of
// GENERAL_REGISTERS.
119 constexpr int kNumRegs = 16;
120 
// Register list of the five registers treated as caller-saved: rax, rcx, rdx,
// rbx and rdi.
121 constexpr RegList kJSCallerSaved =
122  Register::ListOf<rax, rcx, rdx,
123  rbx, // used as a caller-saved register in JavaScript code
124  rdi // callee function
125  >();
126 
// Number of registers in kJSCallerSaved; must be kept in sync with that list.
127 constexpr int kNumJSCallerSaved = 5;
128 
129 // Number of registers for which space is reserved in safepoints.
130 constexpr int kNumSafepointRegisters = 16;
131 
// Registers holding the first four integer/pointer arguments of a native call.
// The assignment depends on the target ABI, selected by _WIN64.
132 #ifdef _WIN64
133  // Windows x64 calling convention: rcx, rdx, r8, r9.
134 constexpr Register arg_reg_1 = rcx;
135 constexpr Register arg_reg_2 = rdx;
136 constexpr Register arg_reg_3 = r8;
137 constexpr Register arg_reg_4 = r9;
138 #else
139  // System V AMD64 calling convention: rdi, rsi, rdx, rcx.
140 constexpr Register arg_reg_1 = rdi;
141 constexpr Register arg_reg_2 = rsi;
142 constexpr Register arg_reg_3 = rdx;
143 constexpr Register arg_reg_4 = rcx;
144 #endif // _WIN64
145 
146 
// X-macro enumerating the 16 XMM registers xmm0-xmm15: invokes V(name) once
// per register. enum DoubleRegisterCode is generated from this list, so list
// order defines each register's numeric code.
147 #define DOUBLE_REGISTERS(V) \
148  V(xmm0) \
149  V(xmm1) \
150  V(xmm2) \
151  V(xmm3) \
152  V(xmm4) \
153  V(xmm5) \
154  V(xmm6) \
155  V(xmm7) \
156  V(xmm8) \
157  V(xmm9) \
158  V(xmm10) \
159  V(xmm11) \
160  V(xmm12) \
161  V(xmm13) \
162  V(xmm14) \
163  V(xmm15)
164 
// Float and SIMD128 values use the very same XMM register list as doubles.
165 #define FLOAT_REGISTERS DOUBLE_REGISTERS
166 #define SIMD128_REGISTERS DOUBLE_REGISTERS
167 
// X-macro listing xmm0-xmm14, i.e. DOUBLE_REGISTERS without xmm15.
// NOTE(review): xmm15 is presumably reserved (e.g. as a scratch register) and
// therefore withheld from the register allocator -- confirm.
168 #define ALLOCATABLE_DOUBLE_REGISTERS(V) \
169  V(xmm0) \
170  V(xmm1) \
171  V(xmm2) \
172  V(xmm3) \
173  V(xmm4) \
174  V(xmm5) \
175  V(xmm6) \
176  V(xmm7) \
177  V(xmm8) \
178  V(xmm9) \
179  V(xmm10) \
180  V(xmm11) \
181  V(xmm12) \
182  V(xmm13) \
183  V(xmm14)
184 
// Architecture configuration flags for x64. Their consumers are not part of
// this listing.
// NOTE(review): kSimpleFPAliasing = true is consistent with FLOAT_REGISTERS and
// SIMD128_REGISTERS being plain aliases of DOUBLE_REGISTERS (all FP widths
// share xmm0-xmm15); the exact meaning of the other two flags should be
// confirmed at their use sites.
185 constexpr bool kPadArguments = false;
186 constexpr bool kSimpleFPAliasing = true;
187 constexpr bool kSimdMaskRegisters = false;
188 
// Numeric codes of the XMM registers. One kDoubleCode_<name> enumerator is
// produced per DOUBLE_REGISTERS entry, numbered from 0 in list order;
// kDoubleAfterLast therefore equals the number of entries in that list.
189 enum DoubleRegisterCode {
190 #define REGISTER_CODE(R) kDoubleCode_##R,
191  DOUBLE_REGISTERS(REGISTER_CODE)
192 #undef REGISTER_CODE
193  kDoubleAfterLast
194 };
195 
// An XMM register, identified by its DoubleRegisterCode. As with Register, the
// reg_code_ member and the from_code<>() / no_reg() factories used elsewhere
// in this header come from the RegisterBase base class.
196 class XMMRegister : public RegisterBase<XMMRegister, kDoubleAfterLast> {
197  public:
198  // Return the high bit of the register code as a 0 or 1. Used often
199  // when constructing the REX prefix byte. (Codes are 0-15, so this is 1
  // exactly for xmm8-xmm15.)
200  int high_bit() const { return reg_code_ >> 3; }
201  // Return the 3 low bits of the register code. Used when encoding registers
202  // in modR/M, SIB, and opcode bytes.
203  int low_bits() const { return reg_code_ & 0x7; }
204 
205  private:
  // The constructor is private; RegisterBase is befriended so that it can
  // create XMMRegister values on behalf of callers.
206  friend class RegisterBase<XMMRegister, kDoubleAfterLast>;
207  explicit constexpr XMMRegister(int code) : RegisterBase(code) {}
208 };
209 
// Compile-time checks that XMMRegister remains a trivially copyable value type
// of exactly int size, so that passing it by value is cheap.
210 ASSERT_TRIVIALLY_COPYABLE(XMMRegister);
211 static_assert(sizeof(XMMRegister) == sizeof(int),
212  "XMMRegister can efficiently be passed by value");
213 
// Float, double and 128-bit SIMD registers are all the same XMMRegister type.
214 typedef XMMRegister FloatRegister;
215 
216 typedef XMMRegister DoubleRegister;
217 
218 typedef XMMRegister Simd128Register;
219 
// Defines one constexpr DoubleRegister constant per DOUBLE_REGISTERS entry
// (xmm0 ... xmm15), each created from its kDoubleCode_<name> code.
220 #define DECLARE_REGISTER(R) \
221  constexpr DoubleRegister R = DoubleRegister::from_code<kDoubleCode_##R>();
222 DOUBLE_REGISTERS(DECLARE_REGISTER)
223 #undef DECLARE_REGISTER
// Sentinel value returned by DoubleRegister::no_reg().
224 constexpr DoubleRegister no_dreg = DoubleRegister::no_reg();
225 
// Branch / set / conditional-move condition codes. Values 0-15 are combined
// directly with opcode bytes (class Assembler in this header builds
// kJccShortPrefix | <condition>), so their numeric values must not change.
// Each even/odd pair (0/1, 2/3, ...) is a condition and its logical negation,
// which is what NegateCondition() relies on.
226 enum Condition {
227  // any value < 0 is considered no_condition
228  no_condition = -1,
229 
230  overflow = 0,
231  no_overflow = 1,
232  below = 2,
233  above_equal = 3,
234  equal = 4,
235  not_equal = 5,
236  below_equal = 6,
237  above = 7,
238  negative = 8,
239  positive = 9,
240  parity_even = 10,
241  parity_odd = 11,
242  less = 12,
243  greater_equal = 13,
244  less_equal = 14,
245  greater = 15,
246 
247  // Fake conditions that are handled by the
248  // opcodes using them. They lie outside the 0-15 range of real conditions.
249  always = 16,
250  never = 17,
251  // aliases
252  carry = below,
253  not_carry = above_equal,
254  zero = equal,
255  not_zero = not_equal,
256  sign = negative,
257  not_sign = positive,
  // Largest real condition value (15).
258  last_condition = greater
259 };
260 
261 
262 // Returns the equivalent of !cc by flipping the lowest bit of the condition
// value: every even/odd pair in enum Condition is a condition and its
// complement (e.g. equal = 4 <-> not_equal = 5, always = 16 <-> never = 17).
263 // Negation of the default no_condition (-1) results in a non-default
264 // no_condition value (-2). As long as tests for no_condition check
265 // for condition < 0, this will work as expected.
266 inline Condition NegateCondition(Condition cc) {
267  return static_cast<Condition>(cc ^ 1);
268 }
269 
270 
// Floating-point rounding modes, numbered 0-3.
// NOTE(review): the values coincide with the x86 two-bit rounding-control
// encoding (00 nearest, 01 down, 10 up, 11 toward zero) and are presumably
// emitted as the immediate of the SSE4.1 round instructions -- confirm at the
// use sites, which are outside this listing.
271 enum RoundingMode {
272  kRoundToNearest = 0x0,
273  kRoundDown = 0x1,
274  kRoundUp = 0x2,
275  kRoundToZero = 0x3
276 };
277 
278 
279 // -----------------------------------------------------------------------------
280 // Machine instruction Immediates
281 
// A 32-bit immediate instruction operand together with an optional relocation
// mode. Both fields are const and private; only Assembler (a friend) reads
// them.
282 class Immediate {
283  public:
  // Plain 32-bit immediate; the relocation mode defaults to RelocInfo::NONE.
284  explicit constexpr Immediate(int32_t value) : value_(value) {}
  // 32-bit immediate with an explicit relocation mode.
285  explicit constexpr Immediate(int32_t value, RelocInfo::Mode rmode)
286  : value_(value), rmode_(rmode) {}
  // Immediate built from a Smi: the Smi's pointer-sized representation is
  // truncated to 32 bits. This is only valid when Smi values are 31 bits wide,
  // which the DCHECK enforces in debug builds.
287  explicit Immediate(Smi value)
288  : value_(static_cast<int32_t>(static_cast<intptr_t>(value.ptr()))) {
289  DCHECK(SmiValuesAre31Bits()); // Only available for 31-bit SMI.
290  }
291 
292  private:
293  const int32_t value_;
294  const RelocInfo::Mode rmode_ = RelocInfo::NONE;
295 
296  friend class Assembler;
297 };
// Immediate must stay trivially copyable and no larger than a pointer so that
// it can be passed by value.
298 ASSERT_TRIVIALLY_COPYABLE(Immediate);
299 static_assert(sizeof(Immediate) <= kPointerSize,
300  "Immediate must be small enough to pass it by value");
301 
302 // -----------------------------------------------------------------------------
303 // Machine instruction Operands
304 
// Index scale factor of a memory operand. Each enumerator's value is log2 of
// the multiplier it names (times_1 = 0 ... times_8 = 3).
305 enum ScaleFactor : int8_t {
306  times_1 = 0,
307  times_2 = 1,
308  times_4 = 2,
309  times_8 = 3,
  // Scale for indexing arrays of 32-bit ints.
310  times_int_size = times_4,
  // Scale for indexing arrays of pointers: times_8 when kPointerSize is 8,
  // otherwise times_4.
311  times_pointer_size = (kPointerSize == 8) ? times_8 : times_4
312 };
313 
// A memory operand of an instruction. The encoding is computed by the
// constructors (defined outside this header) and stored in an immutable Data
// record, which makes Operand a small value type that is passed by value.
314 class Operand {
315  public:
  // Pre-encoded form of the operand.
  // NOTE(review): addend and buf have no default initializer, unlike rex and
  // len; they are presumably always filled in by the constructors -- confirm.
316  struct Data {
  // REX prefix bits required by the operand; 0 means no REX prefix is needed
  // (see requires_rex()).
317  byte rex = 0;
  // Encoded operand bytes (ModR/M, SIB and displacement, per the comment on
  // operand_size() below).
318  byte buf[9];
319  byte len = 1; // number of bytes of buf in use.
320  int8_t addend; // for rip + offset + addend.
321  };
322 
323  // [base + disp/r]
324  Operand(Register base, int32_t disp);
325 
326  // [base + index*scale + disp/r]
327  Operand(Register base,
328  Register index,
329  ScaleFactor scale,
330  int32_t disp);
331 
332  // [index*scale + disp/r]
333  Operand(Register index,
334  ScaleFactor scale,
335  int32_t disp);
336 
337  // Offset from existing memory operand.
338  // Offset is added to existing displacement as 32-bit signed values and
339  // this must not overflow.
340  Operand(Operand base, int32_t offset);
341 
342  // [rip + disp/r]
343  explicit Operand(Label* label, int addend = 0);
344 
345  Operand(const Operand&) = default;
346 
347  // Checks whether either base or index register is the given register.
348  // Does not check the "reg" part of the Operand.
349  bool AddressUsesRegister(Register reg) const;
350 
351  // Queries related to the size of the generated instruction.
352  // Whether the generated instruction will have a REX prefix.
353  bool requires_rex() const { return data_.rex != 0; }
354  // Size of the ModR/M, SIB and displacement parts of the generated
355  // instruction.
356  int operand_size() const { return data_.len; }
357 
  // Read-only access to the encoded operand.
358  const Data& data() const { return data_; }
359 
360  private:
361  const Data data_;
362 };
// Operand must stay trivially copyable and at most two pointers in size so
// that it can be passed by value.
363 ASSERT_TRIVIALLY_COPYABLE(Operand);
364 static_assert(sizeof(Operand) <= 2 * kPointerSize,
365  "Operand must be small enough to pass it by value");
366 
// X-macro naming the instructions that come in pointer-, 32-bit- and
// 64-bit-sized variants. class Assembler expands it with DECLARE_INSTRUCTION
// to generate <name>p / <name>l / <name>q wrappers that forward to
// emit_<name>(..., size) with kPointerSize, kInt32Size or kInt64Size.
367 #define ASSEMBLER_INSTRUCTION_LIST(V) \
368  V(add) \
369  V(and) \
370  V(cmp) \
371  V(cmpxchg) \
372  V(dec) \
373  V(idiv) \
374  V(div) \
375  V(imul) \
376  V(inc) \
377  V(lea) \
378  V(mov) \
379  V(movzxb) \
380  V(movzxw) \
381  V(neg) \
382  V(not) \
383  V(or) \
384  V(repmovs) \
385  V(sbb) \
386  V(sub) \
387  V(test) \
388  V(xchg) \
389  V(xor)
390 
391 // Shift instructions on operands/registers with kPointerSize, kInt32Size and
392 // kInt64Size. Each entry is V(mnemonic, subcode); class Assembler expands the
// list with DECLARE_SHIFT_INSTRUCTION, which passes the subcode on to shift().
// NOTE(review): the subcodes match the x86 shift-group opcode extensions
// (/0 rol ... /5 shr, /7 sar); 0x6 is unused here -- confirm against shift().
393 #define SHIFT_INSTRUCTION_LIST(V) \
394  V(rol, 0x0) \
395  V(ror, 0x1) \
396  V(rcl, 0x2) \
397  V(rcr, 0x3) \
398  V(shl, 0x4) \
399  V(shr, 0x5) \
400  V(sar, 0x7)
401 
402 // Partial Constant Pool
403 // Unlike a complete constant pool (as used on ARM), the partial constant pool
404 // only takes effect for shareable constants, in order to reduce code size.
405 // The partial constant pool does not emit constant pool entries at the end of
406 // each code object. Instead, it keeps the first shareable constant inlined in
407 // the instructions and uses rip-relative memory loads for the same constant in
408 // subsequent instructions. These rip-relative memory loads target
409 // the position of the first inlined constant. For example:
410 //
411 // REX.W movq r10,0x7f9f75a32c20 ; 10 bytes
412 // …
413 // REX.W movq r10,0x7f9f75a32c20 ; 10 bytes
414 // …
415 //
416 // turns into
417 //
418 // REX.W movq r10,0x7f9f75a32c20 ; 10 bytes
419 // …
420 // REX.W movq r10,[rip+0xffffff96] ; 7 bytes
421 // …
422 
// Bookkeeping for the partial constant pool of one Assembler: records
// (constant value, pc offset) pairs for shareable constants and patches the
// recorded entries later. The method bodies are defined outside this header.
423 class ConstPool {
424  public:
  // Binds the pool to the assembler it works for; the pointer is stored, not
  // owned.
425  explicit ConstPool(Assembler* assm) : assm_(assm) {}
426  // Returns true when partial constant pool is valid for this entry.
427  bool TryRecordEntry(intptr_t data, RelocInfo::Mode mode);
  // True when no entries are currently recorded.
428  bool IsEmpty() const { return entries_.empty(); }
429 
  // NOTE(review): presumably rewrites the displacements of the recorded
  // rip-relative moves so they point at the first inlined copy of each
  // constant -- confirm against the definition.
430  void PatchEntries();
431  // Discard any pending pool entries.
432  void Clear();
433 
434  private:
435  // Adds a shared entry to entries_. Returns true if this is not the first time
436  // we add this entry, false otherwise.
437  bool AddSharedEntry(uint64_t data, int offset);
438 
439  // Check if the instruction is a rip-relative move.
440  bool IsMoveRipRelative(byte* instr);
441 
442  Assembler* assm_;
443 
444  // Values, pc offsets of entries. A multimap, so one constant value can map
  // to several pc offsets.
445  typedef std::multimap<uint64_t, int> EntryMap;
446  EntryMap entries_;
447 
448  // Number of bytes taken up by the displacement of rip-relative addressing.
449  static constexpr int kRipRelativeDispSize = 4; // 32-bit displacement.
450  // Distance between the address of the displacement in the rip-relative move
451  // instruction and the head address of the instruction.
452  static constexpr int kMoveRipRelativeDispOffset =
453  3; // REX Opcode ModRM Displacement
454  // Distance between the address of the imm64 in the 'movq reg, imm64'
455  // instruction and the head address of the instruction.
456  static constexpr int kMoveImm64Offset = 2; // REX Opcode imm64
457  // A mask for rip-relative move instruction.
  // NOTE(review): assuming the first four instruction bytes are read as a
  // little-endian uint32, 0xFB on byte 0 ignores one REX bit (bit 2), 0xC7 on
  // byte 2 ignores the ModRM reg field and 0x00 ignores byte 3, so the match
  // does not depend on the destination register -- confirm in
  // IsMoveRipRelative().
458  static constexpr uint32_t kMoveRipRelativeMask = 0x00C7FFFB;
459  // The bits for a rip-relative move instruction after mask: bytes 48 8B 05
  // under the same little-endian assumption.
460  static constexpr uint32_t kMoveRipRelativeInstr = 0x00058B48;
461 };
462 
463 class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
464  private:
465  // We check before assembling an instruction that there is sufficient
466  // space to write an instruction and its relocation information.
467  // The relocation writer's position must be kGap bytes above the end of
468  // the generated instructions. This leaves enough space for the
469  // longest possible x64 instruction, 15 bytes, and the longest possible
470  // relocation information encoding, RelocInfoWriter::kMaxLength == 16.
471  // (There is a 15 byte limit on x64 instruction length that rules out some
472  // otherwise valid instructions.)
473  // This allows for a single, fast space check per instruction.
474  static constexpr int kGap = 32;
475 
476  public:
477  // Create an assembler. Instructions and relocation information are emitted
478  // into a buffer, with the instructions starting from the beginning and the
479  // relocation information starting from the end of the buffer. See CodeDesc
480  // for a detailed comment on the layout (globals.h).
481  //
482  // If the provided buffer is nullptr, the assembler allocates and grows its
483  // own buffer, and buffer_size determines the initial buffer size. The buffer
484  // is owned by the assembler and deallocated upon destruction of the
485  // assembler.
486  //
487  // If the provided buffer is not nullptr, the assembler uses the provided
488  // buffer for code generation and assumes its size to be buffer_size. If the
489  // buffer is too small, a fatal error occurs. No deallocation of the buffer is
490  // done upon destruction of the assembler.
491  Assembler(const AssemblerOptions& options, void* buffer, int buffer_size);
492  ~Assembler() override = default;
493 
494  // GetCode emits any pending (non-emitted) code and fills the descriptor
495  // desc. GetCode() is idempotent; it returns the same result if no other
496  // Assembler functions are invoked in between GetCode() calls.
497  void GetCode(Isolate* isolate, CodeDesc* desc);
498 
499  // Read/Modify the code target in the relative branch/call instruction at pc.
500  // On the x64 architecture, we use relative jumps with a 32-bit displacement
501  // to jump to other Code objects in the Code space in the heap.
502  // Jumps to C functions are done indirectly through a 64-bit register holding
503  // the absolute address of the target.
504  // These functions convert between absolute Addresses of Code objects and
505  // the relative displacements stored in the code.
506  // The isolate argument is unused (and may be nullptr) when skipping flushing.
507  static inline Address target_address_at(Address pc, Address constant_pool);
508  static inline void set_target_address_at(
509  Address pc, Address constant_pool, Address target,
510  ICacheFlushMode icache_flush_mode = FLUSH_ICACHE_IF_NEEDED);
511 
512  // Return the code target address at a call site from the return address
513  // of that call in the instruction stream.
514  static inline Address target_address_from_return_address(Address pc);
515 
516  // This sets the branch destination (which is in the instruction on x64).
517  // This is for calls and branches within generated code.
518  inline static void deserialization_set_special_target_at(
519  Address instruction_payload, Code code, Address target);
520 
521  // Get the size of the special target encoded at 'instruction_payload'.
522  inline static int deserialization_special_target_size(
523  Address instruction_payload);
524 
525  // This sets the internal reference at the pc.
526  inline static void deserialization_set_target_internal_reference_at(
527  Address pc, Address target,
528  RelocInfo::Mode mode = RelocInfo::INTERNAL_REFERENCE);
529 
530  inline Handle<Code> code_target_object_handle_at(Address pc);
531  inline Address runtime_entry_at(Address pc);
532 
533  // Number of bytes taken up by the branch target in the code.
534  static constexpr int kSpecialTargetSize = 4; // 32-bit displacement.
535  // Distance between the address of the code target in the call instruction
536  // and the return address pushed on the stack.
537  static constexpr int kCallTargetAddressOffset = 4; // 32-bit displacement.
538  // The length of call(kScratchRegister).
539  static constexpr int kCallScratchRegisterInstructionLength = 3;
540  // The length of movq(kScratchRegister, address).
541  static constexpr int kMoveAddressIntoScratchRegisterInstructionLength =
542  2 + kPointerSize;
543  // The length of movq(kScratchRegister, address) and call(kScratchRegister).
544  static constexpr int kCallSequenceLength =
545  kMoveAddressIntoScratchRegisterInstructionLength +
546  kCallScratchRegisterInstructionLength;
547 
548  // One byte opcode for test eax,0xXXXXXXXX.
549  static constexpr byte kTestEaxByte = 0xA9;
550  // One byte opcode for test al, 0xXX.
551  static constexpr byte kTestAlByte = 0xA8;
552  // One byte opcode for nop.
553  static constexpr byte kNopByte = 0x90;
554 
555  // One byte prefix for a short conditional jump.
556  static constexpr byte kJccShortPrefix = 0x70;
557  static constexpr byte kJncShortOpcode = kJccShortPrefix | not_carry;
558  static constexpr byte kJcShortOpcode = kJccShortPrefix | carry;
559  static constexpr byte kJnzShortOpcode = kJccShortPrefix | not_zero;
560  static constexpr byte kJzShortOpcode = kJccShortPrefix | zero;
561 
562  // VEX prefix encodings.
563  enum SIMDPrefix { kNone = 0x0, k66 = 0x1, kF3 = 0x2, kF2 = 0x3 };
564  enum VectorLength { kL128 = 0x0, kL256 = 0x4, kLIG = kL128, kLZ = kL128 };
565  enum VexW { kW0 = 0x0, kW1 = 0x80, kWIG = kW0 };
566  enum LeadingOpcode { k0F = 0x1, k0F38 = 0x2, k0F3A = 0x3 };
567 
568  // ---------------------------------------------------------------------------
569  // Code generation
570  //
571  // Function names correspond one-to-one to x64 instruction mnemonics.
572  // Unless specified otherwise, instructions operate on 64-bit operands.
573  //
574  // If we need versions of an assembly instruction that operate on different
575  // width arguments, we add a single-letter suffix specifying the width.
576  // This is done for the following instructions: mov, cmp, inc, dec,
577  // add, sub, and test.
578  // There are no versions of these instructions without the suffix.
579  // - Instructions on 8-bit (byte) operands/registers have a trailing 'b'.
580  // - Instructions on 16-bit (word) operands/registers have a trailing 'w'.
581  // - Instructions on 32-bit (doubleword) operands/registers use 'l'.
582  // - Instructions on 64-bit (quadword) operands/registers use 'q'.
583  // - Instructions on operands/registers with pointer size use 'p'.
584 
585  STATIC_ASSERT(kPointerSize == kInt64Size || kPointerSize == kInt32Size);
586 
587 #define DECLARE_INSTRUCTION(instruction) \
588  template<class P1> \
589  void instruction##p(P1 p1) { \
590  emit_##instruction(p1, kPointerSize); \
591  } \
592  \
593  template<class P1> \
594  void instruction##l(P1 p1) { \
595  emit_##instruction(p1, kInt32Size); \
596  } \
597  \
598  template<class P1> \
599  void instruction##q(P1 p1) { \
600  emit_##instruction(p1, kInt64Size); \
601  } \
602  \
603  template<class P1, class P2> \
604  void instruction##p(P1 p1, P2 p2) { \
605  emit_##instruction(p1, p2, kPointerSize); \
606  } \
607  \
608  template<class P1, class P2> \
609  void instruction##l(P1 p1, P2 p2) { \
610  emit_##instruction(p1, p2, kInt32Size); \
611  } \
612  \
613  template<class P1, class P2> \
614  void instruction##q(P1 p1, P2 p2) { \
615  emit_##instruction(p1, p2, kInt64Size); \
616  } \
617  \
618  template<class P1, class P2, class P3> \
619  void instruction##p(P1 p1, P2 p2, P3 p3) { \
620  emit_##instruction(p1, p2, p3, kPointerSize); \
621  } \
622  \
623  template<class P1, class P2, class P3> \
624  void instruction##l(P1 p1, P2 p2, P3 p3) { \
625  emit_##instruction(p1, p2, p3, kInt32Size); \
626  } \
627  \
628  template<class P1, class P2, class P3> \
629  void instruction##q(P1 p1, P2 p2, P3 p3) { \
630  emit_##instruction(p1, p2, p3, kInt64Size); \
631  }
632  ASSEMBLER_INSTRUCTION_LIST(DECLARE_INSTRUCTION)
633 #undef DECLARE_INSTRUCTION
634 
635  // Insert the smallest number of nop instructions
636  // possible to align the pc offset to a multiple
637  // of m, where m must be a power of 2.
638  void Align(int m);
639  // Insert the smallest number of zero bytes possible to align the pc offset
640  // to a multiple of m. m must be a power of 2 (>= 2).
641  void DataAlign(int m);
642  void Nop(int bytes = 1);
643  // Aligns code to something that's optimal for a jump target for the platform.
644  void CodeTargetAlign();
645 
646  // Stack
647  void pushfq();
648  void popfq();
649 
650  void pushq(Immediate value);
651  // Push a 32 bit integer, and guarantee that it is actually pushed as a
652  // 32 bit value, the normal push will optimize the 8 bit case.
653  void pushq_imm32(int32_t imm32);
654  void pushq(Register src);
655  void pushq(Operand src);
656 
657  void popq(Register dst);
658  void popq(Operand dst);
659 
660  void enter(Immediate size);
661  void leave();
662 
663  // Moves
664  void movb(Register dst, Operand src);
665  void movb(Register dst, Immediate imm);
666  void movb(Operand dst, Register src);
667  void movb(Operand dst, Immediate imm);
668 
669  // Move the low 16 bits of a 64-bit register value to a 16-bit
670  // memory location.
671  void movw(Register dst, Operand src);
672  void movw(Operand dst, Register src);
673  void movw(Operand dst, Immediate imm);
674 
675  // Move the offset of the label location relative to the current
676  // position (after the move) to the destination.
677  void movl(Operand dst, Label* src);
678 
679  // Loads a pointer into a register with a relocation mode.
680  void movp(Register dst, Address ptr, RelocInfo::Mode rmode);
681 
682  // Load a heap number into a register.
683  // The heap number will not be allocated and embedded into the code right
684  // away. Instead, we emit the load of a dummy object. Later, when calling
685  // Assembler::GetCode, the heap number will be allocated and the code will be
686  // patched by replacing the dummy with the actual object. The RelocInfo for
687  // the embedded object gets already recorded correctly when emitting the dummy
688  // move.
689  void movp_heap_number(Register dst, double value);
690 
691  void movp_string(Register dst, const StringConstantBase* str);
692 
693  // Loads a 64-bit immediate into a register.
694  void movq(Register dst, int64_t value,
695  RelocInfo::Mode rmode = RelocInfo::NONE);
696  void movq(Register dst, uint64_t value,
697  RelocInfo::Mode rmode = RelocInfo::NONE);
698 
699  void movsxbl(Register dst, Register src);
700  void movsxbl(Register dst, Operand src);
701  void movsxbq(Register dst, Register src);
702  void movsxbq(Register dst, Operand src);
703  void movsxwl(Register dst, Register src);
704  void movsxwl(Register dst, Operand src);
705  void movsxwq(Register dst, Register src);
706  void movsxwq(Register dst, Operand src);
707  void movsxlq(Register dst, Register src);
708  void movsxlq(Register dst, Operand src);
709 
710  // Repeated moves.
711 
712  void repmovsb();
713  void repmovsw();
714  void repmovsp() { emit_repmovs(kPointerSize); }
715  void repmovsl() { emit_repmovs(kInt32Size); }
716  void repmovsq() { emit_repmovs(kInt64Size); }
717 
718  // Instruction to load from an immediate 64-bit pointer into RAX.
719  void load_rax(Address value, RelocInfo::Mode rmode);
720  void load_rax(ExternalReference ext);
721 
722  // Conditional moves.
723  void cmovq(Condition cc, Register dst, Register src);
724  void cmovq(Condition cc, Register dst, Operand src);
725  void cmovl(Condition cc, Register dst, Register src);
726  void cmovl(Condition cc, Register dst, Operand src);
727 
728  void cmpb(Register dst, Immediate src) {
729  immediate_arithmetic_op_8(0x7, dst, src);
730  }
731 
732  void cmpb_al(Immediate src);
733 
734  void cmpb(Register dst, Register src) {
735  arithmetic_op_8(0x3A, dst, src);
736  }
737 
738  void cmpb(Register dst, Operand src) { arithmetic_op_8(0x3A, dst, src); }
739 
740  void cmpb(Operand dst, Register src) { arithmetic_op_8(0x38, src, dst); }
741 
742  void cmpb(Operand dst, Immediate src) {
743  immediate_arithmetic_op_8(0x7, dst, src);
744  }
745 
746  void cmpw(Operand dst, Immediate src) {
747  immediate_arithmetic_op_16(0x7, dst, src);
748  }
749 
750  void cmpw(Register dst, Immediate src) {
751  immediate_arithmetic_op_16(0x7, dst, src);
752  }
753 
754  void cmpw(Register dst, Operand src) { arithmetic_op_16(0x3B, dst, src); }
755 
756  void cmpw(Register dst, Register src) {
757  arithmetic_op_16(0x3B, dst, src);
758  }
759 
760  void cmpw(Operand dst, Register src) { arithmetic_op_16(0x39, src, dst); }
761 
762  void testb(Register reg, Operand op) { testb(op, reg); }
763 
764  void testw(Register reg, Operand op) { testw(op, reg); }
765 
766  void andb(Register dst, Immediate src) {
767  immediate_arithmetic_op_8(0x4, dst, src);
768  }
769 
770  void decb(Register dst);
771  void decb(Operand dst);
772 
773  // Lock prefix.
774  void lock();
775 
776  void xchgb(Register reg, Operand op);
777  void xchgw(Register reg, Operand op);
778 
779  void cmpxchgb(Operand dst, Register src);
780  void cmpxchgw(Operand dst, Register src);
781 
782  // Sign-extends rax into rdx:rax.
783  void cqo();
784  // Sign-extends eax into edx:eax.
785  void cdq();
786 
787  // Multiply eax by src, put the result in edx:eax.
788  void mull(Register src);
789  void mull(Operand src);
790  // Multiply rax by src, put the result in rdx:rax.
791  void mulq(Register src);
792 
793 #define DECLARE_SHIFT_INSTRUCTION(instruction, subcode) \
794  void instruction##p(Register dst, Immediate imm8) { \
795  shift(dst, imm8, subcode, kPointerSize); \
796  } \
797  \
798  void instruction##l(Register dst, Immediate imm8) { \
799  shift(dst, imm8, subcode, kInt32Size); \
800  } \
801  \
802  void instruction##q(Register dst, Immediate imm8) { \
803  shift(dst, imm8, subcode, kInt64Size); \
804  } \
805  \
806  void instruction##p(Operand dst, Immediate imm8) { \
807  shift(dst, imm8, subcode, kPointerSize); \
808  } \
809  \
810  void instruction##l(Operand dst, Immediate imm8) { \
811  shift(dst, imm8, subcode, kInt32Size); \
812  } \
813  \
814  void instruction##q(Operand dst, Immediate imm8) { \
815  shift(dst, imm8, subcode, kInt64Size); \
816  } \
817  \
818  void instruction##p_cl(Register dst) { shift(dst, subcode, kPointerSize); } \
819  \
820  void instruction##l_cl(Register dst) { shift(dst, subcode, kInt32Size); } \
821  \
822  void instruction##q_cl(Register dst) { shift(dst, subcode, kInt64Size); } \
823  \
824  void instruction##p_cl(Operand dst) { shift(dst, subcode, kPointerSize); } \
825  \
826  void instruction##l_cl(Operand dst) { shift(dst, subcode, kInt32Size); } \
827  \
828  void instruction##q_cl(Operand dst) { shift(dst, subcode, kInt64Size); }
829  SHIFT_INSTRUCTION_LIST(DECLARE_SHIFT_INSTRUCTION)
830 #undef DECLARE_SHIFT_INSTRUCTION
831 
832  // Shifts dst:src left by cl bits, affecting only dst.
833  void shld(Register dst, Register src);
834 
835  // Shifts src:dst right by cl bits, affecting only dst.
836  void shrd(Register dst, Register src);
837 
838  void store_rax(Address dst, RelocInfo::Mode mode);
839  void store_rax(ExternalReference ref);
840 
841  void subb(Register dst, Immediate src) {
842  immediate_arithmetic_op_8(0x5, dst, src);
843  }
844 
845  void sub_sp_32(uint32_t imm);
846 
847  void testb(Register dst, Register src);
848  void testb(Register reg, Immediate mask);
849  void testb(Operand op, Immediate mask);
850  void testb(Operand op, Register reg);
851 
852  void testw(Register dst, Register src);
853  void testw(Register reg, Immediate mask);
854  void testw(Operand op, Immediate mask);
855  void testw(Operand op, Register reg);
856 
857  // Bit operations.
858  void bswapl(Register dst);
859  void bswapq(Register dst);
860  void btq(Operand dst, Register src);
861  void btsq(Operand dst, Register src);
862  void btsq(Register dst, Immediate imm8);
863  void btrq(Register dst, Immediate imm8);
864  void bsrq(Register dst, Register src);
865  void bsrq(Register dst, Operand src);
866  void bsrl(Register dst, Register src);
867  void bsrl(Register dst, Operand src);
868  void bsfq(Register dst, Register src);
869  void bsfq(Register dst, Operand src);
870  void bsfl(Register dst, Register src);
871  void bsfl(Register dst, Operand src);
872 
873  // Miscellaneous
874  void clc();
875  void cld();
876  void cpuid();
877  void hlt();
878  void int3();
879  void nop();
880  void ret(int imm16);
881  void ud2();
882  void setcc(Condition cc, Register reg);
883 
884  void pshufw(XMMRegister dst, XMMRegister src, uint8_t shuffle);
885  void pshufw(XMMRegister dst, Operand src, uint8_t shuffle);
886  void pblendw(XMMRegister dst, Operand src, uint8_t mask);
887  void pblendw(XMMRegister dst, XMMRegister src, uint8_t mask);
888  void palignr(XMMRegister dst, Operand src, uint8_t mask);
889  void palignr(XMMRegister dst, XMMRegister src, uint8_t mask);
890 
891  // Label operations & relative jumps (PPUM Appendix D)
892  //
893  // Takes a branch opcode (cc) and a label (L) and generates
894  // either a backward branch or a forward branch and links it
895  // to the label fixup chain. Usage:
896  //
897  // Label L; // unbound label
898  // j(cc, &L); // forward branch to unbound label
899  // bind(&L); // bind label to the current pc
900  // j(cc, &L); // backward branch to bound label
901  // bind(&L); // illegal: a label may be bound only once
902  //
903  // Note: The same Label can be used for forward and backward branches
904  // but it may be bound only once.
905 
906  void bind(Label* L); // binds an unbound label L to the current code position
907 
908  // Calls
909  // Call near relative 32-bit displacement, relative to next instruction.
910  void call(Label* L);
911  void call(Address entry, RelocInfo::Mode rmode);
912  void near_call(Address entry, RelocInfo::Mode rmode);
913  void near_jmp(Address entry, RelocInfo::Mode rmode);  // jump declared alongside near_call; same (Address, RelocInfo::Mode) signature
914  void call(CodeStub* stub);
915  void call(Handle<Code> target,
916  RelocInfo::Mode rmode = RelocInfo::CODE_TARGET);  // rmode defaults to RelocInfo::CODE_TARGET
917 
918  // Calls directly to the given address using a relative offset.
919  // Should only ever be used in Code objects for calls within the
920  // same Code object. Should not be used when generating new code (use labels),
921  // but only when patching existing code.
922  void call(Address target);  // note: no RelocInfo::Mode parameter, unlike call(Address, RelocInfo::Mode) above
923 
924  // Call near absolute indirect, address in register
925  void call(Register adr);
926 
927  // Jumps
928  // Jump short or near relative.
929  // Use a 32-bit signed displacement.
930  // Unconditional jump to L
931  void jmp(Label* L, Label::Distance distance = Label::kFar);  // distance defaults to Label::kFar
932  void jmp(Handle<Code> target, RelocInfo::Mode rmode);  // rmode has no default here, unlike call(Handle<Code>, ...)
933 
934  // Jump near absolute indirect (r64)
935  void jmp(Register adr);
936  void jmp(Operand src);  // Operand form of the indirect jump
937 
938  // Conditional jumps
939  void j(Condition cc,
940  Label* L,
941  Label::Distance distance = Label::kFar);  // distance defaults to Label::kFar
942  void j(Condition cc, Address entry, RelocInfo::Mode rmode);
943  void j(Condition cc, Handle<Code> target, RelocInfo::Mode rmode);
944 
945  // Floating-point operations (f-prefixed methods; sahf is declared at the end of the group).
946  void fld(int i);  // `int i` / `int index` parameters: NOTE(review) presumably an FPU stack slot index - confirm
947 
948  void fld1();
949  void fldz();
950  void fldpi();
951  void fldln2();
952 
953  void fld_s(Operand adr);  // _s/_d pairs take an Operand; NOTE(review): suffix presumably selects the memory operand width - confirm
954  void fld_d(Operand adr);
955 
956  void fstp_s(Operand adr);
957  void fstp_d(Operand adr);
958  void fstp(int index);
959 
960  void fild_s(Operand adr);
961  void fild_d(Operand adr);
962 
963  void fist_s(Operand adr);  // only the _s form is declared
964 
965  void fistp_s(Operand adr);
966  void fistp_d(Operand adr);
967 
968  void fisttp_s(Operand adr);
969  void fisttp_d(Operand adr);
970 
971  void fabs();
972  void fchs();
973 
974  void fadd(int i);  // fadd/fsub/fmul/fdiv: index is mandatory (no default)
975  void fsub(int i);
976  void fmul(int i);
977  void fdiv(int i);
978 
979  void fisub_s(Operand adr);
980 
981  void faddp(int i = 1);  // the *p forms default the index to 1
982  void fsubp(int i = 1);
983  void fsubrp(int i = 1);
984  void fmulp(int i = 1);
985  void fdivp(int i = 1);
986  void fprem();
987  void fprem1();
988 
989  void fxch(int i = 1);  // index defaults to 1
990  void fincstp();
991  void ffree(int i = 0);  // index defaults to 0
992 
993  void ftst();
994  void fucomp(int i);
995  void fucompp();
996  void fucomi(int i);
997  void fucomip();
998 
999  void fcompp();
1000  void fnstsw_ax();
1001  void fwait();
1002  void fnclex();
1003 
1004  void fsin();
1005  void fcos();
1006  void fptan();
1007  void fyl2x();
1008  void f2xm1();
1009  void fscale();
1010  void fninit();
1011 
1012  void frndint();
1013 
1014  void sahf();
1015 
1016  // SSE instructions. Most are declared as a register-source / Operand-source overload pair.
1017  void addss(XMMRegister dst, XMMRegister src);
1018  void addss(XMMRegister dst, Operand src);
1019  void subss(XMMRegister dst, XMMRegister src);
1020  void subss(XMMRegister dst, Operand src);
1021  void mulss(XMMRegister dst, XMMRegister src);
1022  void mulss(XMMRegister dst, Operand src);
1023  void divss(XMMRegister dst, XMMRegister src);
1024  void divss(XMMRegister dst, Operand src);
1025 
1026  void maxss(XMMRegister dst, XMMRegister src);
1027  void maxss(XMMRegister dst, Operand src);
1028  void minss(XMMRegister dst, XMMRegister src);
1029  void minss(XMMRegister dst, Operand src);
1030 
1031  void sqrtss(XMMRegister dst, XMMRegister src);
1032  void sqrtss(XMMRegister dst, Operand src);
1033 
1034  void ucomiss(XMMRegister dst, XMMRegister src);
1035  void ucomiss(XMMRegister dst, Operand src);
1036  void movaps(XMMRegister dst, XMMRegister src);  // register-to-register form only
1037 
1038  // Don't use this unless it's important to keep the
1039  // top half of the destination register unchanged.
1040  // Use movaps when moving float values and movd for integer
1041  // values in xmm registers.
1042  void movss(XMMRegister dst, XMMRegister src);
1043 
1044  void movss(XMMRegister dst, Operand src);  // Operand as source
1045  void movss(Operand dst, XMMRegister src);  // Operand as destination
1046  void shufps(XMMRegister dst, XMMRegister src, byte imm8);  // register source only; immediate typed as `byte`
1047 
1048  void cvttss2si(Register dst, Operand src);  // destination is a general-purpose Register
1049  void cvttss2si(Register dst, XMMRegister src);
1050  void cvtlsi2ss(XMMRegister dst, Operand src);  // source is a general-purpose Register or an Operand
1051  void cvtlsi2ss(XMMRegister dst, Register src);
1052 
1053  void andps(XMMRegister dst, XMMRegister src);
1054  void andps(XMMRegister dst, Operand src);
1055  void orps(XMMRegister dst, XMMRegister src);
1056  void orps(XMMRegister dst, Operand src);
1057  void xorps(XMMRegister dst, XMMRegister src);
1058  void xorps(XMMRegister dst, Operand src);
1059 
1060  void addps(XMMRegister dst, XMMRegister src);
1061  void addps(XMMRegister dst, Operand src);
1062  void subps(XMMRegister dst, XMMRegister src);
1063  void subps(XMMRegister dst, Operand src);
1064  void mulps(XMMRegister dst, XMMRegister src);
1065  void mulps(XMMRegister dst, Operand src);
1066  void divps(XMMRegister dst, XMMRegister src);
1067  void divps(XMMRegister dst, Operand src);
1068 
1069  void movmskps(Register dst, XMMRegister src);  // destination is a general-purpose Register
1070 
1071  void vinstr(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2,
1072  SIMDPrefix pp, LeadingOpcode m, VexW w);  // shared emitter called by the inline v* wrappers in this class: opcode byte, three operands, then prefix / leading-opcode / W selectors
1073  void vinstr(byte op, XMMRegister dst, XMMRegister src1, Operand src2,
1074  SIMDPrefix pp, LeadingOpcode m, VexW w);  // same, with the last operand given as an Operand
1075 
1076  // SSE2 instructions. sse2_instr is the shared emitter the generated wrappers forward to.
1077  void sse2_instr(XMMRegister dst, XMMRegister src, byte prefix, byte escape,
1078  byte opcode);  // register-source form
1079  void sse2_instr(XMMRegister dst, Operand src, byte prefix, byte escape,
1080  byte opcode);  // Operand-source form
1081 #define DECLARE_SSE2_INSTRUCTION(instruction, prefix, escape, opcode) \
1082  void instruction(XMMRegister dst, XMMRegister src) { \
1083  sse2_instr(dst, src, 0x##prefix, 0x##escape, 0x##opcode); \
1084  } \
1085  void instruction(XMMRegister dst, Operand src) { \
1086  sse2_instr(dst, src, 0x##prefix, 0x##escape, 0x##opcode); \
1087  }
1088 
1089  SSE2_INSTRUCTION_LIST(DECLARE_SSE2_INSTRUCTION)  // one register-source and one Operand-source wrapper per list entry; `0x` is pasted onto the entry's prefix/escape/opcode columns
1090 #undef DECLARE_SSE2_INSTRUCTION
1091 
1092 #define DECLARE_SSE2_AVX_INSTRUCTION(instruction, prefix, escape, opcode) \
1093  void v##instruction(XMMRegister dst, XMMRegister src1, XMMRegister src2) { \
1094  vinstr(0x##opcode, dst, src1, src2, k##prefix, k##escape, kW0); \
1095  } \
1096  void v##instruction(XMMRegister dst, XMMRegister src1, Operand src2) { \
1097  vinstr(0x##opcode, dst, src1, src2, k##prefix, k##escape, kW0); \
1098  }
1099 
1100  SSE2_INSTRUCTION_LIST(DECLARE_SSE2_AVX_INSTRUCTION)  // same list as the SSE2 wrappers: emits v<name> three-operand forms via vinstr; `k` is pasted onto prefix/escape to name the selector constants, W is always kW0
1101 #undef DECLARE_SSE2_AVX_INSTRUCTION
1102 
1103  // SSE3
1104  void lddqu(XMMRegister dst, Operand src);  // Operand source only; no register-source overload is declared
1105 
1106  // SSSE3. ssse3_instr is the shared emitter; unlike sse2_instr it takes two escape bytes.
1107  void ssse3_instr(XMMRegister dst, XMMRegister src, byte prefix, byte escape1,
1108  byte escape2, byte opcode);  // register-source form
1109  void ssse3_instr(XMMRegister dst, Operand src, byte prefix, byte escape1,
1110  byte escape2, byte opcode);  // Operand-source form
1111 
1112 #define DECLARE_SSSE3_INSTRUCTION(instruction, prefix, escape1, escape2, \
1113  opcode) \
1114  void instruction(XMMRegister dst, XMMRegister src) { \
1115  ssse3_instr(dst, src, 0x##prefix, 0x##escape1, 0x##escape2, 0x##opcode); \
1116  } \
1117  void instruction(XMMRegister dst, Operand src) { \
1118  ssse3_instr(dst, src, 0x##prefix, 0x##escape1, 0x##escape2, 0x##opcode); \
1119  }
1120 
1121  SSSE3_INSTRUCTION_LIST(DECLARE_SSSE3_INSTRUCTION)  // two wrappers per list entry; `0x` is pasted onto each of the entry's byte columns
1122 #undef DECLARE_SSSE3_INSTRUCTION
1123 
1124  // SSE4. Mirrors the SSSE3 section: a shared two-escape-byte emitter plus list-generated wrappers.
1125  void sse4_instr(XMMRegister dst, XMMRegister src, byte prefix, byte escape1,
1126  byte escape2, byte opcode);  // register-source form
1127  void sse4_instr(XMMRegister dst, Operand src, byte prefix, byte escape1,
1128  byte escape2, byte opcode);  // Operand-source form
1129 #define DECLARE_SSE4_INSTRUCTION(instruction, prefix, escape1, escape2, \
1130  opcode) \
1131  void instruction(XMMRegister dst, XMMRegister src) { \
1132  sse4_instr(dst, src, 0x##prefix, 0x##escape1, 0x##escape2, 0x##opcode); \
1133  } \
1134  void instruction(XMMRegister dst, Operand src) { \
1135  sse4_instr(dst, src, 0x##prefix, 0x##escape1, 0x##escape2, 0x##opcode); \
1136  }
1137 
1138  SSE4_INSTRUCTION_LIST(DECLARE_SSE4_INSTRUCTION)  // two wrappers per list entry, forwarding to sse4_instr
1139 #undef DECLARE_SSE4_INSTRUCTION
1140 
1141 #define DECLARE_SSE34_AVX_INSTRUCTION(instruction, prefix, escape1, escape2, \
1142  opcode) \
1143  void v##instruction(XMMRegister dst, XMMRegister src1, XMMRegister src2) { \
1144  vinstr(0x##opcode, dst, src1, src2, k##prefix, k##escape1##escape2, kW0); \
1145  } \
1146  void v##instruction(XMMRegister dst, XMMRegister src1, Operand src2) { \
1147  vinstr(0x##opcode, dst, src1, src2, k##prefix, k##escape1##escape2, kW0); \
1148  }
1149 
1150  SSSE3_INSTRUCTION_LIST(DECLARE_SSE34_AVX_INSTRUCTION)  // v<name> forms for both lists; the two escape columns are pasted together after `k` to name a single leading-opcode constant
1151  SSE4_INSTRUCTION_LIST(DECLARE_SSE34_AVX_INSTRUCTION)  // W is always kW0 for these generated forms
1152 #undef DECLARE_SSE34_AVX_INSTRUCTION
1153 
1154  void movd(XMMRegister dst, Register src);  // movd/movq: moves between XMM and general-purpose registers (plus the forms noted below)
1155  void movd(XMMRegister dst, Operand src);  // Operand-source form exists for movd only
1156  void movd(Register dst, XMMRegister src);
1157  void movq(XMMRegister dst, Register src);
1158  void movq(Register dst, XMMRegister src);
1159  void movq(XMMRegister dst, XMMRegister src);  // XMM-to-XMM form exists for movq only
1160 
1161  // Don't use this unless it's important to keep the
1162  // top half of the destination register unchanged.
1163  // Use movapd when moving double values and movq for integer
1164  // values in xmm registers.
1165  void movsd(XMMRegister dst, XMMRegister src);
1166 
1167  void movsd(Operand dst, XMMRegister src);  // Operand as destination
1168  void movsd(XMMRegister dst, Operand src);  // Operand as source
1169 
1170  void movdqa(Operand dst, XMMRegister src);  // movdqa/movdqu: only Operand<->XMM forms are declared
1171  void movdqa(XMMRegister dst, Operand src);
1172 
1173  void movdqu(Operand dst, XMMRegister src);
1174  void movdqu(XMMRegister dst, Operand src);
1175 
1176  void movapd(XMMRegister dst, XMMRegister src);  // register-to-register form only
1177  void movupd(XMMRegister dst, Operand src);  // Operand<->XMM forms only
1178  void movupd(Operand dst, XMMRegister src);
1179 
1180  void psllq(XMMRegister reg, byte imm8);  // this group: a single XMM register plus an immediate; no separate source operand is declared
1181  void psrlq(XMMRegister reg, byte imm8);
1182  void psllw(XMMRegister reg, byte imm8);
1183  void pslld(XMMRegister reg, byte imm8);
1184  void psrlw(XMMRegister reg, byte imm8);
1185  void psrld(XMMRegister reg, byte imm8);
1186  void psraw(XMMRegister reg, byte imm8);
1187  void psrad(XMMRegister reg, byte imm8);
1188 
1189  void cvttsd2si(Register dst, Operand src);  // conversions into a general-purpose Register; register- and Operand-source overloads
1190  void cvttsd2si(Register dst, XMMRegister src);
1191  void cvttss2siq(Register dst, XMMRegister src);  // NOTE(review): trailing q presumably marks the 64-bit integer variant - confirm
1192  void cvttss2siq(Register dst, Operand src);
1193  void cvttsd2siq(Register dst, XMMRegister src);
1194  void cvttsd2siq(Register dst, Operand src);
1195  void cvttps2dq(XMMRegister dst, Operand src);  // XMM destination, unlike the scalar conversions above
1196  void cvttps2dq(XMMRegister dst, XMMRegister src);
1197 
1198  void cvtlsi2sd(XMMRegister dst, Operand src);  // cvt{l,q}si2{ss,sd}: general-purpose Register or Operand source, XMM destination
1199  void cvtlsi2sd(XMMRegister dst, Register src);
1200 
1201  void cvtqsi2ss(XMMRegister dst, Operand src);
1202  void cvtqsi2ss(XMMRegister dst, Register src);
1203 
1204  void cvtqsi2sd(XMMRegister dst, Operand src);
1205  void cvtqsi2sd(XMMRegister dst, Register src);
1206 
1207 
1208  void cvtss2sd(XMMRegister dst, XMMRegister src);
1209  void cvtss2sd(XMMRegister dst, Operand src);
1210  void cvtsd2ss(XMMRegister dst, XMMRegister src);
1211  void cvtsd2ss(XMMRegister dst, Operand src);
1212 
1213  void cvtsd2si(Register dst, XMMRegister src);  // register source only; no Operand overload is declared
1214  void cvtsd2siq(Register dst, XMMRegister src);  // register source only; no Operand overload is declared
1215 
1216  void addsd(XMMRegister dst, XMMRegister src);  // this group: *sd / *pd operations, mostly as register-source / Operand-source pairs
1217  void addsd(XMMRegister dst, Operand src);
1218  void subsd(XMMRegister dst, XMMRegister src);
1219  void subsd(XMMRegister dst, Operand src);
1220  void mulsd(XMMRegister dst, XMMRegister src);
1221  void mulsd(XMMRegister dst, Operand src);
1222  void divsd(XMMRegister dst, XMMRegister src);
1223  void divsd(XMMRegister dst, Operand src);
1224 
1225  void maxsd(XMMRegister dst, XMMRegister src);
1226  void maxsd(XMMRegister dst, Operand src);
1227  void minsd(XMMRegister dst, XMMRegister src);
1228  void minsd(XMMRegister dst, Operand src);
1229 
1230  void andpd(XMMRegister dst, XMMRegister src);
1231  void andpd(XMMRegister dst, Operand src);
1232  void orpd(XMMRegister dst, XMMRegister src);
1233  void orpd(XMMRegister dst, Operand src);
1234  void xorpd(XMMRegister dst, XMMRegister src);
1235  void xorpd(XMMRegister dst, Operand src);
1236  void sqrtsd(XMMRegister dst, XMMRegister src);
1237  void sqrtsd(XMMRegister dst, Operand src);
1238 
1239  void haddps(XMMRegister dst, XMMRegister src);  // a *ps instruction declared among the *sd/*pd ones
1240  void haddps(XMMRegister dst, Operand src);
1241 
1242  void ucomisd(XMMRegister dst, XMMRegister src);
1243  void ucomisd(XMMRegister dst, Operand src);
1244  void cmpltsd(XMMRegister dst, XMMRegister src);  // register source only
1245 
1246  void movmskpd(Register dst, XMMRegister src);  // destination is a general-purpose Register
1247 
1248  // SSE 4.1 instruction
1249  void insertps(XMMRegister dst, XMMRegister src, byte imm8);
1250  void extractps(Register dst, XMMRegister src, byte imm8);  // destination is a general-purpose Register
1251  void pextrb(Register dst, XMMRegister src, int8_t imm8);  // pextr*: XMM source, Register or Operand destination
1252  void pextrb(Operand dst, XMMRegister src, int8_t imm8);  // NOTE(review): int8_t imm8 here vs uint8_t in the vpextr*/vpinsr* wrappers later in this class
1253  void pextrw(Register dst, XMMRegister src, int8_t imm8);
1254  void pextrw(Operand dst, XMMRegister src, int8_t imm8);
1255  void pextrd(Register dst, XMMRegister src, int8_t imm8);
1256  void pextrd(Operand dst, XMMRegister src, int8_t imm8);
1257  void pinsrb(XMMRegister dst, Register src, int8_t imm8);  // pinsr*: XMM destination, Register or Operand source
1258  void pinsrb(XMMRegister dst, Operand src, int8_t imm8);
1259  void pinsrw(XMMRegister dst, Register src, int8_t imm8);
1260  void pinsrw(XMMRegister dst, Operand src, int8_t imm8);
1261  void pinsrd(XMMRegister dst, Register src, int8_t imm8);
1262  void pinsrd(XMMRegister dst, Operand src, int8_t imm8);
1263 
1264  void roundss(XMMRegister dst, XMMRegister src, RoundingMode mode);  // register source only
1265  void roundsd(XMMRegister dst, XMMRegister src, RoundingMode mode);  // register source only
1266 
1267  void cmpps(XMMRegister dst, XMMRegister src, int8_t cmp);  // generic compare taking a predicate value; the cmpeq*/cmplt*/... wrappers generated from SSE_CMP_P call these
1268  void cmpps(XMMRegister dst, Operand src, int8_t cmp);
1269  void cmppd(XMMRegister dst, XMMRegister src, int8_t cmp);
1270  void cmppd(XMMRegister dst, Operand src, int8_t cmp);
1271 
1272 #define SSE_CMP_P(instr, imm8) \
1273  void instr##ps(XMMRegister dst, XMMRegister src) { cmpps(dst, src, imm8); } \
1274  void instr##ps(XMMRegister dst, Operand src) { cmpps(dst, src, imm8); } \
1275  void instr##pd(XMMRegister dst, XMMRegister src) { cmppd(dst, src, imm8); } \
1276  void instr##pd(XMMRegister dst, Operand src) { cmppd(dst, src, imm8); }
1277 
1278  SSE_CMP_P(cmpeq, 0x0);  // each use defines <instr>ps and <instr>pd (register and Operand source) forwarding to cmpps/cmppd with this predicate
1279  SSE_CMP_P(cmplt, 0x1);
1280  SSE_CMP_P(cmple, 0x2);
1281  SSE_CMP_P(cmpneq, 0x4);  // predicate 0x3 is not used in this list
1282  SSE_CMP_P(cmpnlt, 0x5);
1283  SSE_CMP_P(cmpnle, 0x6);
1284 
1285 #undef SSE_CMP_P
1286 
1287  void minps(XMMRegister dst, XMMRegister src);  // this group: mostly register-source / Operand-source pairs
1288  void minps(XMMRegister dst, Operand src);
1289  void maxps(XMMRegister dst, XMMRegister src);
1290  void maxps(XMMRegister dst, Operand src);
1291  void rcpps(XMMRegister dst, XMMRegister src);
1292  void rcpps(XMMRegister dst, Operand src);
1293  void rsqrtps(XMMRegister dst, XMMRegister src);
1294  void rsqrtps(XMMRegister dst, Operand src);
1295  void sqrtps(XMMRegister dst, XMMRegister src);
1296  void sqrtps(XMMRegister dst, Operand src);
1297  void movups(XMMRegister dst, XMMRegister src);
1298  void movups(XMMRegister dst, Operand src);  // Operand as source
1299  void movups(Operand dst, XMMRegister src);  // Operand as destination
1300  void psrldq(XMMRegister dst, uint8_t shift);  // single register plus an immediate named shift
1301  void pshufd(XMMRegister dst, XMMRegister src, uint8_t shuffle);
1302  void pshufd(XMMRegister dst, Operand src, uint8_t shuffle);
1303  void pshufhw(XMMRegister dst, XMMRegister src, uint8_t shuffle);  // register source only
1304  void pshuflw(XMMRegister dst, XMMRegister src, uint8_t shuffle);  // register source only
1305  void cvtdq2ps(XMMRegister dst, XMMRegister src);
1306  void cvtdq2ps(XMMRegister dst, Operand src);
1307 
1308  // AVX instruction. The *sd fused multiply-add wrappers below each forward a fixed opcode byte to vfmasd.
1309  void vfmadd132sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1310  vfmasd(0x99, dst, src1, src2);  // vfmadd*: 132 -> 0x99, 213 -> 0xa9, 231 -> 0xb9
1311  }
1312  void vfmadd213sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1313  vfmasd(0xa9, dst, src1, src2);
1314  }
1315  void vfmadd231sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1316  vfmasd(0xb9, dst, src1, src2);
1317  }
1318  void vfmadd132sd(XMMRegister dst, XMMRegister src1, Operand src2) {
1319  vfmasd(0x99, dst, src1, src2);  // Operand overloads reuse the same opcode bytes as the register overloads
1320  }
1321  void vfmadd213sd(XMMRegister dst, XMMRegister src1, Operand src2) {
1322  vfmasd(0xa9, dst, src1, src2);
1323  }
1324  void vfmadd231sd(XMMRegister dst, XMMRegister src1, Operand src2) {
1325  vfmasd(0xb9, dst, src1, src2);
1326  }
1327  void vfmsub132sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1328  vfmasd(0x9b, dst, src1, src2);  // vfmsub*: 0x9b / 0xab / 0xbb
1329  }
1330  void vfmsub213sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1331  vfmasd(0xab, dst, src1, src2);
1332  }
1333  void vfmsub231sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1334  vfmasd(0xbb, dst, src1, src2);
1335  }
1336  void vfmsub132sd(XMMRegister dst, XMMRegister src1, Operand src2) {
1337  vfmasd(0x9b, dst, src1, src2);
1338  }
1339  void vfmsub213sd(XMMRegister dst, XMMRegister src1, Operand src2) {
1340  vfmasd(0xab, dst, src1, src2);
1341  }
1342  void vfmsub231sd(XMMRegister dst, XMMRegister src1, Operand src2) {
1343  vfmasd(0xbb, dst, src1, src2);
1344  }
1345  void vfnmadd132sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1346  vfmasd(0x9d, dst, src1, src2);  // vfnmadd*: 0x9d / 0xad / 0xbd
1347  }
1348  void vfnmadd213sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1349  vfmasd(0xad, dst, src1, src2);
1350  }
1351  void vfnmadd231sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1352  vfmasd(0xbd, dst, src1, src2);
1353  }
1354  void vfnmadd132sd(XMMRegister dst, XMMRegister src1, Operand src2) {
1355  vfmasd(0x9d, dst, src1, src2);
1356  }
1357  void vfnmadd213sd(XMMRegister dst, XMMRegister src1, Operand src2) {
1358  vfmasd(0xad, dst, src1, src2);
1359  }
1360  void vfnmadd231sd(XMMRegister dst, XMMRegister src1, Operand src2) {
1361  vfmasd(0xbd, dst, src1, src2);
1362  }
1363  void vfnmsub132sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1364  vfmasd(0x9f, dst, src1, src2);  // vfnmsub*: 0x9f / 0xaf / 0xbf
1365  }
1366  void vfnmsub213sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1367  vfmasd(0xaf, dst, src1, src2);
1368  }
1369  void vfnmsub231sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1370  vfmasd(0xbf, dst, src1, src2);
1371  }
1372  void vfnmsub132sd(XMMRegister dst, XMMRegister src1, Operand src2) {
1373  vfmasd(0x9f, dst, src1, src2);
1374  }
1375  void vfnmsub213sd(XMMRegister dst, XMMRegister src1, Operand src2) {
1376  vfmasd(0xaf, dst, src1, src2);
1377  }
1378  void vfnmsub231sd(XMMRegister dst, XMMRegister src1, Operand src2) {
1379  vfmasd(0xbf, dst, src1, src2);
1380  }
1381  void vfmasd(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2);  // out-of-line emitter behind all *sd wrappers above
1382  void vfmasd(byte op, XMMRegister dst, XMMRegister src1, Operand src2);
1383 
1384  void vfmadd132ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1385  vfmass(0x99, dst, src1, src2);  // *ss wrappers use the same opcode bytes as their *sd counterparts but forward to vfmass
1386  }
1387  void vfmadd213ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1388  vfmass(0xa9, dst, src1, src2);
1389  }
1390  void vfmadd231ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1391  vfmass(0xb9, dst, src1, src2);
1392  }
1393  void vfmadd132ss(XMMRegister dst, XMMRegister src1, Operand src2) {
1394  vfmass(0x99, dst, src1, src2);  // Operand overloads reuse the register overloads' opcode bytes
1395  }
1396  void vfmadd213ss(XMMRegister dst, XMMRegister src1, Operand src2) {
1397  vfmass(0xa9, dst, src1, src2);
1398  }
1399  void vfmadd231ss(XMMRegister dst, XMMRegister src1, Operand src2) {
1400  vfmass(0xb9, dst, src1, src2);
1401  }
1402  void vfmsub132ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1403  vfmass(0x9b, dst, src1, src2);  // vfmsub*: 0x9b / 0xab / 0xbb
1404  }
1405  void vfmsub213ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1406  vfmass(0xab, dst, src1, src2);
1407  }
1408  void vfmsub231ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1409  vfmass(0xbb, dst, src1, src2);
1410  }
1411  void vfmsub132ss(XMMRegister dst, XMMRegister src1, Operand src2) {
1412  vfmass(0x9b, dst, src1, src2);
1413  }
1414  void vfmsub213ss(XMMRegister dst, XMMRegister src1, Operand src2) {
1415  vfmass(0xab, dst, src1, src2);
1416  }
1417  void vfmsub231ss(XMMRegister dst, XMMRegister src1, Operand src2) {
1418  vfmass(0xbb, dst, src1, src2);
1419  }
1420  void vfnmadd132ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1421  vfmass(0x9d, dst, src1, src2);  // vfnmadd*: 0x9d / 0xad / 0xbd
1422  }
1423  void vfnmadd213ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1424  vfmass(0xad, dst, src1, src2);
1425  }
1426  void vfnmadd231ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1427  vfmass(0xbd, dst, src1, src2);
1428  }
1429  void vfnmadd132ss(XMMRegister dst, XMMRegister src1, Operand src2) {
1430  vfmass(0x9d, dst, src1, src2);
1431  }
1432  void vfnmadd213ss(XMMRegister dst, XMMRegister src1, Operand src2) {
1433  vfmass(0xad, dst, src1, src2);
1434  }
1435  void vfnmadd231ss(XMMRegister dst, XMMRegister src1, Operand src2) {
1436  vfmass(0xbd, dst, src1, src2);
1437  }
1438  void vfnmsub132ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1439  vfmass(0x9f, dst, src1, src2);  // vfnmsub*: 0x9f / 0xaf / 0xbf
1440  }
1441  void vfnmsub213ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1442  vfmass(0xaf, dst, src1, src2);
1443  }
1444  void vfnmsub231ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1445  vfmass(0xbf, dst, src1, src2);
1446  }
1447  void vfnmsub132ss(XMMRegister dst, XMMRegister src1, Operand src2) {
1448  vfmass(0x9f, dst, src1, src2);
1449  }
1450  void vfnmsub213ss(XMMRegister dst, XMMRegister src1, Operand src2) {
1451  vfmass(0xaf, dst, src1, src2);
1452  }
1453  void vfnmsub231ss(XMMRegister dst, XMMRegister src1, Operand src2) {
1454  vfmass(0xbf, dst, src1, src2);
1455  }
1456  void vfmass(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2);  // out-of-line emitter behind all *ss wrappers above
1457  void vfmass(byte op, XMMRegister dst, XMMRegister src1, Operand src2);
1458 
1459  void vmovd(XMMRegister dst, Register src);  // vmovd/vmovq are declared here and defined out of line
1460  void vmovd(XMMRegister dst, Operand src);
1461  void vmovd(Register dst, XMMRegister src);
1462  void vmovq(XMMRegister dst, Register src);
1463  void vmovq(XMMRegister dst, Operand src);
1464  void vmovq(Register dst, XMMRegister src);
1465 
1466  void vmovsd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1467  vsd(0x10, dst, src1, src2);  // three-register form: forwards to vsd with opcode 0x10
1468  }
1469  void vmovsd(XMMRegister dst, Operand src) { vsd(0x10, dst, xmm0, src); }  // Operand source: opcode 0x10, xmm0 passed in the src1 slot
1470  void vmovsd(Operand dst, XMMRegister src) { vsd(0x11, src, xmm0, dst); }  // Operand destination: opcode 0x11, src passed in vsd's first register slot
1471 
1472 #define AVX_SP_3(instr, opcode) \
1473  AVX_S_3(instr, opcode) \
1474  AVX_P_3(instr, opcode)
1475 
1476 #define AVX_S_3(instr, opcode) \
1477  AVX_3(instr##ss, opcode, vss) \
1478  AVX_3(instr##sd, opcode, vsd)
1479 
1480 #define AVX_P_3(instr, opcode) \
1481  AVX_3(instr##ps, opcode, vps) \
1482  AVX_3(instr##pd, opcode, vpd)
1483 
1484 #define AVX_3(instr, opcode, impl) \
1485  void instr(XMMRegister dst, XMMRegister src1, XMMRegister src2) { \
1486  impl(opcode, dst, src1, src2); \
1487  } \
1488  void instr(XMMRegister dst, XMMRegister src1, Operand src2) { \
1489  impl(opcode, dst, src1, src2); \
1490  }
1491 
1492  AVX_SP_3(vsqrt, 0x51);  // AVX_SP_3 defines the ss, sd, ps and pd variants (via vss/vsd/vps/vpd), each with register and Operand src2 overloads
1493  AVX_SP_3(vadd, 0x58);
1494  AVX_SP_3(vsub, 0x5c);
1495  AVX_SP_3(vmul, 0x59);
1496  AVX_SP_3(vdiv, 0x5e);
1497  AVX_SP_3(vmin, 0x5d);
1498  AVX_SP_3(vmax, 0x5f);
1499  AVX_P_3(vand, 0x54);  // AVX_P_3 defines only the ps and pd variants
1500  AVX_P_3(vor, 0x56);
1501  AVX_P_3(vxor, 0x57);
1502  AVX_3(vcvtsd2ss, 0x5a, vsd);  // single instruction, routed through vsd
1503  AVX_3(vhaddps, 0x7c, vsd);  // a *ps name routed through vsd, i.e. emitted with vsd's kF2/k0F selectors
1504 
1505 #undef AVX_3
1506 #undef AVX_S_3
1507 #undef AVX_P_3
1508 #undef AVX_SP_3
1509 
1510  void vpsrlq(XMMRegister dst, XMMRegister src, byte imm8) {
1511  vpd(0x73, xmm2, dst, src);  // fixed xmm2 in the first register slot; NOTE(review): presumably its code (2) is the opcode extension - confirm
1512  emit(imm8);  // immediate byte follows the instruction emitted by vpd
1513  }
1514  void vpsllq(XMMRegister dst, XMMRegister src, byte imm8) {
1515  vpd(0x73, xmm6, dst, src);  // same opcode byte as vpsrlq; only the fixed register (xmm6) differs
1516  emit(imm8);
1517  }
1518  void vcvtss2sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1519  vinstr(0x5a, dst, src1, src2, kF3, k0F, kWIG);  // opcode 0x5a with the kF3 prefix selector; W passed as kWIG
1520  }
1521  void vcvtss2sd(XMMRegister dst, XMMRegister src1, Operand src2) {
1522  vinstr(0x5a, dst, src1, src2, kF3, k0F, kWIG);  // Operand-source overload, identical selectors
1523  }
1524  void vcvtlsi2sd(XMMRegister dst, XMMRegister src1, Register src2) {  // integer-to-float family: all use opcode 0x2a; kF2/kF3 and kW0/kW1 vary per wrapper
1525  const XMMRegister src2_as_xmm = XMMRegister::from_code(src2.code());  // carry src2's register code in an XMMRegister so the register overload of vinstr applies
1526  vinstr(0x2a, dst, src1, src2_as_xmm, kF2, k0F, kW0);
1527  }
1528  void vcvtlsi2sd(XMMRegister dst, XMMRegister src1, Operand src2) {
1529  vinstr(0x2a, dst, src1, src2, kF2, k0F, kW0);  // Operand source is passed through unchanged
1530  }
1531  void vcvtlsi2ss(XMMRegister dst, XMMRegister src1, Register src2) {
1532  const XMMRegister src2_as_xmm = XMMRegister::from_code(src2.code());
1533  vinstr(0x2a, dst, src1, src2_as_xmm, kF3, k0F, kW0);  // ss variant: kF3 instead of kF2
1534  }
1535  void vcvtlsi2ss(XMMRegister dst, XMMRegister src1, Operand src2) {
1536  vinstr(0x2a, dst, src1, src2, kF3, k0F, kW0);
1537  }
1538  void vcvtqsi2ss(XMMRegister dst, XMMRegister src1, Register src2) {
1539  const XMMRegister src2_as_xmm = XMMRegister::from_code(src2.code());
1540  vinstr(0x2a, dst, src1, src2_as_xmm, kF3, k0F, kW1);  // q variant: kW1 instead of kW0
1541  }
1542  void vcvtqsi2ss(XMMRegister dst, XMMRegister src1, Operand src2) {
1543  vinstr(0x2a, dst, src1, src2, kF3, k0F, kW1);
1544  }
1545  void vcvtqsi2sd(XMMRegister dst, XMMRegister src1, Register src2) {
1546  const XMMRegister src2_as_xmm = XMMRegister::from_code(src2.code());
1547  vinstr(0x2a, dst, src1, src2_as_xmm, kF2, k0F, kW1);
1548  }
1549  void vcvtqsi2sd(XMMRegister dst, XMMRegister src1, Operand src2) {
1550  vinstr(0x2a, dst, src1, src2, kF2, k0F, kW1);
1551  }
1552  void vcvttss2si(Register dst, XMMRegister src) {  // float-to-integer family: the destination is a general-purpose Register
1553  const XMMRegister dst_as_xmm = XMMRegister::from_code(dst.code());  // carry dst's register code in an XMMRegister for vinstr
1554  vinstr(0x2c, dst_as_xmm, xmm0, src, kF3, k0F, kW0);  // xmm0 is passed in the src1 slot
1555  }
1556  void vcvttss2si(Register dst, Operand src) {
1557  const XMMRegister dst_as_xmm = XMMRegister::from_code(dst.code());
1558  vinstr(0x2c, dst_as_xmm, xmm0, src, kF3, k0F, kW0);
1559  }
1560  void vcvttsd2si(Register dst, XMMRegister src) {
1561  const XMMRegister dst_as_xmm = XMMRegister::from_code(dst.code());
1562  vinstr(0x2c, dst_as_xmm, xmm0, src, kF2, k0F, kW0);  // sd variant: kF2 instead of kF3
1563  }
1564  void vcvttsd2si(Register dst, Operand src) {
1565  const XMMRegister dst_as_xmm = XMMRegister::from_code(dst.code());
1566  vinstr(0x2c, dst_as_xmm, xmm0, src, kF2, k0F, kW0);
1567  }
1568  void vcvttss2siq(Register dst, XMMRegister src) {
1569  const XMMRegister dst_as_xmm = XMMRegister::from_code(dst.code());
1570  vinstr(0x2c, dst_as_xmm, xmm0, src, kF3, k0F, kW1);  // q variant: kW1 instead of kW0
1571  }
1572  void vcvttss2siq(Register dst, Operand src) {
1573  const XMMRegister dst_as_xmm = XMMRegister::from_code(dst.code());
1574  vinstr(0x2c, dst_as_xmm, xmm0, src, kF3, k0F, kW1);
1575  }
1576  void vcvttsd2siq(Register dst, XMMRegister src) {
1577  const XMMRegister dst_as_xmm = XMMRegister::from_code(dst.code());
1578  vinstr(0x2c, dst_as_xmm, xmm0, src, kF2, k0F, kW1);
1579  }
1580  void vcvttsd2siq(Register dst, Operand src) {
1581  const XMMRegister dst_as_xmm = XMMRegister::from_code(dst.code());
1582  vinstr(0x2c, dst_as_xmm, xmm0, src, kF2, k0F, kW1);
1583  }
1584  void vcvtsd2si(Register dst, XMMRegister src) {
1585  const XMMRegister dst_as_xmm = XMMRegister::from_code(dst.code());
1586  vinstr(0x2d, dst_as_xmm, xmm0, src, kF2, k0F, kW0);  // opcode 0x2d here, versus 0x2c for the vcvtt* wrappers above
1587  }
1588  void vucomisd(XMMRegister dst, XMMRegister src) {
1589  vinstr(0x2e, dst, xmm0, src, k66, k0F, kWIG);  // two-operand instruction: xmm0 is passed in the src1 slot
1590  }
1591  void vucomisd(XMMRegister dst, Operand src) {
1592  vinstr(0x2e, dst, xmm0, src, k66, k0F, kWIG);  // Operand-source overload, identical selectors
1593  }
1594  void vroundss(XMMRegister dst, XMMRegister src1, XMMRegister src2,
1595  RoundingMode mode) {
1596  vinstr(0x0a, dst, src1, src2, k66, k0F3A, kWIG);  // opcode 0x0a with the k0F3A leading-opcode selector
1597  emit(static_cast<byte>(mode) | 0x8); // Mask precision exception.
1598  }
1599  void vroundsd(XMMRegister dst, XMMRegister src1, XMMRegister src2,
1600  RoundingMode mode) {
1601  vinstr(0x0b, dst, src1, src2, k66, k0F3A, kWIG);  // same as vroundss except for the opcode byte (0x0b)
1602  emit(static_cast<byte>(mode) | 0x8); // Mask precision exception.
1603  }
1604 
1605  void vsd(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1606  vinstr(op, dst, src1, src2, kF2, k0F, kWIG);  // vsd = vinstr with fixed kF2 / k0F / kWIG selectors; only the opcode byte varies
1607  }
1608  void vsd(byte op, XMMRegister dst, XMMRegister src1, Operand src2) {
1609  vinstr(op, dst, src1, src2, kF2, k0F, kWIG);  // Operand overload, identical selectors
1610  }
1611 
1612  void vmovss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1613  vss(0x10, dst, src1, src2);  // three-register form: forwards to vss with opcode 0x10
1614  }
1615  void vmovss(XMMRegister dst, Operand src) { vss(0x10, dst, xmm0, src); }  // Operand source: opcode 0x10, xmm0 in the src1 slot
1616  void vmovss(Operand dst, XMMRegister src) { vss(0x11, src, xmm0, dst); }  // Operand destination: opcode 0x11, src in vss's first register slot
1617  void vucomiss(XMMRegister dst, XMMRegister src);  // defined out of line, unlike vucomisd
1618  void vucomiss(XMMRegister dst, Operand src);
1619  void vss(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2);  // out-of-line helper, unlike vsd which is defined inline in this class
1620  void vss(byte op, XMMRegister dst, XMMRegister src1, Operand src2);
1621 
1622  void vmovaps(XMMRegister dst, XMMRegister src) { vps(0x28, dst, xmm0, src); }  // moves forward to vps/vpd with xmm0 in the src1 slot
1623  void vmovups(XMMRegister dst, XMMRegister src) { vps(0x10, dst, xmm0, src); }
1624  void vmovups(XMMRegister dst, Operand src) { vps(0x10, dst, xmm0, src); }  // Operand source: opcode 0x10
1625  void vmovups(Operand dst, XMMRegister src) { vps(0x11, src, xmm0, dst); }  // Operand destination: opcode 0x11, src in the first register slot
1626  void vmovapd(XMMRegister dst, XMMRegister src) { vpd(0x28, dst, xmm0, src); }  // pd counterpart of vmovaps: same opcode byte, via vpd
1627  void vmovupd(XMMRegister dst, Operand src) { vpd(0x10, dst, xmm0, src); }
1628  void vmovupd(Operand dst, XMMRegister src) { vpd(0x11, src, xmm0, dst); }
1629  void vmovmskps(Register dst, XMMRegister src) {  // destination is a general-purpose Register
1630  const XMMRegister dst_as_xmm = XMMRegister::from_code(dst.code());  // carry dst's register code in an XMMRegister for vps
1631  vps(0x50, dst_as_xmm, xmm0, src);
1632  }
1633  void vmovmskpd(Register dst, XMMRegister src) {  // pd counterpart: same opcode byte, via vpd
1634  const XMMRegister dst_as_xmm = XMMRegister::from_code(dst.code());
1635  vpd(0x50, dst_as_xmm, xmm0, src);
1636  }
1637  void vcmpps(XMMRegister dst, XMMRegister src1, XMMRegister src2, int8_t cmp) {
1638  vps(0xC2, dst, src1, src2);  // opcode 0xC2 via vps ...
1639  emit(cmp);  // ... followed by the comparison predicate as a trailing byte
1640  }
1641  void vcmpps(XMMRegister dst, XMMRegister src1, Operand src2, int8_t cmp) {
1642  vps(0xC2, dst, src1, src2);
1643  emit(cmp);
1644  }
1645  void vcmppd(XMMRegister dst, XMMRegister src1, XMMRegister src2, int8_t cmp) {
1646  vpd(0xC2, dst, src1, src2);  // pd counterpart: same opcode byte, via vpd
1647  emit(cmp);
1648  }
1649  void vcmppd(XMMRegister dst, XMMRegister src1, Operand src2, int8_t cmp) {
1650  vpd(0xC2, dst, src1, src2);
1651  emit(cmp);
1652  }
1653 
1654 #define AVX_CMP_P(instr, imm8) \
1655  void instr##ps(XMMRegister dst, XMMRegister src1, XMMRegister src2) { \
1656  vcmpps(dst, src1, src2, imm8); \
1657  } \
1658  void instr##ps(XMMRegister dst, XMMRegister src1, Operand src2) { \
1659  vcmpps(dst, src1, src2, imm8); \
1660  } \
1661  void instr##pd(XMMRegister dst, XMMRegister src1, XMMRegister src2) { \
1662  vcmppd(dst, src1, src2, imm8); \
1663  } \
1664  void instr##pd(XMMRegister dst, XMMRegister src1, Operand src2) { \
1665  vcmppd(dst, src1, src2, imm8); \
1666  }
1667 
1668  AVX_CMP_P(vcmpeq, 0x0);  // each use defines <instr>ps and <instr>pd (register and Operand src2) forwarding to vcmpps/vcmppd with this predicate
1669  AVX_CMP_P(vcmplt, 0x1);
1670  AVX_CMP_P(vcmple, 0x2);
1671  AVX_CMP_P(vcmpneq, 0x4);  // predicate 0x3 is not used in this list
1672  AVX_CMP_P(vcmpnlt, 0x5);
1673  AVX_CMP_P(vcmpnle, 0x6);
1674 
1675 #undef AVX_CMP_P
1676 
1677  void vlddqu(XMMRegister dst, Operand src) {  // Operand source only
1678  vinstr(0xF0, dst, xmm0, src, kF2, k0F, kWIG);  // opcode 0xF0; xmm0 is passed in the src1 slot
1679  }
1680  void vpsllw(XMMRegister dst, XMMRegister src, uint8_t imm8) {  // word shifts share opcode 0x71, dword shifts share 0x72
1681  vinstr(0x71, xmm6, dst, src, k66, k0F, kWIG);  // fixed xmm6 in the first slot; NOTE(review): presumably its code (6) is the opcode extension - confirm
1682  emit(imm8);  // shift count follows as an immediate byte
1683  }
1684  void vpsrlw(XMMRegister dst, XMMRegister src, uint8_t imm8) {
1685  vinstr(0x71, xmm2, dst, src, k66, k0F, kWIG);  // fixed xmm2
1686  emit(imm8);
1687  }
1688  void vpsraw(XMMRegister dst, XMMRegister src, uint8_t imm8) {
1689  vinstr(0x71, xmm4, dst, src, k66, k0F, kWIG);  // fixed xmm4
1690  emit(imm8);
1691  }
1692  void vpslld(XMMRegister dst, XMMRegister src, uint8_t imm8) {
1693  vinstr(0x72, xmm6, dst, src, k66, k0F, kWIG);  // dword forms reuse the same fixed registers as the word forms
1694  emit(imm8);
1695  }
1696  void vpsrld(XMMRegister dst, XMMRegister src, uint8_t imm8) {
1697  vinstr(0x72, xmm2, dst, src, k66, k0F, kWIG);
1698  emit(imm8);
1699  }
1700  void vpsrad(XMMRegister dst, XMMRegister src, uint8_t imm8) {
1701  vinstr(0x72, xmm4, dst, src, k66, k0F, kWIG);
1702  emit(imm8);
1703  }
1704  void vpextrb(Register dst, XMMRegister src, uint8_t imm8) {
1705  XMMRegister idst = XMMRegister::from_code(dst.code());
1706  vinstr(0x14, src, xmm0, idst, k66, k0F3A, kW0);
1707  emit(imm8);
1708  }
1709  void vpextrb(Operand dst, XMMRegister src, uint8_t imm8) {
1710  vinstr(0x14, src, xmm0, dst, k66, k0F3A, kW0);
1711  emit(imm8);
1712  }
// VPEXTRW: extract the 16-bit word of |src| selected by imm8. The two forms
// use different encodings: the register form uses opcode 0xc5 in the 0F map
// with the destination in vinstr's first register slot, whereas the memory
// form uses opcode 0x15 in the 0F3A map with the XMM source in the first
// slot.
1713  void vpextrw(Register dst, XMMRegister src, uint8_t imm8) {
// Same-code XMMRegister alias of the general-purpose destination.
1714  XMMRegister idst = XMMRegister::from_code(dst.code());
1715  vinstr(0xc5, idst, xmm0, src, k66, k0F, kW0);
1716  emit(imm8);
1717  }
1718  void vpextrw(Operand dst, XMMRegister src, uint8_t imm8) {
1719  vinstr(0x15, src, xmm0, dst, k66, k0F3A, kW0);
1720  emit(imm8);
1721  }
// VPEXTRD: extract the 32-bit element of |src| selected by imm8 into a
// general-purpose register or a memory operand (opcode 0x16, 66 prefix,
// 0F3A map, W0). The XMM source goes in vinstr's first register slot.
1722  void vpextrd(Register dst, XMMRegister src, uint8_t imm8) {
// Same-code XMMRegister alias of the general-purpose destination.
1723  XMMRegister idst = XMMRegister::from_code(dst.code());
1724  vinstr(0x16, src, xmm0, idst, k66, k0F3A, kW0);
1725  emit(imm8);
1726  }
1727  void vpextrd(Operand dst, XMMRegister src, uint8_t imm8) {
1728  vinstr(0x16, src, xmm0, dst, k66, k0F3A, kW0);
1729  emit(imm8);
1730  }
// VPINSRB: three-operand byte insert. |dst| receives |src1| with the byte
// lane selected by imm8 replaced by a byte taken from |src2| (a
// general-purpose register or memory). Opcode 0x20, 66 prefix, 0F3A map.
1731  void vpinsrb(XMMRegister dst, XMMRegister src1, Register src2, uint8_t imm8) {
// Same-code XMMRegister alias so the general-purpose source fits vinstr.
1732  XMMRegister isrc = XMMRegister::from_code(src2.code());
1733  vinstr(0x20, dst, src1, isrc, k66, k0F3A, kW0);
1734  emit(imm8);
1735  }
1736  void vpinsrb(XMMRegister dst, XMMRegister src1, Operand src2, uint8_t imm8) {
1737  vinstr(0x20, dst, src1, src2, k66, k0F3A, kW0);
1738  emit(imm8);
1739  }
// VPINSRW: three-operand word insert. |dst| receives |src1| with the 16-bit
// lane selected by imm8 replaced by a word taken from |src2|. Unlike the
// byte/dword inserts this one is encoded in the 0F map (opcode 0xc4).
1740  void vpinsrw(XMMRegister dst, XMMRegister src1, Register src2, uint8_t imm8) {
// Same-code XMMRegister alias so the general-purpose source fits vinstr.
1741  XMMRegister isrc = XMMRegister::from_code(src2.code());
1742  vinstr(0xc4, dst, src1, isrc, k66, k0F, kW0);
1743  emit(imm8);
1744  }
1745  void vpinsrw(XMMRegister dst, XMMRegister src1, Operand src2, uint8_t imm8) {
1746  vinstr(0xc4, dst, src1, src2, k66, k0F, kW0);
1747  emit(imm8);
1748  }
// VPINSRD: three-operand dword insert. |dst| receives |src1| with the 32-bit
// lane selected by imm8 replaced by a dword taken from |src2| (a
// general-purpose register or memory). Opcode 0x22, 66 prefix, 0F3A map, W0.
1749  void vpinsrd(XMMRegister dst, XMMRegister src1, Register src2, uint8_t imm8) {
// Same-code XMMRegister alias so the general-purpose source fits vinstr.
1750  XMMRegister isrc = XMMRegister::from_code(src2.code());
1751  vinstr(0x22, dst, src1, isrc, k66, k0F3A, kW0);
1752  emit(imm8);
1753  }
1754  void vpinsrd(XMMRegister dst, XMMRegister src1, Operand src2, uint8_t imm8) {
1755  vinstr(0x22, dst, src1, src2, k66, k0F3A, kW0);
1756  emit(imm8);
1757  }
// VPSHUFD: shuffle the 32-bit lanes of |src| into |dst| as directed by imm8
// (opcode 0x70, 66 prefix, 0F map). xmm0 fills the middle register argument,
// which is not an operand of this instruction; the selector is emitted as a
// trailing imm8 byte.
1758  void vpshufd(XMMRegister dst, XMMRegister src, uint8_t imm8) {
1759  vinstr(0x70, dst, xmm0, src, k66, k0F, kWIG);
1760  emit(imm8);
1761  }
1762 
1763  void vps(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2);
1764  void vps(byte op, XMMRegister dst, XMMRegister src1, Operand src2);
1765  void vpd(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2);
1766  void vpd(byte op, XMMRegister dst, XMMRegister src1, Operand src2);
1767 
1768  // BMI instructions
// ANDN (BMI1, opcode 0xf2): per the Intel definition, dst = ~src1 & src2.
// The q forms go through bmi1q and the l forms through bmi1l (64-bit and
// 32-bit operand size respectively). Arguments map onto the helper's
// (reg, vreg, rm) slots in order, so only src2 may be a memory operand.
1769  void andnq(Register dst, Register src1, Register src2) {
1770  bmi1q(0xf2, dst, src1, src2);
1771  }
1772  void andnq(Register dst, Register src1, Operand src2) {
1773  bmi1q(0xf2, dst, src1, src2);
1774  }
1775  void andnl(Register dst, Register src1, Register src2) {
1776  bmi1l(0xf2, dst, src1, src2);
1777  }
1778  void andnl(Register dst, Register src1, Operand src2) {
1779  bmi1l(0xf2, dst, src1, src2);
1780  }
// BEXTR (BMI1, opcode 0xf7): bit-field extract from src1 under control of
// src2. Note the deliberate argument swap in the calls below: src2 is passed
// in the helper's vreg slot and src1 in the rm slot, which is why src1 (not
// src2) is the operand that may live in memory.
1781  void bextrq(Register dst, Register src1, Register src2) {
1782  bmi1q(0xf7, dst, src2, src1);
1783  }
1784  void bextrq(Register dst, Operand src1, Register src2) {
1785  bmi1q(0xf7, dst, src2, src1);
1786  }
1787  void bextrl(Register dst, Register src1, Register src2) {
1788  bmi1l(0xf7, dst, src2, src1);
1789  }
1790  void bextrl(Register dst, Operand src1, Register src2) {
1791  bmi1l(0xf7, dst, src2, src1);
1792  }
// BLSI / BLSMSK / BLSR (BMI1) all share opcode 0xf3. The instruction is
// selected by the fixed register passed in the helper's reg slot, which acts
// as a ModR/M opcode extension rather than an operand: rbx for BLSI,
// rdx for BLSMSK, rcx for BLSR (/3, /2 and /1 in Intel's tables, assuming
// the usual register codes). The real destination travels in the vreg slot
// and the source in the rm slot.
1793  void blsiq(Register dst, Register src) { bmi1q(0xf3, rbx, dst, src); }
1794  void blsiq(Register dst, Operand src) { bmi1q(0xf3, rbx, dst, src); }
1795  void blsil(Register dst, Register src) { bmi1l(0xf3, rbx, dst, src); }
1796  void blsil(Register dst, Operand src) { bmi1l(0xf3, rbx, dst, src); }
1797  void blsmskq(Register dst, Register src) { bmi1q(0xf3, rdx, dst, src); }
1798  void blsmskq(Register dst, Operand src) { bmi1q(0xf3, rdx, dst, src); }
1799  void blsmskl(Register dst, Register src) { bmi1l(0xf3, rdx, dst, src); }
1800  void blsmskl(Register dst, Operand src) { bmi1l(0xf3, rdx, dst, src); }
1801  void blsrq(Register dst, Register src) { bmi1q(0xf3, rcx, dst, src); }
1802  void blsrq(Register dst, Operand src) { bmi1q(0xf3, rcx, dst, src); }
1803  void blsrl(Register dst, Register src) { bmi1l(0xf3, rcx, dst, src); }
1804  void blsrl(Register dst, Operand src) { bmi1l(0xf3, rcx, dst, src); }
1805  void tzcntq(Register dst, Register src);
1806  void tzcntq(Register dst, Operand src);
1807  void tzcntl(Register dst, Register src);
1808  void tzcntl(Register dst, Operand src);
1809 
1810  void lzcntq(Register dst, Register src);
1811  void lzcntq(Register dst, Operand src);
1812  void lzcntl(Register dst, Register src);
1813  void lzcntl(Register dst, Operand src);
1814 
1815  void popcntq(Register dst, Register src);
1816  void popcntq(Register dst, Operand src);
1817  void popcntl(Register dst, Register src);
1818  void popcntl(Register dst, Operand src);
1819 
// BZHI (BMI2, opcode 0xf5, no SIMD prefix): zero the high bits of src1
// starting at the bit index given by src2. As with bextr, the arguments are
// swapped on the way to the helper: src2 goes in the vreg slot and src1 in
// the rm slot, so src1 is the operand that may be in memory.
1820  void bzhiq(Register dst, Register src1, Register src2) {
1821  bmi2q(kNone, 0xf5, dst, src2, src1);
1822  }
1823  void bzhiq(Register dst, Operand src1, Register src2) {
1824  bmi2q(kNone, 0xf5, dst, src2, src1);
1825  }
1826  void bzhil(Register dst, Register src1, Register src2) {
1827  bmi2l(kNone, 0xf5, dst, src2, src1);
1828  }
1829  void bzhil(Register dst, Operand src1, Register src2) {
1830  bmi2l(kNone, 0xf5, dst, src2, src1);
1831  }
// MULX (BMI2, F2 prefix, opcode 0xf6): flag-preserving unsigned multiply
// with two destination registers. dst1 is encoded in the helper's reg slot
// and dst2 in the vreg slot; per the Intel definition the reg-slot register
// receives the high half of the product and the vreg-slot register the low
// half, with rdx/edx as the implicit second factor -- confirm against the
// SDM before relying on the order.
1832  void mulxq(Register dst1, Register dst2, Register src) {
1833  bmi2q(kF2, 0xf6, dst1, dst2, src);
1834  }
1835  void mulxq(Register dst1, Register dst2, Operand src) {
1836  bmi2q(kF2, 0xf6, dst1, dst2, src);
1837  }
1838  void mulxl(Register dst1, Register dst2, Register src) {
1839  bmi2l(kF2, 0xf6, dst1, dst2, src);
1840  }
1841  void mulxl(Register dst1, Register dst2, Operand src) {
1842  bmi2l(kF2, 0xf6, dst1, dst2, src);
1843  }
// PDEP / PEXT (BMI2): parallel bit deposit / extract. Both use opcode 0xf5
// and differ only in the SIMD prefix (F2 for pdep, F3 for pext). Arguments
// map straight onto the helper's (reg, vreg, rm) slots: dst, src1, src2,
// so only src2 may be a memory operand.
1844  void pdepq(Register dst, Register src1, Register src2) {
1845  bmi2q(kF2, 0xf5, dst, src1, src2);
1846  }
1847  void pdepq(Register dst, Register src1, Operand src2) {
1848  bmi2q(kF2, 0xf5, dst, src1, src2);
1849  }
1850  void pdepl(Register dst, Register src1, Register src2) {
1851  bmi2l(kF2, 0xf5, dst, src1, src2);
1852  }
1853  void pdepl(Register dst, Register src1, Operand src2) {
1854  bmi2l(kF2, 0xf5, dst, src1, src2);
1855  }
1856  void pextq(Register dst, Register src1, Register src2) {
1857  bmi2q(kF3, 0xf5, dst, src1, src2);
1858  }
1859  void pextq(Register dst, Register src1, Operand src2) {
1860  bmi2q(kF3, 0xf5, dst, src1, src2);
1861  }
1862  void pextl(Register dst, Register src1, Register src2) {
1863  bmi2l(kF3, 0xf5, dst, src1, src2);
1864  }
1865  void pextl(Register dst, Register src1, Operand src2) {
1866  bmi2l(kF3, 0xf5, dst, src1, src2);
1867  }
// SARX / SHLX / SHRX (BMI2): shifts of src1 by a count held in src2
// (arithmetic right, logical left, logical right). All share opcode 0xf7
// and are distinguished by the SIMD prefix: F3 for sarx, 66 for shlx, F2
// for shrx. The count register src2 is passed in the helper's vreg slot
// and the value src1 in the rm slot, so src1 is the operand that may be
// in memory.
1868  void sarxq(Register dst, Register src1, Register src2) {
1869  bmi2q(kF3, 0xf7, dst, src2, src1);
1870  }
1871  void sarxq(Register dst, Operand src1, Register src2) {
1872  bmi2q(kF3, 0xf7, dst, src2, src1);
1873  }
1874  void sarxl(Register dst, Register src1, Register src2) {
1875  bmi2l(kF3, 0xf7, dst, src2, src1);
1876  }
1877  void sarxl(Register dst, Operand src1, Register src2) {
1878  bmi2l(kF3, 0xf7, dst, src2, src1);
1879  }
1880  void shlxq(Register dst, Register src1, Register src2) {
1881  bmi2q(k66, 0xf7, dst, src2, src1);
1882  }
1883  void shlxq(Register dst, Operand src1, Register src2) {
1884  bmi2q(k66, 0xf7, dst, src2, src1);
1885  }
1886  void shlxl(Register dst, Register src1, Register src2) {
1887  bmi2l(k66, 0xf7, dst, src2, src1);
1888  }
1889  void shlxl(Register dst, Operand src1, Register src2) {
1890  bmi2l(k66, 0xf7, dst, src2, src1);
1891  }
1892  void shrxq(Register dst, Register src1, Register src2) {
1893  bmi2q(kF2, 0xf7, dst, src2, src1);
1894  }
1895  void shrxq(Register dst, Operand src1, Register src2) {
1896  bmi2q(kF2, 0xf7, dst, src2, src1);
1897  }
1898  void shrxl(Register dst, Register src1, Register src2) {
1899  bmi2l(kF2, 0xf7, dst, src2, src1);
1900  }
1901  void shrxl(Register dst, Operand src1, Register src2) {
1902  bmi2l(kF2, 0xf7, dst, src2, src1);
1903  }
1904  void rorxq(Register dst, Register src, byte imm8);
1905  void rorxq(Register dst, Operand src, byte imm8);
1906  void rorxl(Register dst, Register src, byte imm8);
1907  void rorxl(Register dst, Operand src, byte imm8);
1908 
1909  void lfence();
1910  void pause();
1911 
1912  // Returns the number of bytes of code emitted between |label| and the
// current emission point, computed as pc_offset() minus the label's position.
// NOTE(review): presumably |label| must already be bound for pos() to be
// meaningful -- confirm against Label.
1913  int SizeOfCodeGeneratedSince(Label* label) {
1914  return pc_offset() - label->pos();
1915  }
1916 
1917  // Record a comment relocation entry that can be used by a disassembler.
1918  // Use --code-comments to enable.
1919  void RecordComment(const char* msg);
1920 
1921  // Record a deoptimization reason that can be used by a log or cpu profiler.
1922  // Use --trace-deopt to enable.
1923  void RecordDeoptReason(DeoptimizeReason reason, SourcePosition position,
1924  int id);
1925 
1926 
1927  // Write a single datum into the code stream.
1928  // Used for inline tables, e.g., jump-tables.
// db, dd and dq take an 8-, 32- and 64-bit value respectively; dp takes a
// pointer-sized value and simply forwards it to dq. The Label overload of dq
// is declared here and defined elsewhere.
1929  void db(uint8_t data);
1930  void dd(uint32_t data);
1931  void dq(uint64_t data);
1932  void dp(uintptr_t data) { dq(data); }
1933  void dq(Label* label);
1934 
1935  // Patch entries for partial constant pool.
1936  void PatchConstPool();
1937 
1938  // Check whether the partial constant pool is used for this rmode.
1939  static bool UseConstPoolFor(RelocInfo::Mode rmode);
1940 
1941  // Reports whether the emission cursor (pc_) has come within kGap bytes
1942  // of the relocation writer's position. When it has, the buffer must be
1943  // grown before another instruction or relocation entry is written.
1944  inline bool buffer_overflow() const {
1945  return !(pc_ < reloc_info_writer.pos() - kGap);
1946  }
1947 
1948  // Get the number of bytes available in the buffer, i.e. the distance from
// the emission cursor (pc_) to the relocation writer's current position,
// narrowed to int.
1949  inline int available_space() const {
1950  return static_cast<int>(reloc_info_writer.pos() - pc_);
1951  }
1952 
1953  static bool IsNop(Address addr);
1954 
1955  // Avoid overflows for displacements etc.
1956  static constexpr int kMaximalBufferSize = 512 * MB;
1957 
// Raw single-byte accessors into the code buffer at offset |pos|. No bounds
// checking is performed here; the caller must supply a valid offset.
1958  byte byte_at(int pos) { return buffer_[pos]; }
1959  void set_byte_at(int pos, byte value) { buffer_[pos] = value; }
1960 
1961  protected:
1962  // Call near indirect
1963  void call(Operand operand);
1964 
1965  private:
// Address of the byte at offset |pos| in the code buffer.
1966  byte* addr_at(int pos) { return buffer_ + pos; }
// Read / write a 32-bit value at buffer offset |pos| by reinterpreting the
// byte address as a uint32_t pointer.
// NOTE(review): |pos| is not required to be 4-byte aligned here, so this
// relies on the target tolerating unaligned 32-bit accesses -- confirm this
// is acceptable or switch to an unaligned-safe helper.
1967  uint32_t long_at(int pos) {
1968  return *reinterpret_cast<uint32_t*>(addr_at(pos));
1969  }
1970  void long_at_put(int pos, uint32_t x) {
1971  *reinterpret_cast<uint32_t*>(addr_at(pos)) = x;
1972  }
1973 
1974  // code emission
1975  void GrowBuffer();
1976 
// Append one byte at the emission cursor and advance the cursor. No space
// check is made here; callers must have ensured room in the buffer.
1977  void emit(byte x) { *pc_++ = x; }
1978  inline void emitl(uint32_t x);
1979  inline void emitp(Address x, RelocInfo::Mode rmode);
1980  inline void emitq(uint64_t x);
1981  inline void emitw(uint16_t x);
1982  inline void emit_runtime_entry(Address entry, RelocInfo::Mode rmode);
1983  inline void emit(Immediate x);
1984 
1985  // Emits a REX prefix that encodes a 64-bit operand size and
1986  // the top bit of both register codes.
1987  // High bit of reg goes to REX.R, high bit of rm_reg goes to REX.B.
1988  // REX.W is set.
1989  inline void emit_rex_64(XMMRegister reg, Register rm_reg);
1990  inline void emit_rex_64(Register reg, XMMRegister rm_reg);
1991  inline void emit_rex_64(Register reg, Register rm_reg);
1992  inline void emit_rex_64(XMMRegister reg, XMMRegister rm_reg);
1993 
1994  // Emits a REX prefix that encodes a 64-bit operand size and
1995  // the top bit of the destination, index, and base register codes.
1996  // The high bit of reg is used for REX.R, the high bit of op's base
1997  // register is used for REX.B, and the high bit of op's index register
1998  // is used for REX.X. REX.W is set.
1999  inline void emit_rex_64(Register reg, Operand op);
2000  inline void emit_rex_64(XMMRegister reg, Operand op);
2001 
2002  // Emits a REX prefix that encodes a 64-bit operand size and
2003  // the top bit of the register code.
2004  // The high bit of register is used for REX.B.
2005  // REX.W is set and REX.R and REX.X are clear.
2006  inline void emit_rex_64(Register rm_reg);
2007 
2008  // Emits a REX prefix that encodes a 64-bit operand size and
2009  // the top bit of the index and base register codes.
2010  // The high bit of op's base register is used for REX.B, and the high
2011  // bit of op's index register is used for REX.X.
2012  // REX.W is set and REX.R clear.
2013  inline void emit_rex_64(Operand op);
2014 
2015  // Emit a REX prefix that only sets REX.W to choose a 64-bit operand size.
// 0x48 is the REX base pattern 0100 in the high nibble with only the W bit
// (bit 3) set in the low nibble; R, X and B are clear.
2016  void emit_rex_64() { emit(0x48); }
2017 
2018  // High bit of reg goes to REX.R, high bit of rm_reg goes to REX.B.
2019  // REX.W is clear.
2020  inline void emit_rex_32(Register reg, Register rm_reg);
2021 
2022  // The high bit of reg is used for REX.R, the high bit of op's base
2023  // register is used for REX.B, and the high bit of op's index register
2024  // is used for REX.X. REX.W is cleared.
2025  inline void emit_rex_32(Register reg, Operand op);
2026 
2027  // High bit of rm_reg goes to REX.B.
2028  // REX.W, REX.R and REX.X are clear.
2029  inline void emit_rex_32(Register rm_reg);
2030 
2031  // High bit of base goes to REX.B and high bit of index to REX.X.
2032  // REX.W and REX.R are clear.
2033  inline void emit_rex_32(Operand op);
2034 
2035  // High bit of reg goes to REX.R, high bit of rm_reg goes to REX.B.
2036  // REX.W is cleared. If no REX bits are set, no byte is emitted.
2037  inline void emit_optional_rex_32(Register reg, Register rm_reg);
2038 
2039  // The high bit of reg is used for REX.R, the high bit of op's base
2040  // register is used for REX.B, and the high bit of op's index register
2041  // is used for REX.X. REX.W is cleared. If no REX bits are set, nothing
2042  // is emitted.
2043  inline void emit_optional_rex_32(Register reg, Operand op);
2044 
2045  // As for emit_optional_rex_32(Register, Register), except that
2046  // the registers are XMM registers.
2047  inline void emit_optional_rex_32(XMMRegister reg, XMMRegister base);
2048 
2049  // As for emit_optional_rex_32(Register, Register), except that
2050  // one of the registers is an XMM register.
2051  inline void emit_optional_rex_32(XMMRegister reg, Register base);
2052 
2053  // As for emit_optional_rex_32(Register, Register), except that
2054  // one of the registers is an XMM register.
2055  inline void emit_optional_rex_32(Register reg, XMMRegister base);
2056 
2057  // As for emit_optional_rex_32(Register, Operand), except that
2058  // the register is an XMM register.
2059  inline void emit_optional_rex_32(XMMRegister reg, Operand op);
2060 
2061  // Optionally do as emit_rex_32(Register) if the register number has
2062  // the high bit set.
2063  inline void emit_optional_rex_32(Register rm_reg);
2064  inline void emit_optional_rex_32(XMMRegister rm_reg);
2065 
2066  // Optionally do as emit_rex_32(Operand) if the operand register
2067  // numbers have a high bit set.
2068  inline void emit_optional_rex_32(Operand op);
2069 
// Operand-less REX helper: a 64-bit operand size gets a bare REX.W prefix,
// while a 32-bit operand size needs no prefix and emits nothing. Any other
// size trips the debug check.
2070  void emit_rex(int size) {
2071  if (size != kInt64Size) {
2072  DCHECK_EQ(size, kInt32Size);
2073  } else {
2074  emit_rex_64();
2075  }
2076  }
2077 
// One-operand REX helper. For a 32-bit operand size the prefix is optional
// and is delegated to emit_optional_rex_32; for a 64-bit operand size the
// emit_rex_64 overload for p1 is used. Other sizes trip the debug check.
2078  template<class P1>
2079  void emit_rex(P1 p1, int size) {
2080  if (size != kInt64Size) {
2081  DCHECK_EQ(size, kInt32Size);
2082  emit_optional_rex_32(p1);
2083  } else {
2084  emit_rex_64(p1);
2085  }
2086  }
2087 
// Two-operand REX helper. For a 32-bit operand size the prefix is optional
// and is delegated to emit_optional_rex_32; for a 64-bit operand size the
// emit_rex_64 overload for (p1, p2) is used. Other sizes trip the debug
// check.
2088  template<class P1, class P2>
2089  void emit_rex(P1 p1, P2 p2, int size) {
2090  if (size != kInt64Size) {
2091  DCHECK_EQ(size, kInt32Size);
2092  emit_optional_rex_32(p1, p2);
2093  } else {
2094  emit_rex_64(p1, p2);
2095  }
2096  }
2097 
2098  // Emit vex prefix
// The byte0 helpers emit the fixed leading byte of the prefix: 0xc5
// introduces the two-byte VEX form and 0xc4 the three-byte form. The
// remaining prefix bytes are produced by the byte1 / byte2 helpers.
2099  void emit_vex2_byte0() { emit(0xc5); }
2100  inline void emit_vex2_byte1(XMMRegister reg, XMMRegister v, VectorLength l,
2101  SIMDPrefix pp);
2102  void emit_vex3_byte0() { emit(0xc4); }
2103  inline void emit_vex3_byte1(XMMRegister reg, XMMRegister rm, LeadingOpcode m);
2104  inline void emit_vex3_byte1(XMMRegister reg, Operand rm, LeadingOpcode m);
2105  inline void emit_vex3_byte2(VexW w, XMMRegister v, VectorLength l,
2106  SIMDPrefix pp);
2107  inline void emit_vex_prefix(XMMRegister reg, XMMRegister v, XMMRegister rm,
2108  VectorLength l, SIMDPrefix pp, LeadingOpcode m,
2109  VexW w);
2110  inline void emit_vex_prefix(Register reg, Register v, Register rm,
2111  VectorLength l, SIMDPrefix pp, LeadingOpcode m,
2112  VexW w);
2113  inline void emit_vex_prefix(XMMRegister reg, XMMRegister v, Operand rm,
2114  VectorLength l, SIMDPrefix pp, LeadingOpcode m,
2115  VexW w);
2116  inline void emit_vex_prefix(Register reg, Register v, Operand rm,
2117  VectorLength l, SIMDPrefix pp, LeadingOpcode m,
2118  VexW w);
2119 
2120  // Emit the ModR/M byte, and optionally the SIB byte and
2121  // 1- or 4-byte offset for a memory operand. Also encodes
2122  // the second operand of the operation, a register or operation
2123  // subcode, into the reg field of the ModR/M byte.
// Only reg.low_bits() is forwarded to the int overload; the register's high
// bit is not encoded here and must be carried by a prefix emitted by the
// caller.
2124  void emit_operand(Register reg, Operand adr) {
2125  emit_operand(reg.low_bits(), adr);
2126  }
2127 
2128  // Emit the ModR/M byte, and optionally the SIB byte and
2129  // 1- or 4-byte offset for a memory operand. Also used to encode
2130  // a three-bit opcode extension into the ModR/M byte.
2131  void emit_operand(int rm, Operand adr);
2132 
2133  // Emit a register-direct ModR/M byte (0xC0 sets both mod bits): the low
// bits of |reg| fill the reg field and the low bits of |rm_reg| fill the
// rm field.
2134  void emit_modrm(Register reg, Register rm_reg) {
2135  emit(rm_reg.low_bits() | reg.low_bits() << 3 | 0xC0);
2136  }
2137 
2138  // Emit a register-direct ModR/M byte whose reg field carries a three-bit
2139  // operation subcode instead of a register; |rm_reg| fills the rm field.
2140  void emit_modrm(int code, Register rm_reg) {
2141  DCHECK(is_uint3(code));
2142  emit(rm_reg.low_bits() | code << 3 | 0xC0);
2143  }
2144 
2145  // Emit the code-object-relative offset of the label's position
2146  inline void emit_code_relative_offset(Label* label);
2147 
2148  // The first argument is the reg field, the second argument is the r/m field.
2149  void emit_sse_operand(XMMRegister dst, XMMRegister src);
2150  void emit_sse_operand(XMMRegister reg, Operand adr);
2151  void emit_sse_operand(Register reg, Operand adr);
2152  void emit_sse_operand(XMMRegister dst, Register src);
2153  void emit_sse_operand(Register dst, XMMRegister src);
2154  void emit_sse_operand(XMMRegister dst);
2155 
2156  // Emit machine code for one of the operations ADD, ADC, SUB, SBC,
2157  // AND, OR, XOR, or CMP. The encodings of these operations are all
2158  // similar, differing just in the opcode or in the reg field of the
2159  // ModR/M byte.
2160  void arithmetic_op_8(byte opcode, Register reg, Register rm_reg);
2161  void arithmetic_op_8(byte opcode, Register reg, Operand rm_reg);
2162  void arithmetic_op_16(byte opcode, Register reg, Register rm_reg);
2163  void arithmetic_op_16(byte opcode, Register reg, Operand rm_reg);
2164  // Operate on operands/registers with pointer size, 32-bit or 64-bit size.
2165  void arithmetic_op(byte opcode, Register reg, Register rm_reg, int size);
2166  void arithmetic_op(byte opcode, Register reg, Operand rm_reg, int size);
2167  // Operate on a byte in memory or register.
2168  void immediate_arithmetic_op_8(byte subcode,
2169  Register dst,
2170  Immediate src);
2171  void immediate_arithmetic_op_8(byte subcode, Operand dst, Immediate src);
2172  // Operate on a word in memory or register.
2173  void immediate_arithmetic_op_16(byte subcode,
2174  Register dst,
2175  Immediate src);
2176  void immediate_arithmetic_op_16(byte subcode, Operand dst, Immediate src);
2177  // Operate on operands/registers with pointer size, 32-bit or 64-bit size.
2178  void immediate_arithmetic_op(byte subcode,
2179  Register dst,
2180  Immediate src,
2181  int size);
2182  void immediate_arithmetic_op(byte subcode, Operand dst, Immediate src,
2183  int size);
2184 
2185  // Emit machine code for a shift operation.
2186  void shift(Operand dst, Immediate shift_amount, int subcode, int size);
2187  void shift(Register dst, Immediate shift_amount, int subcode, int size);
2188  // Shift dst by cl % 64 bits.
2189  void shift(Register dst, int subcode, int size);
2190  void shift(Operand dst, int subcode, int size);
2191 
2192  void emit_farith(int b1, int b2, int i);
2193 
2194  // labels
2195  // void print(Label* L);
2196  void bind_to(Label* L, int pos);
2197 
2198  // record reloc info for current pc_
2199  void RecordRelocInfo(RelocInfo::Mode rmode, intptr_t data = 0);
2200 
2201  // Arithmetics
// ADD in all operand combinations; |size| selects the 32- or 64-bit form.
// Opcode 0x03 is the "register <- register/memory" direction, opcode 0x01
// the "memory <- register" direction (hence the swapped src, dst arguments
// in that overload). Immediate forms use subcode 0x0 of the immediate
// arithmetic group.
2202  void emit_add(Register dst, Register src, int size) {
2203  arithmetic_op(0x03, dst, src, size);
2204  }
2205 
2206  void emit_add(Register dst, Immediate src, int size) {
2207  immediate_arithmetic_op(0x0, dst, src, size);
2208  }
2209 
2210  void emit_add(Register dst, Operand src, int size) {
2211  arithmetic_op(0x03, dst, src, size);
2212  }
2213 
2214  void emit_add(Operand dst, Register src, int size) {
2215  arithmetic_op(0x1, src, dst, size);
2216  }
2217 
2218  void emit_add(Operand dst, Immediate src, int size) {
2219  immediate_arithmetic_op(0x0, dst, src, size);
2220  }
2221 
// AND in all operand combinations; |size| selects the 32- or 64-bit form.
// Opcode 0x23 is the "register <- register/memory" direction, opcode 0x21
// the "memory <- register" direction (hence the swapped src, dst arguments
// in that overload). Immediate forms use subcode 0x4 of the immediate
// arithmetic group.
2222  void emit_and(Register dst, Register src, int size) {
2223  arithmetic_op(0x23, dst, src, size);
2224  }
2225 
2226  void emit_and(Register dst, Operand src, int size) {
2227  arithmetic_op(0x23, dst, src, size);
2228  }
2229 
2230  void emit_and(Operand dst, Register src, int size) {
2231  arithmetic_op(0x21, src, dst, size);
2232  }
2233 
2234  void emit_and(Register dst, Immediate src, int size) {
2235  immediate_arithmetic_op(0x4, dst, src, size);
2236  }
2237 
2238  void emit_and(Operand dst, Immediate src, int size) {
2239  immediate_arithmetic_op(0x4, dst, src, size);
2240  }
2241 
// CMP in all operand combinations; |size| selects the 32- or 64-bit form.
// Opcode 0x3B compares a register against a register/memory operand,
// opcode 0x39 is the form with the memory operand first (hence the swapped
// src, dst arguments in that overload). Immediate forms use subcode 0x7 of
// the immediate arithmetic group.
2242  void emit_cmp(Register dst, Register src, int size) {
2243  arithmetic_op(0x3B, dst, src, size);
2244  }
2245 
2246  void emit_cmp(Register dst, Operand src, int size) {
2247  arithmetic_op(0x3B, dst, src, size);
2248  }
2249 
2250  void emit_cmp(Operand dst, Register src, int size) {
2251  arithmetic_op(0x39, src, dst, size);
2252  }
2253 
2254  void emit_cmp(Register dst, Immediate src, int size) {
2255  immediate_arithmetic_op(0x7, dst, src, size);
2256  }
2257 
2258  void emit_cmp(Operand dst, Immediate src, int size) {
2259  immediate_arithmetic_op(0x7, dst, src, size);
2260  }
2261 
2262  // Compare {al,ax,eax,rax} with dst. If equal, set ZF and write src into
2263  // dst. Otherwise clear ZF and write dst into {al,ax,eax,rax}. This
2264  // operation is only atomic if prefixed by the lock instruction.
2265  void emit_cmpxchg(Operand dst, Register src, int size);
2266 
2267  void emit_dec(Register dst, int size);
2268  void emit_dec(Operand dst, int size);
2269 
2270  // Divide rdx:rax by src. Quotient in rax, remainder in rdx when size is 64.
2271  // Divide edx:eax by lower 32 bits of src. Quotient in eax, remainder in edx
2272  // when size is 32.
2273  void emit_idiv(Register src, int size);
2274  void emit_div(Register src, int size);
2275 
2276  // Signed multiply instructions.
2277  // rdx:rax = rax * src when size is 64 or edx:eax = eax * src when size is 32.
2278  void emit_imul(Register src, int size);
2279  void emit_imul(Operand src, int size);
2280  void emit_imul(Register dst, Register src, int size);
2281  void emit_imul(Register dst, Operand src, int size);
2282  void emit_imul(Register dst, Register src, Immediate imm, int size);
2283  void emit_imul(Register dst, Operand src, Immediate imm, int size);
2284 
2285  void emit_inc(Register dst, int size);
2286  void emit_inc(Operand dst, int size);
2287 
2288  void emit_lea(Register dst, Operand src, int size);
2289 
2290  void emit_mov(Register dst, Operand src, int size);
2291  void emit_mov(Register dst, Register src, int size);
2292  void emit_mov(Operand dst, Register src, int size);
2293  void emit_mov(Register dst, Immediate value, int size);
2294  void emit_mov(Operand dst, Immediate value, int size);
2295 
2296  void emit_movzxb(Register dst, Operand src, int size);
2297  void emit_movzxb(Register dst, Register src, int size);
2298  void emit_movzxw(Register dst, Operand src, int size);
2299  void emit_movzxw(Register dst, Register src, int size);
2300 
2301  void emit_neg(Register dst, int size);
2302  void emit_neg(Operand dst, int size);
2303 
2304  void emit_not(Register dst, int size);
2305  void emit_not(Operand dst, int size);
2306 
// OR in all operand combinations; |size| selects the 32- or 64-bit form.
// Opcode 0x0B is the "register <- register/memory" direction, opcode 0x09
// the "memory <- register" direction (hence the swapped src, dst arguments
// in that overload). Immediate forms use subcode 0x1 of the immediate
// arithmetic group.
2307  void emit_or(Register dst, Register src, int size) {
2308  arithmetic_op(0x0B, dst, src, size);
2309  }
2310 
2311  void emit_or(Register dst, Operand src, int size) {
2312  arithmetic_op(0x0B, dst, src, size);
2313  }
2314 
2315  void emit_or(Operand dst, Register src, int size) {
2316  arithmetic_op(0x9, src, dst, size);
2317  }
2318 
2319  void emit_or(Register dst, Immediate src, int size) {
2320  immediate_arithmetic_op(0x1, dst, src, size);
2321  }
2322 
2323  void emit_or(Operand dst, Immediate src, int size) {
2324  immediate_arithmetic_op(0x1, dst, src, size);
2325  }
2326 
2327  void emit_repmovs(int size);
2328 
// SBB (subtract with borrow), register-register form only: opcode 0x1b in
// the "register <- register/memory" direction; |size| selects the 32- or
// 64-bit form.
2329  void emit_sbb(Register dst, Register src, int size) {
2330  arithmetic_op(0x1b, dst, src, size);
2331  }
2332 
// SUB in all operand combinations; |size| selects the 32- or 64-bit form.
// Opcode 0x2B is the "register <- register/memory" direction, opcode 0x29
// the "memory <- register" direction (hence the swapped src, dst arguments
// in that overload). Immediate forms use subcode 0x5 of the immediate
// arithmetic group.
2333  void emit_sub(Register dst, Register src, int size) {
2334  arithmetic_op(0x2B, dst, src, size);
2335  }
2336 
2337  void emit_sub(Register dst, Immediate src, int size) {
2338  immediate_arithmetic_op(0x5, dst, src, size);
2339  }
2340 
2341  void emit_sub(Register dst, Operand src, int size) {
2342  arithmetic_op(0x2B, dst, src, size);
2343  }
2344 
2345  void emit_sub(Operand dst, Register src, int size) {
2346  arithmetic_op(0x29, src, dst, size);
2347  }
2348 
2349  void emit_sub(Operand dst, Immediate src, int size) {
2350  immediate_arithmetic_op(0x5, dst, src, size);
2351  }
2352 
2353  void emit_test(Register dst, Register src, int size);
2354  void emit_test(Register reg, Immediate mask, int size);
2355  void emit_test(Operand op, Register reg, int size);
2356  void emit_test(Operand op, Immediate mask, int size);
// Convenience overload accepting the operands in (register, memory) order;
// it simply forwards to the (memory, register) overload with the two
// arguments exchanged.
2357  void emit_test(Register reg, Operand op, int size) {
2358  emit_test(op, reg, size);
2359  }
2360 
2361  void emit_xchg(Register dst, Register src, int size);
2362  void emit_xchg(Register dst, Operand src, int size);
2363 
// XOR register with register. A 64-bit XOR of a register with itself is
// emitted in its 32-bit form; every other case uses the requested size.
2364  void emit_xor(Register dst, Register src, int size) {
2365  if (size != kInt64Size || dst.code() != src.code()) {
2366  arithmetic_op(0x33, dst, src, size);
2367  } else {
2368  // Clearing a register: a 32 bit operation already zeroes the top 32 bits
2369  // of the 64 bit register, so the 64 bit form is unnecessary here.
2370  arithmetic_op(0x33, dst, src, kInt32Size);
2371  }
2372  }
2373 
// Remaining XOR operand combinations; |size| selects the 32- or 64-bit
// form. Opcode 0x33 is the "register <- register/memory" direction, opcode
// 0x31 the "memory <- register" direction (hence the swapped src, dst
// arguments in that overload). Immediate forms use subcode 0x6 of the
// immediate arithmetic group.
2374  void emit_xor(Register dst, Operand src, int size) {
2375  arithmetic_op(0x33, dst, src, size);
2376  }
2377 
2378  void emit_xor(Register dst, Immediate src, int size) {
2379  immediate_arithmetic_op(0x6, dst, src, size);
2380  }
2381 
2382  void emit_xor(Operand dst, Immediate src, int size) {
2383  immediate_arithmetic_op(0x6, dst, src, size);
2384  }
2385 
2386  void emit_xor(Operand dst, Register src, int size) {
2387  arithmetic_op(0x31, src, dst, size);
2388  }
2389 
2390  // Most BMI instructions are similar.
2391  void bmi1q(byte op, Register reg, Register vreg, Register rm);
2392  void bmi1q(byte op, Register reg, Register vreg, Operand rm);
2393  void bmi1l(byte op, Register reg, Register vreg, Register rm);
2394  void bmi1l(byte op, Register reg, Register vreg, Operand rm);
2395  void bmi2q(SIMDPrefix pp, byte op, Register reg, Register vreg, Register rm);
2396  void bmi2q(SIMDPrefix pp, byte op, Register reg, Register vreg, Operand rm);
2397  void bmi2l(SIMDPrefix pp, byte op, Register reg, Register vreg, Register rm);
2398  void bmi2l(SIMDPrefix pp, byte op, Register reg, Register vreg, Operand rm);
2399 
2400  // record the position of jmp/jcc instruction
2401  void record_farjmp_position(Label* L, int pos);
2402 
2403  bool is_optimizable_farjmp(int idx);
2404 
2405  void AllocateAndInstallRequestedHeapObjects(Isolate* isolate);
2406 
2407  friend class EnsureSpace;
2408  friend class RegExpMacroAssemblerX64;
2409 
2410  // code generation
2411  RelocInfoWriter reloc_info_writer;
2412 
2413  // Internal reference positions, required for (potential) patching in
2414  // GrowBuffer(); contains only those internal references whose labels
2415  // are already bound.
2416  std::deque<int> internal_reference_positions_;
2417 
2418  // Variables for this instance of assembler
2419  int farjmp_num_ = 0;
2420  std::deque<int> farjmp_positions_;
2421  std::map<Label*, std::vector<int>> label_farjmp_maps_;
2422 
2423  ConstPool constpool_;
2424 
2425  friend class ConstPool;
2426 };
2427 
2428 
2429 // Scope guard placed at the start of an emission sequence. Constructing it
2430 // grows the assembler's buffer when buffer_overflow() reports that space is
2431 // running out; in DEBUG builds the destructor additionally checks that
2432 // fewer than kGap bytes were emitted while the guard was alive.
2433 class EnsureSpace {
2434  public:
2435  explicit EnsureSpace(Assembler* assembler) : assembler_(assembler) {
2436  if (assembler->buffer_overflow()) assembler->GrowBuffer();
2437 #ifdef DEBUG
2438  space_before_ = assembler->available_space();
2439 #endif
2440  }
2441 
2442 #ifdef DEBUG
2443  ~EnsureSpace() {
2444  const int space_after = assembler_->available_space();
2445  DCHECK(space_before_ - space_after < assembler_->kGap);
2446  }
2447 #endif
2448 
2449  private:
2450  Assembler* assembler_;
2451 #ifdef DEBUG
2452  int space_before_;
2453 #endif
2454 };
2455 
2456 // Define {RegisterName} methods for the register types.
2457 DEFINE_REGISTER_NAMES(Register, GENERAL_REGISTERS)
2458 DEFINE_REGISTER_NAMES(XMMRegister, DOUBLE_REGISTERS)
2459 
2460 } // namespace internal
2461 } // namespace v8
2462 
2463 #endif // V8_X64_ASSEMBLER_X64_H_
Definition: libplatform.h:13
Definition: v8.h:963