V8 API Reference, 7.2.502.16 (for Deno 0.2.4)
assembler-ia32.h
1 // Copyright (c) 1994-2006 Sun Microsystems Inc.
2 // All Rights Reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are
6 // met:
7 //
8 // - Redistributions of source code must retain the above copyright notice,
9 // this list of conditions and the following disclaimer.
10 //
11 // - Redistribution in binary form must reproduce the above copyright
12 // notice, this list of conditions and the following disclaimer in the
13 // documentation and/or other materials provided with the distribution.
14 //
15 // - Neither the name of Sun Microsystems or the names of contributors may
16 // be used to endorse or promote products derived from this software without
17 // specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
20 // IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
21 // THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
23 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
24 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
25 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
26 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
27 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
28 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
29 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 
31 // The original source code covered by the above license above has been
32 // modified significantly by Google Inc.
33 // Copyright 2011 the V8 project authors. All rights reserved.
34 
35 // A light-weight IA32 Assembler.
36 
37 #ifndef V8_IA32_ASSEMBLER_IA32_H_
38 #define V8_IA32_ASSEMBLER_IA32_H_
39 
40 #include <deque>
41 
42 #include "src/assembler.h"
43 #include "src/ia32/constants-ia32.h"
44 #include "src/ia32/sse-instr.h"
45 #include "src/isolate.h"
46 #include "src/label.h"
47 #include "src/objects/smi.h"
48 #include "src/utils.h"
49 
50 namespace v8 {
51 namespace internal {
52 // X-macro register lists: V is applied to each name; list order fixes the code.
53 #define GENERAL_REGISTERS(V) \
54  V(eax) \
55  V(ecx) \
56  V(edx) \
57  V(ebx) \
58  V(esp) \
59  V(ebp) \
60  V(esi) \
61  V(edi)
62 // Subset of GENERAL_REGISTERS; ebx, esp and ebp are not listed.
63 #define ALLOCATABLE_GENERAL_REGISTERS(V) \
64  V(eax) \
65  V(ecx) \
66  V(edx) \
67  V(esi) \
68  V(edi)
69 // The eight XMM registers; the float and SIMD128 lists below alias this list.
70 #define DOUBLE_REGISTERS(V) \
71  V(xmm0) \
72  V(xmm1) \
73  V(xmm2) \
74  V(xmm3) \
75  V(xmm4) \
76  V(xmm5) \
77  V(xmm6) \
78  V(xmm7)
79 
80 #define FLOAT_REGISTERS DOUBLE_REGISTERS
81 #define SIMD128_REGISTERS DOUBLE_REGISTERS
82 // Subset of DOUBLE_REGISTERS; xmm0 is not listed.
83 #define ALLOCATABLE_DOUBLE_REGISTERS(V) \
84  V(xmm1) \
85  V(xmm2) \
86  V(xmm3) \
87  V(xmm4) \
88  V(xmm5) \
89  V(xmm6) \
90  V(xmm7)
91 // Numeric codes of the general registers: kRegCode_eax == 0 ... kRegCode_edi == 7.
92 enum RegisterCode {
93 #define REGISTER_CODE(R) kRegCode_##R,
94  GENERAL_REGISTERS(REGISTER_CODE)
95 #undef REGISTER_CODE
96  kRegAfterLast  // one past the last code, i.e. the number of general registers
97 };
98 // A general-purpose ia32 register, identified by its RegisterCode.
99 class Register : public RegisterBase<Register, kRegAfterLast> {
100  public:
101  bool is_byte_register() const { return reg_code_ <= 3; }  // codes 0-3: eax, ecx, edx, ebx
102 
103  private:
104  friend class RegisterBase<Register, kRegAfterLast>;
105  explicit constexpr Register(int code) : RegisterBase(code) {}  // private; RegisterBase is a friend
106 };
107 // Compile-time checks that Register stays as cheap to copy as an int.
108 ASSERT_TRIVIALLY_COPYABLE(Register);
109 static_assert(sizeof(Register) == sizeof(int),
110  "Register can efficiently be passed by value");
111 // Defines one constexpr Register constant per general register: eax ... edi.
112 #define DEFINE_REGISTER(R) \
113  constexpr Register R = Register::from_code<kRegCode_##R>();
114 GENERAL_REGISTERS(DEFINE_REGISTER)
115 #undef DEFINE_REGISTER
116 constexpr Register no_reg = Register::no_reg();  // presumably the "no register" sentinel - see RegisterBase
117 // Per-architecture configuration flags; not read in the visible part of this header.
118 constexpr bool kPadArguments = false;
119 constexpr bool kSimpleFPAliasing = true;  // NOTE(review): presumably because float/double/SIMD128 share the XMM registers - confirm
120 constexpr bool kSimdMaskRegisters = false;
121 // Numeric codes of the XMM registers: kDoubleCode_xmm0 == 0 ... kDoubleCode_xmm7 == 7.
122 enum DoubleCode {
123 #define REGISTER_CODE(R) kDoubleCode_##R,
124  DOUBLE_REGISTERS(REGISTER_CODE)
125 #undef REGISTER_CODE
126  kDoubleAfterLast  // one past the last code, i.e. the number of XMM registers
127 };
128 // An XMM register, identified by its DoubleCode. It adds nothing to RegisterBase.
129 class XMMRegister : public RegisterBase<XMMRegister, kDoubleAfterLast> {
130  friend class RegisterBase<XMMRegister, kDoubleAfterLast>;
131  explicit constexpr XMMRegister(int code) : RegisterBase(code) {}  // private; RegisterBase is a friend
132 };
133 
134 typedef XMMRegister FloatRegister;
135 // On ia32, float, double and SIMD128 values all use the XMM registers.
136 typedef XMMRegister DoubleRegister;
137 
138 typedef XMMRegister Simd128Register;
139 // Defines one constexpr DoubleRegister constant per XMM register: xmm0 ... xmm7.
140 #define DEFINE_REGISTER(R) \
141  constexpr DoubleRegister R = DoubleRegister::from_code<kDoubleCode_##R>();
142 DOUBLE_REGISTERS(DEFINE_REGISTER)
143 #undef DEFINE_REGISTER
144 constexpr DoubleRegister no_dreg = DoubleRegister::no_reg();  // counterpart of no_reg for XMM registers
145 // Number of general registers (eax ... edi).
146 // Note that the bit values must match those used in actual instruction encoding
147 constexpr int kNumRegs = 8;
148 
149 // Caller-saved registers
150 constexpr RegList kJSCallerSaved =
151  Register::ListOf<eax, ecx, edx,
152  ebx, // used as a caller-saved register in JavaScript code
153  edi // callee function
154  >();
155 // Must equal the number of registers listed in kJSCallerSaved above.
156 constexpr int kNumJSCallerSaved = 5;
157 
158 // Number of registers for which space is reserved in safepoints.
159 constexpr int kNumSafepointRegisters = 8;
160 // Condition codes; values 0-15 are OR-ed into opcode bytes (e.g. kJccShortPrefix | cc).
161 enum Condition {
162  // any value < 0 is considered no_condition
163  no_condition = -1,
164  // Adjacent even/odd values are negations of each other (cf. NegateCondition).
165  overflow = 0,
166  no_overflow = 1,
167  below = 2,
168  above_equal = 3,
169  equal = 4,
170  not_equal = 5,
171  below_equal = 6,
172  above = 7,
173  negative = 8,
174  positive = 9,
175  parity_even = 10,
176  parity_odd = 11,
177  less = 12,
178  greater_equal = 13,
179  less_equal = 14,
180  greater = 15,
181 
182  // aliases
183  carry = below,
184  not_carry = above_equal,
185  zero = equal,
186  not_zero = not_equal,
187  sign = negative,
188  not_sign = positive
189 };
190 
191 
192 // Returns the logical negation of cc. Conditions are numbered so that cc and
193 // !cc differ only in bit 0 (e.g. equal = 4, not_equal = 5). Negating the
194 // default no_condition (-1) yields a non-default no_condition value (-2),
195 // which works as long as tests for no_condition check for condition < 0.
196 inline Condition NegateCondition(Condition cc) {
197  return static_cast<Condition>(cc ^ 1);
198 }
199 // Rounding-mode selector. NOTE(review): the values appear to match the imm8
200 // rounding-control field of SSE4.1 ROUNDSS/ROUNDSD - confirm at the use sites.
201 enum RoundingMode {
202  kRoundToNearest = 0x0,
203  kRoundDown = 0x1,
204  kRoundUp = 0x2,
205  kRoundToZero = 0x3
206 };
207 
208 // -----------------------------------------------------------------------------
209 // Machine instruction Immediates
210 // An instruction immediate: a plain int or a HeapObjectRequest, plus its reloc mode.
211 class Immediate {
212  public:
213  // Calls where x is an Address (uintptr_t) resolve to this overload.
214  inline explicit Immediate(int x, RelocInfo::Mode rmode = RelocInfo::NONE) {
215  value_.immediate = x;
216  rmode_ = rmode;
217  }
218  inline explicit Immediate(const ExternalReference& ext)
219  : Immediate(ext.address(), RelocInfo::EXTERNAL_REFERENCE) {}
220  inline explicit Immediate(Handle<HeapObject> handle)
221  : Immediate(handle.address(), RelocInfo::EMBEDDED_OBJECT) {}
222  inline explicit Immediate(Smi value)
223  : Immediate(static_cast<intptr_t>(value.ptr())) {}
224  // Factory functions; only declared here, their bodies are not in this listing.
225  static Immediate EmbeddedNumber(double number); // Smi or HeapNumber.
226  static Immediate EmbeddedCode(CodeStub* code);
227  static Immediate EmbeddedStringConstant(const StringConstantBase* str);
228  // Wraps a Label* as an INTERNAL_REFERENCE immediate (via the private constructor).
229  static Immediate CodeRelativeOffset(Label* label) {
230  return Immediate(label);
231  }
232  // True if the value is a HeapObjectRequest rather than a plain int.
233  bool is_heap_object_request() const {
234  DCHECK_IMPLIES(is_heap_object_request_,
235  rmode_ == RelocInfo::EMBEDDED_OBJECT ||
236  rmode_ == RelocInfo::CODE_TARGET);
237  return is_heap_object_request_;
238  }
239  // Callers must check is_heap_object_request() first (DCHECKed).
240  HeapObjectRequest heap_object_request() const {
241  DCHECK(is_heap_object_request());
242  return value_.heap_object_request;
243  }
244  // The plain int value; must not be called on a heap object request (DCHECKed).
245  int immediate() const {
246  DCHECK(!is_heap_object_request());
247  return value_.immediate;
248  }
249 
250  bool is_embedded_object() const {
251  return !is_heap_object_request() && rmode() == RelocInfo::EMBEDDED_OBJECT;
252  }
253  // Rebuilds the Handle from the address stored by the Handle<HeapObject> constructor.
254  Handle<HeapObject> embedded_object() const {
255  return Handle<HeapObject>(reinterpret_cast<Address*>(immediate()));
256  }
257 
258  bool is_external_reference() const {
259  return rmode() == RelocInfo::EXTERNAL_REFERENCE;
260  }
261 
262  ExternalReference external_reference() const {
263  DCHECK(is_external_reference());
264  return bit_cast<ExternalReference>(immediate());
265  }
266  // The predicates below are true only for immediates without relocation info.
267  bool is_zero() const { return RelocInfo::IsNone(rmode_) && immediate() == 0; }
268  bool is_int8() const {
269  return RelocInfo::IsNone(rmode_) && i::is_int8(immediate());
270  }
271  bool is_uint8() const {
272  return RelocInfo::IsNone(rmode_) && i::is_uint8(immediate());
273  }
274  bool is_int16() const {
275  return RelocInfo::IsNone(rmode_) && i::is_int16(immediate());
276  }
277 
278  bool is_uint16() const {
279  return RelocInfo::IsNone(rmode_) && i::is_uint16(immediate());
280  }
281 
282  RelocInfo::Mode rmode() const { return rmode_; }
283 
284  private:
285  inline explicit Immediate(Label* value) {
286  value_.immediate = reinterpret_cast<int32_t>(value);  // stores the pointer itself; needs 32-bit pointers
287  rmode_ = RelocInfo::INTERNAL_REFERENCE;
288  }
289 
290  union Value {
291  Value() {}
292  HeapObjectRequest heap_object_request;
293  int immediate;
294  } value_;
295  bool is_heap_object_request_ = false;  // never set in the code shown here; presumably set by the Embedded* factories
296  RelocInfo::Mode rmode_;
297 
298  friend class Operand;
299  friend class Assembler;
300  friend class MacroAssembler;
301 };
302 
303 
304 // -----------------------------------------------------------------------------
305 // Machine instruction Operands
306 // Index scale of an Operand: times_N has the value log2(N). Pointer aliases assume 4-byte pointers.
307 enum ScaleFactor {
308  times_1 = 0,
309  times_2 = 1,
310  times_4 = 2,
311  times_8 = 3,
312  times_int_size = times_4,
313  times_half_pointer_size = times_2,
314  times_pointer_size = times_4,
315  times_twice_pointer_size = times_8
316 };
317 // The register-or-memory operand of an instruction: ModRM byte plus following bytes, kept in buf_.
318 class V8_EXPORT_PRIVATE Operand {
319  public:
320  // reg
321  V8_INLINE explicit Operand(Register reg) { set_modrm(3, reg); }
322 
323  // XMM reg
324  V8_INLINE explicit Operand(XMMRegister xmm_reg) {
325  Register reg = Register::from_code(xmm_reg.code());  // reuse the XMM code as the rm field
326  set_modrm(3, reg);
327  }
328 
329  // [disp/r]
330  V8_INLINE explicit Operand(int32_t disp, RelocInfo::Mode rmode) {
331  set_modrm(0, ebp);  // mod=0 with rm=ebp's code: displacement only, no base register
332  set_dispr(disp, rmode);
333  }
334 
335  // [disp/r]
336  V8_INLINE explicit Operand(Immediate imm) {
337  set_modrm(0, ebp);  // same displacement-only form as above
338  set_dispr(imm.immediate(), imm.rmode_);
339  }
340 
341  // [base + disp/r]
342  explicit Operand(Register base, int32_t disp,
343  RelocInfo::Mode rmode = RelocInfo::NONE);
344 
345  // [base + index*scale + disp/r]
346  explicit Operand(Register base, Register index, ScaleFactor scale,
347  int32_t disp, RelocInfo::Mode rmode = RelocInfo::NONE);
348 
349  // [index*scale + disp/r]
350  explicit Operand(Register index, ScaleFactor scale, int32_t disp,
351  RelocInfo::Mode rmode = RelocInfo::NONE);
352  // [index*scale + table]; the Label* itself becomes an INTERNAL_REFERENCE displacement.
353  static Operand JumpTable(Register index, ScaleFactor scale, Label* table) {
354  return Operand(index, scale, reinterpret_cast<int32_t>(table),
355  RelocInfo::INTERNAL_REFERENCE);
356  }
357  // [base + imm]; keeps the immediate's relocation mode.
358  static Operand ForRegisterPlusImmediate(Register base, Immediate imm) {
359  return Operand(base, imm.value_.immediate, imm.rmode_);
360  }
361 
362  // Returns true if this Operand is a wrapper for the specified register.
363  bool is_reg(Register reg) const { return is_reg(reg.code()); }
364  bool is_reg(XMMRegister reg) const { return is_reg(reg.code()); }
365 
366  // Returns true if this Operand is a wrapper for one register.
367  bool is_reg_only() const;
368 
369  // Asserts that this Operand is a wrapper for one register and returns the
370  // register.
371  Register reg() const;
372 
373  private:
374  // Set the ModRM byte without an encoded 'reg' register. The
375  // register is encoded later as part of the emit_operand operation.
376  inline void set_modrm(int mod, Register rm) {
377  DCHECK_EQ(mod & -4, 0);  // mod must fit in two bits
378  buf_[0] = mod << 6 | rm.code();  // mod in bits 6-7, rm in the low bits
379  len_ = 1;
380  }
381 
382  inline void set_sib(ScaleFactor scale, Register index, Register base);
383  inline void set_disp8(int8_t disp);
384  inline void set_dispr(int32_t disp, RelocInfo::Mode rmode) {
385  DCHECK(len_ == 1 || len_ == 2);  // ModRM written, plus at most one more byte (presumably the SIB)
386  int32_t* p = reinterpret_cast<int32_t*>(&buf_[len_]);
387  *p = disp;  // NOTE(review): type-punned, possibly unaligned store; memcpy is the portable form
388  len_ += sizeof(int32_t);
389  rmode_ = rmode;
390  }
391 
392  inline bool is_reg(int reg_code) const {
393  return ((buf_[0] & 0xF8) == 0xC0) // addressing mode is register only.
394  && ((buf_[0] & 0x07) == reg_code); // register codes match.
395  }
396 
397  byte buf_[6];  // room for ModRM, one more byte and a 32-bit displacement
398  // The number of bytes in buf_.
399  uint8_t len_ = 0;
400  // Only valid if len_ > 4.
401  RelocInfo::Mode rmode_ = RelocInfo::NONE;
402 
403  // TODO(clemensh): Get rid of this friendship, or make Operand immutable.
404  friend class Assembler;
405 };
406 ASSERT_TRIVIALLY_COPYABLE(Operand);
407 static_assert(sizeof(Operand) <= 2 * kPointerSize,
408  "Operand must be small enough to pass it by value");
409 
410 // -----------------------------------------------------------------------------
411 // A Displacement describes the 32bit immediate field of an instruction which
412 // may be used together with a Label in order to refer to a yet unknown code
413 // position. Displacements stored in the instruction stream are used to describe
414 // the instruction and to chain a list of instructions using the same Label.
415 // A Displacement contains 2 different fields:
416 //
417 // next field: position of next displacement in the chain (0 = end of list)
418 // type field: instruction type
419 //
420 // A next value of null (0) indicates the end of a chain (note that there can
421 // be no displacement at position zero, because there is always at least one
422 // instruction byte before the displacement).
423 //
424 // Displacement _data field layout
425 //
426 // |31.....2|1......0|
427 // [ next | type |
428 
429 class Displacement {
430  public:
431  enum Type { UNCONDITIONAL_JUMP, CODE_RELATIVE, OTHER, CODE_ABSOLUTE };
432  // Raw encoded word: type in bits 0-1, next in bits 2-31.
433  int data() const { return data_; }
434  Type type() const { return TypeField::decode(data_); }
435  void next(Label* L) const {  // moves L to the next chain entry, or unuses L at the end
436  int n = NextField::decode(data_);
437  n > 0 ? L->link_to(n) : L->Unuse();
438  }
439  void link_to(Label* L) { init(L, type()); }  // re-encodes for L, keeping the current type
440  // Wraps an already-encoded data word.
441  explicit Displacement(int data) { data_ = data; }
442  // Encodes a displacement for L with the given type (see init()).
443  Displacement(Label* L, Type type) { init(L, type); }
444  // Debug helper: prints "jmp" or "[other]" and the next field in hex.
445  void print() {
446  PrintF("%s (%x) ", (type() == UNCONDITIONAL_JUMP ? "jmp" : "[other]"),
447  NextField::decode(data_));
448  }
449 
450  private:
451  int data_;
452  // Bit layout of data_: a 2-bit type field and a 30-bit next field.
453  class TypeField: public BitField<Type, 0, 2> {};
454  class NextField: public BitField<int, 2, 32-2> {};
455 
456  void init(Label* L, Type type);
457 };
458 
459 class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
460  private:
461  // We check before assembling an instruction that there is sufficient
462  // space to write an instruction and its relocation information.
463  // The relocation writer's position must be kGap bytes above the end of
464  // the generated instructions. This leaves enough space for the
465  // longest possible ia32 instruction, 15 bytes, and the longest possible
466  // relocation information encoding, RelocInfoWriter::kMaxLength == 16.
467  // (There is a 15 byte limit on ia32 instruction length that rules out some
468  // otherwise valid instructions.)
469  // This allows for a single, fast space check per instruction.
470  static constexpr int kGap = 32;
471 
472  public:
473  // Create an assembler. Instructions and relocation information are emitted
474  // into a buffer, with the instructions starting from the beginning and the
475  // relocation information starting from the end of the buffer. See CodeDesc
476  // for a detailed comment on the layout (globals.h).
477  //
478  // If the provided buffer is nullptr, the assembler allocates and grows its
479  // own buffer, and buffer_size determines the initial buffer size. The buffer
480  // is owned by the assembler and deallocated upon destruction of the
481  // assembler.
482  //
483  // If the provided buffer is not nullptr, the assembler uses the provided
484  // buffer for code generation and assumes its size to be buffer_size. If the
485  // buffer is too small, a fatal error occurs. No deallocation of the buffer is
486  // done upon destruction of the assembler.
487  Assembler(const AssemblerOptions& options, void* buffer, int buffer_size);
488  virtual ~Assembler() {}
489 
490  // GetCode emits any pending (non-emitted) code and fills the descriptor
491  // desc. GetCode() is idempotent; it returns the same result if no other
492  // Assembler functions are invoked in between GetCode() calls.
493  void GetCode(Isolate* isolate, CodeDesc* desc);
494 
495  // Read/Modify the code target in the branch/call instruction at pc.
496  // The isolate argument is unused (and may be nullptr) when skipping flushing.
497  inline static Address target_address_at(Address pc, Address constant_pool);
498  inline static void set_target_address_at(
499  Address pc, Address constant_pool, Address target,
500  ICacheFlushMode icache_flush_mode = FLUSH_ICACHE_IF_NEEDED);
501 
502  // Return the code target address at a call site from the return address
503  // of that call in the instruction stream.
504  inline static Address target_address_from_return_address(Address pc);
505 
506  // This sets the branch destination (which is in the instruction on x86).
507  // This is for calls and branches within generated code.
508  inline static void deserialization_set_special_target_at(
509  Address instruction_payload, Code code, Address target);
510 
511  // Get the size of the special target encoded at 'instruction_payload'.
512  inline static int deserialization_special_target_size(
513  Address instruction_payload);
514 
515  // This sets the internal reference at the pc.
516  inline static void deserialization_set_target_internal_reference_at(
517  Address pc, Address target,
518  RelocInfo::Mode mode = RelocInfo::INTERNAL_REFERENCE);
519 
520  static constexpr int kSpecialTargetSize = kPointerSize;
521 
522  // Distance between the address of the code target in the call instruction
523  // and the return address
524  static constexpr int kCallTargetAddressOffset = kPointerSize;
525 
526  // One byte opcode for test al, 0xXX.
527  static constexpr byte kTestAlByte = 0xA8;
528  // One byte opcode for nop.
529  static constexpr byte kNopByte = 0x90;
530 
531  // One byte opcode for a short unconditional jump.
532  static constexpr byte kJmpShortOpcode = 0xEB;
533  // One byte prefix for a short conditional jump.
534  static constexpr byte kJccShortPrefix = 0x70;
535  static constexpr byte kJncShortOpcode = kJccShortPrefix | not_carry;
536  static constexpr byte kJcShortOpcode = kJccShortPrefix | carry;
537  static constexpr byte kJnzShortOpcode = kJccShortPrefix | not_zero;
538  static constexpr byte kJzShortOpcode = kJccShortPrefix | zero;
539 
540  // ---------------------------------------------------------------------------
541  // Code generation
542  //
543  // - function names correspond one-to-one to ia32 instruction mnemonics
544  // - unless specified otherwise, instructions operate on 32bit operands
545  // - instructions on 8bit (byte) operands/registers have a trailing '_b'
546  // - instructions on 16bit (word) operands/registers have a trailing '_w'
547  // - naming conflicts with C++ keywords are resolved via a trailing '_'
548 
549  // NOTE ON INTERFACE: Currently, the interface is not very consistent
550  // in the sense that some operations (e.g. mov()) can be called in more
551 // than one way to generate the same instruction: The Register argument
552  // can in some cases be replaced with an Operand(Register) argument.
553 // This should be cleaned up and made more orthogonal. The question
554  // is: should we always use Operands instead of Registers where an
555  // Operand is possible, or should we have a Register (overloaded) form
556  // instead? We must be careful to make sure that the selected instruction
557  // is obvious from the parameters to avoid hard-to-find code generation
558  // bugs.
559 
560  // Insert the smallest number of nop instructions
561  // possible to align the pc offset to a multiple
562  // of m. m must be a power of 2.
563  void Align(int m);
564  // Insert the smallest number of zero bytes possible to align the pc offset
565 // to a multiple of m. m must be a power of 2 (>= 2).
566  void DataAlign(int m);
567  void Nop(int bytes = 1);
568  // Aligns code to something that's optimal for a jump target for the platform.
569  void CodeTargetAlign();
570 
571  // Stack
572  void pushad();
573  void popad();
574 
575  void pushfd();
576  void popfd();
577 
578  void push(const Immediate& x);
579  void push_imm32(int32_t imm32);
580  void push(Register src);
581  void push(Operand src);
582 
583  void pop(Register dst);
584  void pop(Operand dst);
585 
586  void enter(const Immediate& size);
587  void leave();
588 
589  // Moves
590  void mov_b(Register dst, Register src) { mov_b(dst, Operand(src)); }
591  void mov_b(Register dst, Operand src);
592  void mov_b(Register dst, int8_t imm8) { mov_b(Operand(dst), imm8); }
593  void mov_b(Operand dst, int8_t src) { mov_b(dst, Immediate(src)); }
594  void mov_b(Operand dst, const Immediate& src);
595  void mov_b(Operand dst, Register src);
596 
597  void mov_w(Register dst, Operand src);
598  void mov_w(Operand dst, int16_t src) { mov_w(dst, Immediate(src)); }
599  void mov_w(Operand dst, const Immediate& src);
600  void mov_w(Operand dst, Register src);
601 
602  void mov(Register dst, int32_t imm32);
603  void mov(Register dst, const Immediate& x);
604  void mov(Register dst, Handle<HeapObject> handle);
605  void mov(Register dst, Operand src);
606  void mov(Register dst, Register src);
607  void mov(Operand dst, const Immediate& x);
608  void mov(Operand dst, Handle<HeapObject> handle);
609  void mov(Operand dst, Register src);
610  void mov(Operand dst, Address src, RelocInfo::Mode);
611 
612  void movsx_b(Register dst, Register src) { movsx_b(dst, Operand(src)); }
613  void movsx_b(Register dst, Operand src);
614 
615  void movsx_w(Register dst, Register src) { movsx_w(dst, Operand(src)); }
616  void movsx_w(Register dst, Operand src);
617 
618  void movzx_b(Register dst, Register src) { movzx_b(dst, Operand(src)); }
619  void movzx_b(Register dst, Operand src);
620 
621  void movzx_w(Register dst, Register src) { movzx_w(dst, Operand(src)); }
622  void movzx_w(Register dst, Operand src);
623 
624  void movq(XMMRegister dst, Operand src);
625  // Conditional moves
626  void cmov(Condition cc, Register dst, Register src) {
627  cmov(cc, dst, Operand(src));
628  }
629  void cmov(Condition cc, Register dst, Operand src);
630 
631  // Flag management.
632  void cld();
633 
634  // Repetitive string instructions.
635  void rep_movs();
636  void rep_stos();
637  void stos();
638 
639  // Exchange
640  void xchg(Register dst, Register src);
641  void xchg(Register dst, Operand src);
642  void xchg_b(Register reg, Operand op);
643  void xchg_w(Register reg, Operand op);
644 
645  // Lock prefix
646  void lock();
647 
648  // CompareExchange
649  void cmpxchg(Operand dst, Register src);
650  void cmpxchg_b(Operand dst, Register src);
651  void cmpxchg_w(Operand dst, Register src);
652  void cmpxchg8b(Operand dst);
653 
654  // Memory Fence
655  void lfence();
656 
657  void pause();
658 
659  // Arithmetics
660  void adc(Register dst, int32_t imm32);
661  void adc(Register dst, Register src) { adc(dst, Operand(src)); }
662  void adc(Register dst, Operand src);
663 
664  void add(Register dst, Register src) { add(dst, Operand(src)); }
665  void add(Register dst, Operand src);
666  void add(Operand dst, Register src);
667  void add(Register dst, const Immediate& imm) { add(Operand(dst), imm); }
668  void add(Operand dst, const Immediate& x);
669 
670  void and_(Register dst, int32_t imm32);
671  void and_(Register dst, const Immediate& x);
672  void and_(Register dst, Register src) { and_(dst, Operand(src)); }
673  void and_(Register dst, Operand src);
674  void and_(Operand dst, Register src);
675  void and_(Operand dst, const Immediate& x);
676 
677  void cmpb(Register reg, Immediate imm8) {
678  DCHECK(reg.is_byte_register());
679  cmpb(Operand(reg), imm8);
680  }
681  void cmpb(Operand op, Immediate imm8);
682  void cmpb(Register reg, Operand op);
683  void cmpb(Operand op, Register reg);
684  void cmpb(Register dst, Register src) { cmpb(Operand(dst), src); }
685  void cmpb_al(Operand op);
686  void cmpw_ax(Operand op);
687  void cmpw(Operand dst, Immediate src);
688  void cmpw(Register dst, Immediate src) { cmpw(Operand(dst), src); }
689  void cmpw(Register dst, Operand src);
690  void cmpw(Register dst, Register src) { cmpw(Operand(dst), src); }
691  void cmpw(Operand dst, Register src);
692  void cmp(Register reg, int32_t imm32);
693  void cmp(Register reg, Handle<HeapObject> handle);
694  void cmp(Register reg0, Register reg1) { cmp(reg0, Operand(reg1)); }
695  void cmp(Register reg, Operand op);
696  void cmp(Register reg, const Immediate& imm) { cmp(Operand(reg), imm); }
697  void cmp(Operand op, Register reg);
698  void cmp(Operand op, const Immediate& imm);
699  void cmp(Operand op, Handle<HeapObject> handle);
700 
701  void dec_b(Register dst);
702  void dec_b(Operand dst);
703 
704  void dec(Register dst);
705  void dec(Operand dst);
706 
707  void cdq();
708 
709  void idiv(Register src) { idiv(Operand(src)); }
710  void idiv(Operand src);
711  void div(Register src) { div(Operand(src)); }
712  void div(Operand src);
713 
714  // Signed multiply instructions.
715  void imul(Register src); // edx:eax = eax * src.
716  void imul(Register dst, Register src) { imul(dst, Operand(src)); }
717  void imul(Register dst, Operand src); // dst = dst * src.
718  void imul(Register dst, Register src, int32_t imm32); // dst = src * imm32.
719  void imul(Register dst, Operand src, int32_t imm32);
720 
721  void inc(Register dst);
722  void inc(Operand dst);
723 
724  void lea(Register dst, Operand src);
725 
726  // Unsigned multiply instruction.
727  void mul(Register src); // edx:eax = eax * reg.
728 
729  void neg(Register dst);
730  void neg(Operand dst);
731 
732  void not_(Register dst);
733  void not_(Operand dst);
734 
735  void or_(Register dst, int32_t imm32);
736  void or_(Register dst, Register src) { or_(dst, Operand(src)); }
737  void or_(Register dst, Operand src);
738  void or_(Operand dst, Register src);
739  void or_(Register dst, const Immediate& imm) { or_(Operand(dst), imm); }
740  void or_(Operand dst, const Immediate& x);
741 
742  void rcl(Register dst, uint8_t imm8);
743  void rcr(Register dst, uint8_t imm8);
744 
745  void ror(Register dst, uint8_t imm8) { ror(Operand(dst), imm8); }
746  void ror(Operand dst, uint8_t imm8);
747  void ror_cl(Register dst) { ror_cl(Operand(dst)); }
748  void ror_cl(Operand dst);
749 
750  void sar(Register dst, uint8_t imm8) { sar(Operand(dst), imm8); }
751  void sar(Operand dst, uint8_t imm8);
752  void sar_cl(Register dst) { sar_cl(Operand(dst)); }
753  void sar_cl(Operand dst);
754 
755  void sbb(Register dst, Register src) { sbb(dst, Operand(src)); }
756  void sbb(Register dst, Operand src);
757 
758  void shl(Register dst, uint8_t imm8) { shl(Operand(dst), imm8); }
759  void shl(Operand dst, uint8_t imm8);
760  void shl_cl(Register dst) { shl_cl(Operand(dst)); }
761  void shl_cl(Operand dst);
762  void shld(Register dst, Register src, uint8_t shift);
763  void shld_cl(Register dst, Register src);
764 
765  void shr(Register dst, uint8_t imm8) { shr(Operand(dst), imm8); }
766  void shr(Operand dst, uint8_t imm8);
767  void shr_cl(Register dst) { shr_cl(Operand(dst)); }
768  void shr_cl(Operand dst);
769  void shrd(Register dst, Register src, uint8_t shift);
770  void shrd_cl(Register dst, Register src) { shrd_cl(Operand(dst), src); }
771  void shrd_cl(Operand dst, Register src);
772 
773  void sub(Register dst, const Immediate& imm) { sub(Operand(dst), imm); }
774  void sub(Operand dst, const Immediate& x);
775  void sub(Register dst, Register src) { sub(dst, Operand(src)); }
776  void sub(Register dst, Operand src);
777  void sub(Operand dst, Register src);
778  void sub_sp_32(uint32_t imm);
779 
780  void test(Register reg, const Immediate& imm);
781  void test(Register reg0, Register reg1) { test(reg0, Operand(reg1)); }
782  void test(Register reg, Operand op);
783  void test(Operand op, const Immediate& imm);
784  void test(Operand op, Register reg) { test(reg, op); }
785  void test_b(Register reg, Operand op);
786  void test_b(Register reg, Immediate imm8);
787  void test_b(Operand op, Immediate imm8);
788  void test_b(Operand op, Register reg) { test_b(reg, op); }
789  void test_b(Register dst, Register src) { test_b(dst, Operand(src)); }
790  void test_w(Register reg, Operand op);
791  void test_w(Register reg, Immediate imm16);
792  void test_w(Operand op, Immediate imm16);
793  void test_w(Operand op, Register reg) { test_w(reg, op); }
794  void test_w(Register dst, Register src) { test_w(dst, Operand(src)); }
795 
796  void xor_(Register dst, int32_t imm32);
797  void xor_(Register dst, Register src) { xor_(dst, Operand(src)); }
798  void xor_(Register dst, Operand src);
799  void xor_(Operand dst, Register src);
800  void xor_(Register dst, const Immediate& imm) { xor_(Operand(dst), imm); }
801  void xor_(Operand dst, const Immediate& x);
802 
803  // Bit operations.
804  void bswap(Register dst);
805  void bt(Operand dst, Register src);
806  void bts(Register dst, Register src) { bts(Operand(dst), src); }
807  void bts(Operand dst, Register src);
808  void bsr(Register dst, Register src) { bsr(dst, Operand(src)); }
809  void bsr(Register dst, Operand src);
810  void bsf(Register dst, Register src) { bsf(dst, Operand(src)); }
811  void bsf(Register dst, Operand src);
812 
813  // Miscellaneous
814  void hlt();
815  void int3();
816  void nop();
817  void ret(int imm16);
818  void ud2();
819 
820  // Label operations & relative jumps (PPUM Appendix D)
821  //
822  // Takes a branch opcode (cc) and a label (L) and generates
823  // either a backward branch or a forward branch and links it
824  // to the label fixup chain. Usage:
825  //
826  // Label L; // unbound label
827  // j(cc, &L); // forward branch to unbound label
828  // bind(&L); // bind label to the current pc
829  // j(cc, &L); // backward branch to bound label
830  // bind(&L); // illegal: a label may be bound only once
831  //
832  // Note: The same Label can be used for forward and backward branches
833  // but it may be bound only once.
834 
835  void bind(Label* L); // binds an unbound label L to the current code position
836 
837  // Calls
838  void call(Label* L);
839  void call(Address entry, RelocInfo::Mode rmode);
840  void call(Register reg) { call(Operand(reg)); }
841  void call(Operand adr);
842  void call(Handle<Code> code, RelocInfo::Mode rmode);
843  void call(CodeStub* stub);
844  void wasm_call(Address address, RelocInfo::Mode rmode);
845 
846  // Jumps
847  // unconditional jump to L
848  void jmp(Label* L, Label::Distance distance = Label::kFar);
849  void jmp(Address entry, RelocInfo::Mode rmode);
850  void jmp(Register reg) { jmp(Operand(reg)); }
851  void jmp(Operand adr);
852  void jmp(Handle<Code> code, RelocInfo::Mode rmode);
853 // unconditional jump relative to the current address. Low-level routine,
854  // use with caution!
855  void jmp_rel(int offset);
856 
857  // Conditional jumps
858  void j(Condition cc,
859  Label* L,
860  Label::Distance distance = Label::kFar);
861  void j(Condition cc, byte* entry, RelocInfo::Mode rmode);
862  void j(Condition cc, Handle<Code> code,
863  RelocInfo::Mode rmode = RelocInfo::CODE_TARGET);
864 
865  // Floating-point operations
866  void fld(int i);
867  void fstp(int i);
868 
869  void fld1();
870  void fldz();
871  void fldpi();
872  void fldln2();
873 
874  void fld_s(Operand adr);
875  void fld_d(Operand adr);
876 
877  void fstp_s(Operand adr);
878  void fst_s(Operand adr);
879  void fstp_d(Operand adr);
880  void fst_d(Operand adr);
881 
882  void fild_s(Operand adr);
883  void fild_d(Operand adr);
884 
885  void fist_s(Operand adr);
886 
887  void fistp_s(Operand adr);
888  void fistp_d(Operand adr);
889 
890  // The fisttp instructions require SSE3.
891  void fisttp_s(Operand adr);
892  void fisttp_d(Operand adr);
893 
894  void fabs();
895  void fchs();
896  void fcos();
897  void fsin();
898  void fptan();
899  void fyl2x();
900  void f2xm1();
901  void fscale();
902  void fninit();
903 
904  void fadd(int i);
905  void fadd_i(int i);
906  void fsub(int i);
907  void fsub_i(int i);
908  void fmul(int i);
909  void fmul_i(int i);
910  void fdiv(int i);
911  void fdiv_i(int i);
912 
913  void fisub_s(Operand adr);
914 
915  void faddp(int i = 1);
916  void fsubp(int i = 1);
917  void fsubrp(int i = 1);
918  void fmulp(int i = 1);
919  void fdivp(int i = 1);
920  void fprem();
921  void fprem1();
922 
923  void fxch(int i = 1);
924  void fincstp();
925  void ffree(int i = 0);
926 
927  void ftst();
928  void fucomp(int i);
929  void fucompp();
930  void fucomi(int i);
931  void fucomip();
932  void fcompp();
933  void fnstsw_ax();
934  void fwait();
935  void fnclex();
936 
937  void frndint();
938 
939  void sahf();
940  void setcc(Condition cc, Register reg);
941 
942  void cpuid();
943 
944  // SSE instructions
945  void addss(XMMRegister dst, XMMRegister src) { addss(dst, Operand(src)); }
946  void addss(XMMRegister dst, Operand src);
947  void subss(XMMRegister dst, XMMRegister src) { subss(dst, Operand(src)); }
948  void subss(XMMRegister dst, Operand src);
949  void mulss(XMMRegister dst, XMMRegister src) { mulss(dst, Operand(src)); }
950  void mulss(XMMRegister dst, Operand src);
951  void divss(XMMRegister dst, XMMRegister src) { divss(dst, Operand(src)); }
952  void divss(XMMRegister dst, Operand src);
953  void sqrtss(XMMRegister dst, XMMRegister src) { sqrtss(dst, Operand(src)); }
954  void sqrtss(XMMRegister dst, Operand src);
955 
956  void ucomiss(XMMRegister dst, XMMRegister src) { ucomiss(dst, Operand(src)); }
957  void ucomiss(XMMRegister dst, Operand src);
958  void movaps(XMMRegister dst, XMMRegister src);
959  void movups(XMMRegister dst, XMMRegister src);
960  void movups(XMMRegister dst, Operand src);
961  void movups(Operand dst, XMMRegister src);
962  void shufps(XMMRegister dst, XMMRegister src, byte imm8);
963 
964  void maxss(XMMRegister dst, XMMRegister src) { maxss(dst, Operand(src)); }
965  void maxss(XMMRegister dst, Operand src);
966  void minss(XMMRegister dst, XMMRegister src) { minss(dst, Operand(src)); }
967  void minss(XMMRegister dst, Operand src);
968 
969  void andps(XMMRegister dst, Operand src);
970  void andps(XMMRegister dst, XMMRegister src) { andps(dst, Operand(src)); }
971  void xorps(XMMRegister dst, Operand src);
972  void xorps(XMMRegister dst, XMMRegister src) { xorps(dst, Operand(src)); }
973  void orps(XMMRegister dst, Operand src);
974  void orps(XMMRegister dst, XMMRegister src) { orps(dst, Operand(src)); }
975 
976  void addps(XMMRegister dst, Operand src);
977  void addps(XMMRegister dst, XMMRegister src) { addps(dst, Operand(src)); }
978  void subps(XMMRegister dst, Operand src);
979  void subps(XMMRegister dst, XMMRegister src) { subps(dst, Operand(src)); }
980  void mulps(XMMRegister dst, Operand src);
981  void mulps(XMMRegister dst, XMMRegister src) { mulps(dst, Operand(src)); }
982  void divps(XMMRegister dst, Operand src);
983  void divps(XMMRegister dst, XMMRegister src) { divps(dst, Operand(src)); }
984  void rcpps(XMMRegister dst, Operand src);
985  void rcpps(XMMRegister dst, XMMRegister src) { rcpps(dst, Operand(src)); }
986  void rsqrtps(XMMRegister dst, Operand src);
987  void rsqrtps(XMMRegister dst, XMMRegister src) { rsqrtps(dst, Operand(src)); }
988  void haddps(XMMRegister dst, Operand src);
989  void haddps(XMMRegister dst, XMMRegister src) { haddps(dst, Operand(src)); }
990 
991  void minps(XMMRegister dst, Operand src);
992  void minps(XMMRegister dst, XMMRegister src) { minps(dst, Operand(src)); }
993  void maxps(XMMRegister dst, Operand src);
994  void maxps(XMMRegister dst, XMMRegister src) { maxps(dst, Operand(src)); }
995 
996  void cmpps(XMMRegister dst, Operand src, uint8_t cmp);
997 #define SSE_CMP_P(instr, imm8) \
998  void instr##ps(XMMRegister dst, XMMRegister src) { \
999  cmpps(dst, Operand(src), imm8); \
1000  } \
1001  void instr##ps(XMMRegister dst, Operand src) { cmpps(dst, src, imm8); }
1002 
1003  SSE_CMP_P(cmpeq, 0x0);
1004  SSE_CMP_P(cmplt, 0x1);
1005  SSE_CMP_P(cmple, 0x2);
1006  SSE_CMP_P(cmpneq, 0x4);
1007 
1008 #undef SSE_CMP_P
1009 
1010  // SSE2 instructions
1011  void cvttss2si(Register dst, Operand src);
1012  void cvttss2si(Register dst, XMMRegister src) {
1013  cvttss2si(dst, Operand(src));
1014  }
1015  void cvttsd2si(Register dst, Operand src);
1016  void cvttsd2si(Register dst, XMMRegister src) {
1017  cvttsd2si(dst, Operand(src));
1018  }
1019  void cvtsd2si(Register dst, XMMRegister src);
1020 
1021  void cvtsi2ss(XMMRegister dst, Register src) { cvtsi2ss(dst, Operand(src)); }
1022  void cvtsi2ss(XMMRegister dst, Operand src);
1023  void cvtsi2sd(XMMRegister dst, Register src) { cvtsi2sd(dst, Operand(src)); }
1024  void cvtsi2sd(XMMRegister dst, Operand src);
1025  void cvtss2sd(XMMRegister dst, Operand src);
1026  void cvtss2sd(XMMRegister dst, XMMRegister src) {
1027  cvtss2sd(dst, Operand(src));
1028  }
1029  void cvtsd2ss(XMMRegister dst, Operand src);
1030  void cvtsd2ss(XMMRegister dst, XMMRegister src) {
1031  cvtsd2ss(dst, Operand(src));
1032  }
1033  void cvtdq2ps(XMMRegister dst, XMMRegister src) {
1034  cvtdq2ps(dst, Operand(src));
1035  }
1036  void cvtdq2ps(XMMRegister dst, Operand src);
1037  void cvttps2dq(XMMRegister dst, XMMRegister src) {
1038  cvttps2dq(dst, Operand(src));
1039  }
1040  void cvttps2dq(XMMRegister dst, Operand src);
1041 
1042  void addsd(XMMRegister dst, XMMRegister src) { addsd(dst, Operand(src)); }
1043  void addsd(XMMRegister dst, Operand src);
1044  void subsd(XMMRegister dst, XMMRegister src) { subsd(dst, Operand(src)); }
1045  void subsd(XMMRegister dst, Operand src);
1046  void mulsd(XMMRegister dst, XMMRegister src) { mulsd(dst, Operand(src)); }
1047  void mulsd(XMMRegister dst, Operand src);
1048  void divsd(XMMRegister dst, XMMRegister src) { divsd(dst, Operand(src)); }
1049  void divsd(XMMRegister dst, Operand src);
1050  void xorpd(XMMRegister dst, XMMRegister src) { xorpd(dst, Operand(src)); }
1051  void xorpd(XMMRegister dst, Operand src);
1052  void sqrtsd(XMMRegister dst, XMMRegister src) { sqrtsd(dst, Operand(src)); }
1053  void sqrtsd(XMMRegister dst, Operand src);
1054 
1055  void andpd(XMMRegister dst, XMMRegister src) { andpd(dst, Operand(src)); }
1056  void andpd(XMMRegister dst, Operand src);
1057  void orpd(XMMRegister dst, XMMRegister src) { orpd(dst, Operand(src)); }
1058  void orpd(XMMRegister dst, Operand src);
1059 
1060  void ucomisd(XMMRegister dst, XMMRegister src) { ucomisd(dst, Operand(src)); }
1061  void ucomisd(XMMRegister dst, Operand src);
1062 
1063  void roundss(XMMRegister dst, XMMRegister src, RoundingMode mode);
1064  void roundsd(XMMRegister dst, XMMRegister src, RoundingMode mode);
1065 
1066  void movmskpd(Register dst, XMMRegister src);
1067  void movmskps(Register dst, XMMRegister src);
1068 
1069  void cmpltsd(XMMRegister dst, XMMRegister src);
1070 
1071  void maxsd(XMMRegister dst, XMMRegister src) { maxsd(dst, Operand(src)); }
1072  void maxsd(XMMRegister dst, Operand src);
1073  void minsd(XMMRegister dst, XMMRegister src) { minsd(dst, Operand(src)); }
1074  void minsd(XMMRegister dst, Operand src);
1075 
1076  void movdqa(XMMRegister dst, Operand src);
1077  void movdqa(Operand dst, XMMRegister src);
1078  void movdqu(XMMRegister dst, Operand src);
1079  void movdqu(Operand dst, XMMRegister src);
1080  void movdq(bool aligned, XMMRegister dst, Operand src) {
1081  if (aligned) {
1082  movdqa(dst, src);
1083  } else {
1084  movdqu(dst, src);
1085  }
1086  }
1087 
1088  void movd(XMMRegister dst, Register src) { movd(dst, Operand(src)); }
1089  void movd(XMMRegister dst, Operand src);
1090  void movd(Register dst, XMMRegister src) { movd(Operand(dst), src); }
1091  void movd(Operand dst, XMMRegister src);
1092  void movsd(XMMRegister dst, XMMRegister src) { movsd(dst, Operand(src)); }
1093  void movsd(XMMRegister dst, Operand src);
1094  void movsd(Operand dst, XMMRegister src);
1095 
1096  void movss(XMMRegister dst, Operand src);
1097  void movss(Operand dst, XMMRegister src);
1098  void movss(XMMRegister dst, XMMRegister src) { movss(dst, Operand(src)); }
1099  void extractps(Register dst, XMMRegister src, byte imm8);
1100 
1101  void psllw(XMMRegister reg, uint8_t shift);
1102  void pslld(XMMRegister reg, uint8_t shift);
1103  void psrlw(XMMRegister reg, uint8_t shift);
1104  void psrld(XMMRegister reg, uint8_t shift);
1105  void psraw(XMMRegister reg, uint8_t shift);
1106  void psrad(XMMRegister reg, uint8_t shift);
1107  void psllq(XMMRegister reg, uint8_t shift);
1108  void psllq(XMMRegister dst, XMMRegister src);
1109  void psrlq(XMMRegister reg, uint8_t shift);
1110  void psrlq(XMMRegister dst, XMMRegister src);
1111 
1112  void pshufhw(XMMRegister dst, XMMRegister src, uint8_t shuffle) {
1113  pshufhw(dst, Operand(src), shuffle);
1114  }
1115  void pshufhw(XMMRegister dst, Operand src, uint8_t shuffle);
1116  void pshuflw(XMMRegister dst, XMMRegister src, uint8_t shuffle) {
1117  pshuflw(dst, Operand(src), shuffle);
1118  }
1119  void pshuflw(XMMRegister dst, Operand src, uint8_t shuffle);
1120  void pshufd(XMMRegister dst, XMMRegister src, uint8_t shuffle) {
1121  pshufd(dst, Operand(src), shuffle);
1122  }
1123  void pshufd(XMMRegister dst, Operand src, uint8_t shuffle);
1124 
1125  void pblendw(XMMRegister dst, XMMRegister src, uint8_t mask) {
1126  pblendw(dst, Operand(src), mask);
1127  }
1128  void pblendw(XMMRegister dst, Operand src, uint8_t mask);
1129 
1130  void palignr(XMMRegister dst, XMMRegister src, uint8_t mask) {
1131  palignr(dst, Operand(src), mask);
1132  }
1133  void palignr(XMMRegister dst, Operand src, uint8_t mask);
1134 
1135  void pextrb(Register dst, XMMRegister src, uint8_t offset) {
1136  pextrb(Operand(dst), src, offset);
1137  }
1138  void pextrb(Operand dst, XMMRegister src, uint8_t offset);
1139  // Use SSE4_1 encoding for pextrw reg, xmm, imm8 for consistency
1140  void pextrw(Register dst, XMMRegister src, uint8_t offset) {
1141  pextrw(Operand(dst), src, offset);
1142  }
1143  void pextrw(Operand dst, XMMRegister src, uint8_t offset);
1144  void pextrd(Register dst, XMMRegister src, uint8_t offset) {
1145  pextrd(Operand(dst), src, offset);
1146  }
1147  void pextrd(Operand dst, XMMRegister src, uint8_t offset);
1148 
1149  void insertps(XMMRegister dst, XMMRegister src, uint8_t offset) {
1150  insertps(dst, Operand(src), offset);
1151  }
1152  void insertps(XMMRegister dst, Operand src, uint8_t offset);
1153  void pinsrb(XMMRegister dst, Register src, uint8_t offset) {
1154  pinsrb(dst, Operand(src), offset);
1155  }
1156  void pinsrb(XMMRegister dst, Operand src, uint8_t offset);
1157  void pinsrw(XMMRegister dst, Register src, uint8_t offset) {
1158  pinsrw(dst, Operand(src), offset);
1159  }
1160  void pinsrw(XMMRegister dst, Operand src, uint8_t offset);
1161  void pinsrd(XMMRegister dst, Register src, uint8_t offset) {
1162  pinsrd(dst, Operand(src), offset);
1163  }
1164  void pinsrd(XMMRegister dst, Operand src, uint8_t offset);
1165 
1166  // AVX instructions
1167  void vfmadd132sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1168  vfmadd132sd(dst, src1, Operand(src2));
1169  }
1170  void vfmadd213sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1171  vfmadd213sd(dst, src1, Operand(src2));
1172  }
1173  void vfmadd231sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1174  vfmadd231sd(dst, src1, Operand(src2));
1175  }
1176  void vfmadd132sd(XMMRegister dst, XMMRegister src1, Operand src2) {
1177  vfmasd(0x99, dst, src1, src2);
1178  }
1179  void vfmadd213sd(XMMRegister dst, XMMRegister src1, Operand src2) {
1180  vfmasd(0xa9, dst, src1, src2);
1181  }
1182  void vfmadd231sd(XMMRegister dst, XMMRegister src1, Operand src2) {
1183  vfmasd(0xb9, dst, src1, src2);
1184  }
1185  void vfmsub132sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1186  vfmsub132sd(dst, src1, Operand(src2));
1187  }
1188  void vfmsub213sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1189  vfmsub213sd(dst, src1, Operand(src2));
1190  }
1191  void vfmsub231sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1192  vfmsub231sd(dst, src1, Operand(src2));
1193  }
1194  void vfmsub132sd(XMMRegister dst, XMMRegister src1, Operand src2) {
1195  vfmasd(0x9b, dst, src1, src2);
1196  }
1197  void vfmsub213sd(XMMRegister dst, XMMRegister src1, Operand src2) {
1198  vfmasd(0xab, dst, src1, src2);
1199  }
1200  void vfmsub231sd(XMMRegister dst, XMMRegister src1, Operand src2) {
1201  vfmasd(0xbb, dst, src1, src2);
1202  }
1203  void vfnmadd132sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1204  vfnmadd132sd(dst, src1, Operand(src2));
1205  }
1206  void vfnmadd213sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1207  vfnmadd213sd(dst, src1, Operand(src2));
1208  }
1209  void vfnmadd231sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1210  vfnmadd231sd(dst, src1, Operand(src2));
1211  }
1212  void vfnmadd132sd(XMMRegister dst, XMMRegister src1, Operand src2) {
1213  vfmasd(0x9d, dst, src1, src2);
1214  }
1215  void vfnmadd213sd(XMMRegister dst, XMMRegister src1, Operand src2) {
1216  vfmasd(0xad, dst, src1, src2);
1217  }
1218  void vfnmadd231sd(XMMRegister dst, XMMRegister src1, Operand src2) {
1219  vfmasd(0xbd, dst, src1, src2);
1220  }
1221  void vfnmsub132sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1222  vfnmsub132sd(dst, src1, Operand(src2));
1223  }
1224  void vfnmsub213sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1225  vfnmsub213sd(dst, src1, Operand(src2));
1226  }
1227  void vfnmsub231sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1228  vfnmsub231sd(dst, src1, Operand(src2));
1229  }
1230  void vfnmsub132sd(XMMRegister dst, XMMRegister src1, Operand src2) {
1231  vfmasd(0x9f, dst, src1, src2);
1232  }
1233  void vfnmsub213sd(XMMRegister dst, XMMRegister src1, Operand src2) {
1234  vfmasd(0xaf, dst, src1, src2);
1235  }
1236  void vfnmsub231sd(XMMRegister dst, XMMRegister src1, Operand src2) {
1237  vfmasd(0xbf, dst, src1, src2);
1238  }
1239  void vfmasd(byte op, XMMRegister dst, XMMRegister src1, Operand src2);
1240 
1241  void vfmadd132ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1242  vfmadd132ss(dst, src1, Operand(src2));
1243  }
1244  void vfmadd213ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1245  vfmadd213ss(dst, src1, Operand(src2));
1246  }
1247  void vfmadd231ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1248  vfmadd231ss(dst, src1, Operand(src2));
1249  }
1250  void vfmadd132ss(XMMRegister dst, XMMRegister src1, Operand src2) {
1251  vfmass(0x99, dst, src1, src2);
1252  }
1253  void vfmadd213ss(XMMRegister dst, XMMRegister src1, Operand src2) {
1254  vfmass(0xa9, dst, src1, src2);
1255  }
1256  void vfmadd231ss(XMMRegister dst, XMMRegister src1, Operand src2) {
1257  vfmass(0xb9, dst, src1, src2);
1258  }
1259  void vfmsub132ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1260  vfmsub132ss(dst, src1, Operand(src2));
1261  }
1262  void vfmsub213ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1263  vfmsub213ss(dst, src1, Operand(src2));
1264  }
1265  void vfmsub231ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1266  vfmsub231ss(dst, src1, Operand(src2));
1267  }
1268  void vfmsub132ss(XMMRegister dst, XMMRegister src1, Operand src2) {
1269  vfmass(0x9b, dst, src1, src2);
1270  }
1271  void vfmsub213ss(XMMRegister dst, XMMRegister src1, Operand src2) {
1272  vfmass(0xab, dst, src1, src2);
1273  }
1274  void vfmsub231ss(XMMRegister dst, XMMRegister src1, Operand src2) {
1275  vfmass(0xbb, dst, src1, src2);
1276  }
1277  void vfnmadd132ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1278  vfnmadd132ss(dst, src1, Operand(src2));
1279  }
1280  void vfnmadd213ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1281  vfnmadd213ss(dst, src1, Operand(src2));
1282  }
1283  void vfnmadd231ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1284  vfnmadd231ss(dst, src1, Operand(src2));
1285  }
1286  void vfnmadd132ss(XMMRegister dst, XMMRegister src1, Operand src2) {
1287  vfmass(0x9d, dst, src1, src2);
1288  }
1289  void vfnmadd213ss(XMMRegister dst, XMMRegister src1, Operand src2) {
1290  vfmass(0xad, dst, src1, src2);
1291  }
1292  void vfnmadd231ss(XMMRegister dst, XMMRegister src1, Operand src2) {
1293  vfmass(0xbd, dst, src1, src2);
1294  }
1295  void vfnmsub132ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1296  vfnmsub132ss(dst, src1, Operand(src2));
1297  }
1298  void vfnmsub213ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1299  vfnmsub213ss(dst, src1, Operand(src2));
1300  }
1301  void vfnmsub231ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1302  vfnmsub231ss(dst, src1, Operand(src2));
1303  }
1304  void vfnmsub132ss(XMMRegister dst, XMMRegister src1, Operand src2) {
1305  vfmass(0x9f, dst, src1, src2);
1306  }
1307  void vfnmsub213ss(XMMRegister dst, XMMRegister src1, Operand src2) {
1308  vfmass(0xaf, dst, src1, src2);
1309  }
1310  void vfnmsub231ss(XMMRegister dst, XMMRegister src1, Operand src2) {
1311  vfmass(0xbf, dst, src1, src2);
1312  }
1313  void vfmass(byte op, XMMRegister dst, XMMRegister src1, Operand src2);
1314 
1315  void vaddsd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1316  vaddsd(dst, src1, Operand(src2));
1317  }
1318  void vaddsd(XMMRegister dst, XMMRegister src1, Operand src2) {
1319  vsd(0x58, dst, src1, src2);
1320  }
1321  void vsubsd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1322  vsubsd(dst, src1, Operand(src2));
1323  }
1324  void vsubsd(XMMRegister dst, XMMRegister src1, Operand src2) {
1325  vsd(0x5c, dst, src1, src2);
1326  }
1327  void vmulsd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1328  vmulsd(dst, src1, Operand(src2));
1329  }
1330  void vmulsd(XMMRegister dst, XMMRegister src1, Operand src2) {
1331  vsd(0x59, dst, src1, src2);
1332  }
1333  void vdivsd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1334  vdivsd(dst, src1, Operand(src2));
1335  }
1336  void vdivsd(XMMRegister dst, XMMRegister src1, Operand src2) {
1337  vsd(0x5e, dst, src1, src2);
1338  }
1339  void vmaxsd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1340  vmaxsd(dst, src1, Operand(src2));
1341  }
1342  void vmaxsd(XMMRegister dst, XMMRegister src1, Operand src2) {
1343  vsd(0x5f, dst, src1, src2);
1344  }
1345  void vminsd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1346  vminsd(dst, src1, Operand(src2));
1347  }
1348  void vminsd(XMMRegister dst, XMMRegister src1, Operand src2) {
1349  vsd(0x5d, dst, src1, src2);
1350  }
1351  void vsqrtsd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1352  vsqrtsd(dst, src1, Operand(src2));
1353  }
1354  void vsqrtsd(XMMRegister dst, XMMRegister src1, Operand src2) {
1355  vsd(0x51, dst, src1, src2);
1356  }
1357  void vsd(byte op, XMMRegister dst, XMMRegister src1, Operand src2);
1358 
1359  void vaddss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1360  vaddss(dst, src1, Operand(src2));
1361  }
1362  void vaddss(XMMRegister dst, XMMRegister src1, Operand src2) {
1363  vss(0x58, dst, src1, src2);
1364  }
1365  void vsubss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1366  vsubss(dst, src1, Operand(src2));
1367  }
1368  void vsubss(XMMRegister dst, XMMRegister src1, Operand src2) {
1369  vss(0x5c, dst, src1, src2);
1370  }
1371  void vmulss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1372  vmulss(dst, src1, Operand(src2));
1373  }
1374  void vmulss(XMMRegister dst, XMMRegister src1, Operand src2) {
1375  vss(0x59, dst, src1, src2);
1376  }
1377  void vdivss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1378  vdivss(dst, src1, Operand(src2));
1379  }
1380  void vdivss(XMMRegister dst, XMMRegister src1, Operand src2) {
1381  vss(0x5e, dst, src1, src2);
1382  }
1383  void vmaxss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1384  vmaxss(dst, src1, Operand(src2));
1385  }
1386  void vmaxss(XMMRegister dst, XMMRegister src1, Operand src2) {
1387  vss(0x5f, dst, src1, src2);
1388  }
1389  void vminss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1390  vminss(dst, src1, Operand(src2));
1391  }
1392  void vminss(XMMRegister dst, XMMRegister src1, Operand src2) {
1393  vss(0x5d, dst, src1, src2);
1394  }
1395  void vsqrtss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1396  vsqrtss(dst, src1, Operand(src2));
1397  }
1398  void vsqrtss(XMMRegister dst, XMMRegister src1, Operand src2) {
1399  vss(0x51, dst, src1, src2);
1400  }
1401  void vss(byte op, XMMRegister dst, XMMRegister src1, Operand src2);
1402 
1403  void vrcpps(XMMRegister dst, XMMRegister src) { vrcpps(dst, Operand(src)); }
1404  void vrcpps(XMMRegister dst, Operand src) {
1405  vinstr(0x53, dst, xmm0, src, kNone, k0F, kWIG);
1406  }
1407  void vrsqrtps(XMMRegister dst, XMMRegister src) {
1408  vrsqrtps(dst, Operand(src));
1409  }
1410  void vrsqrtps(XMMRegister dst, Operand src) {
1411  vinstr(0x52, dst, xmm0, src, kNone, k0F, kWIG);
1412  }
1413  void vhaddps(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
1414  vhaddps(dst, src1, Operand(src2));
1415  }
1416  void vhaddps(XMMRegister dst, XMMRegister src1, Operand src2) {
1417  vinstr(0x7C, dst, src1, src2, kF2, k0F, kWIG);
1418  }
1419  void vmovaps(XMMRegister dst, XMMRegister src) {
1420  vps(0x28, dst, xmm0, Operand(src));
1421  }
1422  void vshufps(XMMRegister dst, XMMRegister src1, XMMRegister src2, byte imm8) {
1423  vshufps(dst, src1, Operand(src2), imm8);
1424  }
1425  void vshufps(XMMRegister dst, XMMRegister src1, Operand src2, byte imm8);
1426 
1427  void vpsllw(XMMRegister dst, XMMRegister src, uint8_t imm8);
1428  void vpslld(XMMRegister dst, XMMRegister src, uint8_t imm8);
1429  void vpsrlw(XMMRegister dst, XMMRegister src, uint8_t imm8);
1430  void vpsrld(XMMRegister dst, XMMRegister src, uint8_t imm8);
1431  void vpsraw(XMMRegister dst, XMMRegister src, uint8_t imm8);
1432  void vpsrad(XMMRegister dst, XMMRegister src, uint8_t imm8);
1433 
1434  void vpshufhw(XMMRegister dst, XMMRegister src, uint8_t shuffle) {
1435  vpshufhw(dst, Operand(src), shuffle);
1436  }
1437  void vpshufhw(XMMRegister dst, Operand src, uint8_t shuffle);
1438  void vpshuflw(XMMRegister dst, XMMRegister src, uint8_t shuffle) {
1439  vpshuflw(dst, Operand(src), shuffle);
1440  }
1441  void vpshuflw(XMMRegister dst, Operand src, uint8_t shuffle);
1442  void vpshufd(XMMRegister dst, XMMRegister src, uint8_t shuffle) {
1443  vpshufd(dst, Operand(src), shuffle);
1444  }
1445  void vpshufd(XMMRegister dst, Operand src, uint8_t shuffle);
1446 
1447  void vpblendw(XMMRegister dst, XMMRegister src1, XMMRegister src2,
1448  uint8_t mask) {
1449  vpblendw(dst, src1, Operand(src2), mask);
1450  }
1451  void vpblendw(XMMRegister dst, XMMRegister src1, Operand src2, uint8_t mask);
1452 
1453  void vpalignr(XMMRegister dst, XMMRegister src1, XMMRegister src2,
1454  uint8_t mask) {
1455  vpalignr(dst, src1, Operand(src2), mask);
1456  }
1457  void vpalignr(XMMRegister dst, XMMRegister src1, Operand src2, uint8_t mask);
1458 
1459  void vpextrb(Register dst, XMMRegister src, uint8_t offset) {
1460  vpextrb(Operand(dst), src, offset);
1461  }
1462  void vpextrb(Operand dst, XMMRegister src, uint8_t offset);
1463  void vpextrw(Register dst, XMMRegister src, uint8_t offset) {
1464  vpextrw(Operand(dst), src, offset);
1465  }
1466  void vpextrw(Operand dst, XMMRegister src, uint8_t offset);
1467  void vpextrd(Register dst, XMMRegister src, uint8_t offset) {
1468  vpextrd(Operand(dst), src, offset);
1469  }
1470  void vpextrd(Operand dst, XMMRegister src, uint8_t offset);
1471 
1472  void vinsertps(XMMRegister dst, XMMRegister src1, XMMRegister src2,
1473  uint8_t offset) {
1474  vinsertps(dst, src1, Operand(src2), offset);
1475  }
1476  void vinsertps(XMMRegister dst, XMMRegister src1, Operand src2,
1477  uint8_t offset);
1478  void vpinsrb(XMMRegister dst, XMMRegister src1, Register src2,
1479  uint8_t offset) {
1480  vpinsrb(dst, src1, Operand(src2), offset);
1481  }
1482  void vpinsrb(XMMRegister dst, XMMRegister src1, Operand src2, uint8_t offset);
1483  void vpinsrw(XMMRegister dst, XMMRegister src1, Register src2,
1484  uint8_t offset) {
1485  vpinsrw(dst, src1, Operand(src2), offset);
1486  }
1487  void vpinsrw(XMMRegister dst, XMMRegister src1, Operand src2, uint8_t offset);
1488  void vpinsrd(XMMRegister dst, XMMRegister src1, Register src2,
1489  uint8_t offset) {
1490  vpinsrd(dst, src1, Operand(src2), offset);
1491  }
1492  void vpinsrd(XMMRegister dst, XMMRegister src1, Operand src2, uint8_t offset);
1493 
1494  void vcvtdq2ps(XMMRegister dst, XMMRegister src) {
1495  vcvtdq2ps(dst, Operand(src));
1496  }
1497  void vcvtdq2ps(XMMRegister dst, Operand src) {
1498  vinstr(0x5B, dst, xmm0, src, kNone, k0F, kWIG);
1499  }
1500  void vcvttps2dq(XMMRegister dst, XMMRegister src) {
1501  vcvttps2dq(dst, Operand(src));
1502  }
1503  void vcvttps2dq(XMMRegister dst, Operand src) {
1504  vinstr(0x5B, dst, xmm0, src, kF3, k0F, kWIG);
1505  }
1506 
1507  void vmovdqu(XMMRegister dst, Operand src) {
1508  vinstr(0x6F, dst, xmm0, src, kF3, k0F, kWIG);
1509  }
1510  void vmovdqu(Operand dst, XMMRegister src) {
1511  vinstr(0x7F, src, xmm0, dst, kF3, k0F, kWIG);
1512  }
1513  void vmovd(XMMRegister dst, Register src) { vmovd(dst, Operand(src)); }
1514  void vmovd(XMMRegister dst, Operand src) {
1515  vinstr(0x6E, dst, xmm0, src, k66, k0F, kWIG);
1516  }
1517  void vmovd(Register dst, XMMRegister src) { vmovd(Operand(dst), src); }
1518  void vmovd(Operand dst, XMMRegister src) {
1519  vinstr(0x7E, src, xmm0, dst, k66, k0F, kWIG);
1520  }
1521 
1522  // BMI1, BMI2, LZCNT and POPCNT instructions
1523  void andn(Register dst, Register src1, Register src2) {
1524  andn(dst, src1, Operand(src2));
1525  }
1526  void andn(Register dst, Register src1, Operand src2) {
1527  bmi1(0xf2, dst, src1, src2);
1528  }
1529  void bextr(Register dst, Register src1, Register src2) {
1530  bextr(dst, Operand(src1), src2);
1531  }
1532  void bextr(Register dst, Operand src1, Register src2) {
1533  bmi1(0xf7, dst, src2, src1);
1534  }
1535  void blsi(Register dst, Register src) { blsi(dst, Operand(src)); }
1536  void blsi(Register dst, Operand src) { bmi1(0xf3, ebx, dst, src); }
1537  void blsmsk(Register dst, Register src) { blsmsk(dst, Operand(src)); }
1538  void blsmsk(Register dst, Operand src) { bmi1(0xf3, edx, dst, src); }
1539  void blsr(Register dst, Register src) { blsr(dst, Operand(src)); }
1540  void blsr(Register dst, Operand src) { bmi1(0xf3, ecx, dst, src); }
1541  void tzcnt(Register dst, Register src) { tzcnt(dst, Operand(src)); }
1542  void tzcnt(Register dst, Operand src);
1543 
1544  void lzcnt(Register dst, Register src) { lzcnt(dst, Operand(src)); }
1545  void lzcnt(Register dst, Operand src);
1546 
1547  void popcnt(Register dst, Register src) { popcnt(dst, Operand(src)); }
1548  void popcnt(Register dst, Operand src);
1549 
1550  void bzhi(Register dst, Register src1, Register src2) {
1551  bzhi(dst, Operand(src1), src2);
1552  }
1553  void bzhi(Register dst, Operand src1, Register src2) {
1554  bmi2(kNone, 0xf5, dst, src2, src1);
1555  }
1556  void mulx(Register dst1, Register dst2, Register src) {
1557  mulx(dst1, dst2, Operand(src));
1558  }
1559  void mulx(Register dst1, Register dst2, Operand src) {
1560  bmi2(kF2, 0xf6, dst1, dst2, src);
1561  }
1562  void pdep(Register dst, Register src1, Register src2) {
1563  pdep(dst, src1, Operand(src2));
1564  }
1565  void pdep(Register dst, Register src1, Operand src2) {
1566  bmi2(kF2, 0xf5, dst, src1, src2);
1567  }
1568  void pext(Register dst, Register src1, Register src2) {
1569  pext(dst, src1, Operand(src2));
1570  }
1571  void pext(Register dst, Register src1, Operand src2) {
1572  bmi2(kF3, 0xf5, dst, src1, src2);
1573  }
1574  void sarx(Register dst, Register src1, Register src2) {
1575  sarx(dst, Operand(src1), src2);
1576  }
1577  void sarx(Register dst, Operand src1, Register src2) {
1578  bmi2(kF3, 0xf7, dst, src2, src1);
1579  }
1580  void shlx(Register dst, Register src1, Register src2) {
1581  shlx(dst, Operand(src1), src2);
1582  }
1583  void shlx(Register dst, Operand src1, Register src2) {
1584  bmi2(k66, 0xf7, dst, src2, src1);
1585  }
1586  void shrx(Register dst, Register src1, Register src2) {
1587  shrx(dst, Operand(src1), src2);
1588  }
1589  void shrx(Register dst, Operand src1, Register src2) {
1590  bmi2(kF2, 0xf7, dst, src2, src1);
1591  }
1592  void rorx(Register dst, Register src, byte imm8) {
1593  rorx(dst, Operand(src), imm8);
1594  }
1595  void rorx(Register dst, Operand src, byte imm8);
1596 
1597 #define PACKED_OP_LIST(V) \
1598  V(and, 0x54) \
1599  V(xor, 0x57) \
1600  V(add, 0x58) \
1601  V(mul, 0x59) \
1602  V(sub, 0x5c) \
1603  V(min, 0x5d) \
1604  V(div, 0x5e) \
1605  V(max, 0x5f)
1606 
1607 #define AVX_PACKED_OP_DECLARE(name, opcode) \
1608  void v##name##ps(XMMRegister dst, XMMRegister src1, XMMRegister src2) { \
1609  vps(opcode, dst, src1, Operand(src2)); \
1610  } \
1611  void v##name##ps(XMMRegister dst, XMMRegister src1, Operand src2) { \
1612  vps(opcode, dst, src1, src2); \
1613  } \
1614  void v##name##pd(XMMRegister dst, XMMRegister src1, XMMRegister src2) { \
1615  vpd(opcode, dst, src1, Operand(src2)); \
1616  } \
1617  void v##name##pd(XMMRegister dst, XMMRegister src1, Operand src2) { \
1618  vpd(opcode, dst, src1, src2); \
1619  }
1620 
1621  PACKED_OP_LIST(AVX_PACKED_OP_DECLARE);
1622  void vps(byte op, XMMRegister dst, XMMRegister src1, Operand src2);
1623  void vpd(byte op, XMMRegister dst, XMMRegister src1, Operand src2);
1624 
1625  void vcmpps(XMMRegister dst, XMMRegister src1, Operand src2, uint8_t cmp);
1626 #define AVX_CMP_P(instr, imm8) \
1627  void instr##ps(XMMRegister dst, XMMRegister src1, XMMRegister src2) { \
1628  vcmpps(dst, src1, Operand(src2), imm8); \
1629  } \
1630  void instr##ps(XMMRegister dst, XMMRegister src1, Operand src2) { \
1631  vcmpps(dst, src1, src2, imm8); \
1632  }
1633 
1634  AVX_CMP_P(vcmpeq, 0x0);
1635  AVX_CMP_P(vcmplt, 0x1);
1636  AVX_CMP_P(vcmple, 0x2);
1637  AVX_CMP_P(vcmpneq, 0x4);
1638 
1639 #undef AVX_CMP_P
1640 
1641 // Other SSE and AVX instructions
1642 #define DECLARE_SSE2_INSTRUCTION(instruction, prefix, escape, opcode) \
1643  void instruction(XMMRegister dst, XMMRegister src) { \
1644  instruction(dst, Operand(src)); \
1645  } \
1646  void instruction(XMMRegister dst, Operand src) { \
1647  sse2_instr(dst, src, 0x##prefix, 0x##escape, 0x##opcode); \
1648  }
1649 
1650  SSE2_INSTRUCTION_LIST(DECLARE_SSE2_INSTRUCTION)
1651 #undef DECLARE_SSE2_INSTRUCTION
1652 
1653 #define DECLARE_SSE2_AVX_INSTRUCTION(instruction, prefix, escape, opcode) \
1654  void v##instruction(XMMRegister dst, XMMRegister src1, XMMRegister src2) { \
1655  v##instruction(dst, src1, Operand(src2)); \
1656  } \
1657  void v##instruction(XMMRegister dst, XMMRegister src1, Operand src2) { \
1658  vinstr(0x##opcode, dst, src1, src2, k##prefix, k##escape, kW0); \
1659  }
1660 
1661  SSE2_INSTRUCTION_LIST(DECLARE_SSE2_AVX_INSTRUCTION)
1662 #undef DECLARE_SSE2_AVX_INSTRUCTION
1663 
1664 #define DECLARE_SSSE3_INSTRUCTION(instruction, prefix, escape1, escape2, \
1665  opcode) \
1666  void instruction(XMMRegister dst, XMMRegister src) { \
1667  instruction(dst, Operand(src)); \
1668  } \
1669  void instruction(XMMRegister dst, Operand src) { \
1670  ssse3_instr(dst, src, 0x##prefix, 0x##escape1, 0x##escape2, 0x##opcode); \
1671  }
1672 
1673  SSSE3_INSTRUCTION_LIST(DECLARE_SSSE3_INSTRUCTION)
1674 #undef DECLARE_SSSE3_INSTRUCTION
1675 
1676 #define DECLARE_SSE4_INSTRUCTION(instruction, prefix, escape1, escape2, \
1677  opcode) \
1678  void instruction(XMMRegister dst, XMMRegister src) { \
1679  instruction(dst, Operand(src)); \
1680  } \
1681  void instruction(XMMRegister dst, Operand src) { \
1682  sse4_instr(dst, src, 0x##prefix, 0x##escape1, 0x##escape2, 0x##opcode); \
1683  }
1684 
1685  SSE4_INSTRUCTION_LIST(DECLARE_SSE4_INSTRUCTION)
1686  SSE4_RM_INSTRUCTION_LIST(DECLARE_SSE4_INSTRUCTION)
1687 #undef DECLARE_SSE4_INSTRUCTION
1688 
1689 #define DECLARE_SSE34_AVX_INSTRUCTION(instruction, prefix, escape1, escape2, \
1690  opcode) \
1691  void v##instruction(XMMRegister dst, XMMRegister src1, XMMRegister src2) { \
1692  v##instruction(dst, src1, Operand(src2)); \
1693  } \
1694  void v##instruction(XMMRegister dst, XMMRegister src1, Operand src2) { \
1695  vinstr(0x##opcode, dst, src1, src2, k##prefix, k##escape1##escape2, kW0); \
1696  }
1697 
1698  SSSE3_INSTRUCTION_LIST(DECLARE_SSE34_AVX_INSTRUCTION)
1699  SSE4_INSTRUCTION_LIST(DECLARE_SSE34_AVX_INSTRUCTION)
1700 #undef DECLARE_SSE34_AVX_INSTRUCTION
1701 
1702 #define DECLARE_SSE4_AVX_RM_INSTRUCTION(instruction, prefix, escape1, escape2, \
1703  opcode) \
1704  void v##instruction(XMMRegister dst, XMMRegister src) { \
1705  v##instruction(dst, Operand(src)); \
1706  } \
1707  void v##instruction(XMMRegister dst, Operand src) { \
1708  vinstr(0x##opcode, dst, xmm0, src, k##prefix, k##escape1##escape2, kW0); \
1709  }
1710 
1711  SSE4_RM_INSTRUCTION_LIST(DECLARE_SSE4_AVX_RM_INSTRUCTION)
1712 #undef DECLARE_SSE4_AVX_RM_INSTRUCTION
1713 
1714  // Prefetch src position into cache level.
1715  // Level 1, 2 or 3 specifies CPU cache level. Level 0 specifies a
1716  // non-temporal prefetch.
1717  void prefetch(Operand src, int level);
1718  // TODO(lrn): Need SFENCE for movnt?
1719 
1720  // Check the code size generated from label to here.
1721  int SizeOfCodeGeneratedSince(Label* label) {
1722  return pc_offset() - label->pos();
1723  }
1724 
// Records the text |msg| as a code comment.
1725  // Use --code-comments to enable.
// NOTE(review): presumably the comment is associated with the current pc --
// confirm in the out-of-line definition.
1726  void RecordComment(const char* msg);
1727 
1728  // Record a deoptimization reason that can be used by a log or cpu profiler.
1729  // Use --trace-deopt to enable.
// Takes the reason, the source position it applies to, and an integer id.
1730  void RecordDeoptReason(DeoptimizeReason reason, SourcePosition position,
1731  int id);
1732 
1733  // Writes a single byte (db), 32-bit word (dd) or 64-bit word (dq) of data
1734  // in the code stream. Used for inline tables, e.g., jump-tables.
1735  void db(uint8_t data);
1736  void dd(uint32_t data);
1737  void dq(uint64_t data);
1738  void dp(uintptr_t data) { dd(data); }
// Label-taking overload of dd().
// NOTE(review): presumably emits a 32-bit entry referring to |label| (e.g.
// for jump tables) -- confirm against the out-of-line definition.
1739  void dd(Label* label);
1740 
1741  // Check if there is less than kGap bytes available in the buffer.
1742  // If this is the case, we need to grow the buffer before emitting
1743  // an instruction or relocation information.
1744  inline bool buffer_overflow() const {
1745  return pc_ >= reloc_info_writer.pos() - kGap;
1746  }
1747 
1748  // Get the number of bytes available in the buffer.
1749  inline int available_space() const { return reloc_info_writer.pos() - pc_; }
1750 
// NOTE(review): declaration only; presumably reports whether the code at
// |addr| is a nop instruction -- confirm against the definition.
1751  static bool IsNop(Address addr);
1752 
1753  int relocation_writer_size() {
1754  return (buffer_ + buffer_size_) - reloc_info_writer.pos();
1755  }
1756 
// Upper bound for the assembler buffer size: 512 * MB.
1757  // Avoid overflows for displacements etc.
1758  static constexpr int kMaximalBufferSize = 512 * MB;
1759 
1760  byte byte_at(int pos) { return buffer_[pos]; }
1761  void set_byte_at(int pos, byte value) { buffer_[pos] = value; }
1762 
1763  protected:
// Operand-encoding helpers for SSE instructions, overloaded on the kinds of
// the two operands (XMM register / memory operand / general register).
// NOTE(review): presumably each emits the ModR/M (and any addressing) bytes
// for the given pair -- confirm in the definitions.
1764  void emit_sse_operand(XMMRegister reg, Operand adr);
1765  void emit_sse_operand(XMMRegister dst, XMMRegister src);
1766  void emit_sse_operand(Register dst, XMMRegister src);
1767  void emit_sse_operand(XMMRegister dst, Register src);
1768 
1769  byte* addr_at(int pos) { return buffer_ + pos; }
1770 
1771  private:
1772  uint32_t long_at(int pos) {
1773  return *reinterpret_cast<uint32_t*>(addr_at(pos));
1774  }
1775  void long_at_put(int pos, uint32_t x) {
1776  *reinterpret_cast<uint32_t*>(addr_at(pos)) = x;
1777  }
1778 
1779  // code emission
// GrowBuffer() is the routine EnsureSpace invokes when buffer_overflow()
// reports that the buffer is nearly full.
1780  void GrowBuffer();
// Low-level emitters, declared inline and defined elsewhere. The overloads
// taking a RelocInfo::Mode additionally receive relocation information.
// NOTE(review): judging by the names, emit_b / emit_w / emit_q presumably
// write 8-, 16- and 64-bit quantities -- confirm in the inline definitions.
1781  inline void emit(uint32_t x);
1782  inline void emit(Handle<HeapObject> handle);
1783  inline void emit(uint32_t x, RelocInfo::Mode rmode);
1784  inline void emit(Handle<Code> code, RelocInfo::Mode rmode);
1785  inline void emit(const Immediate& x);
1786  inline void emit_b(Immediate x);
1787  inline void emit_w(const Immediate& x);
1788  inline void emit_q(uint64_t x);
1789 
1790  // Emit the code-object-relative offset of the label's position
1791  inline void emit_code_relative_offset(Label* label);
1792 
1793  // instruction generation
// NOTE(review): presumably emits a byte-sized arithmetic instruction built
// from the two opcode parts |op1| / |op2| on |dst| with immediate |imm8| --
// confirm in the definition.
1794  void emit_arith_b(int op1, int op2, Register dst, int imm8);
1795 
1796  // Emit a basic arithmetic instruction (i.e. first byte of the family is 0x81)
1797  // with a given destination expression and an immediate operand. It attempts
1798  // to use the shortest encoding possible.
1799  // sel specifies the /n in the modrm byte (see the Intel PRM).
1800  void emit_arith(int sel, Operand dst, const Immediate& x);
1801 
// Operand emitters overloaded on how the first argument is given: a raw
// integer code, a general-purpose register, or an XMM register.
// NOTE(review): presumably the first argument fills the reg field of the
// ModR/M byte that encodes |adr| -- confirm in the definitions.
1802  void emit_operand(int code, Operand adr);
1803  void emit_operand(Register reg, Operand adr);
1804  void emit_operand(XMMRegister reg, Operand adr);
1805 
1806  void emit_label(Label* label);
1807 
// NOTE(review): presumably emits a two-byte x87 floating-point instruction
// (|b1|, |b2|) operating on stack register |i| -- confirm in the definition.
1808  void emit_farith(int b1, int b2, int i);
1809 
1810  // Emit vex prefix
// The enums below name the individual VEX prefix fields passed to
// emit_vex_prefix(). kLIG and kLZ are aliases of kL128, and kWIG is an alias
// of kW0.
// NOTE(review): the enumerator values look like the raw field encodings that
// get OR-ed into the prefix bytes -- confirm in emit_vex_prefix().
1811  enum SIMDPrefix { kNone = 0x0, k66 = 0x1, kF3 = 0x2, kF2 = 0x3 };
1812  enum VectorLength { kL128 = 0x0, kL256 = 0x4, kLIG = kL128, kLZ = kL128 };
1813  enum VexW { kW0 = 0x0, kW1 = 0x80, kWIG = kW0 };
1814  enum LeadingOpcode { k0F = 0x1, k0F38 = 0x2, k0F3A = 0x3 };
// Two overloads: |v| may be an XMM register or a general-purpose register.
1815  inline void emit_vex_prefix(XMMRegister v, VectorLength l, SIMDPrefix pp,
1816  LeadingOpcode m, VexW w);
1817  inline void emit_vex_prefix(Register v, VectorLength l, SIMDPrefix pp,
1818  LeadingOpcode m, VexW w);
1819 
1820  // labels
// NOTE(review): print() is presumably a debugging aid and bind_to()
// presumably binds label |L| to buffer offset |pos| -- confirm in the
// definitions.
1821  void print(const Label* L);
1822  void bind_to(Label* L, int pos);
1823 
1824  // displacements
// Accessors and emitters for the Displacement associated with a label;
// declared inline and defined elsewhere.
1825  inline Displacement disp_at(Label* L);
1826  inline void disp_at_put(Label* L, Displacement disp);
1827  inline void emit_disp(Label* L, Displacement::Type type);
1828  inline void emit_near_disp(Label* L);
1829 
// Shared emitters behind the macro-generated instruction wrappers
// (DECLARE_SSE2_INSTRUCTION and friends): the sse*_instr helpers receive the
// prefix, escape and opcode bytes, while vinstr receives the opcode together
// with the VEX prefix fields.
1830  void sse2_instr(XMMRegister dst, Operand src, byte prefix, byte escape,
1831  byte opcode);
1832  void ssse3_instr(XMMRegister dst, Operand src, byte prefix, byte escape1,
1833  byte escape2, byte opcode);
1834  void sse4_instr(XMMRegister dst, Operand src, byte prefix, byte escape1,
1835  byte escape2, byte opcode);
1836  void vinstr(byte op, XMMRegister dst, XMMRegister src1, Operand src2,
1837  SIMDPrefix pp, LeadingOpcode m, VexW w);
1838  // Most BMI instructions are similar.
// bmi2 additionally takes the SIMD prefix |pp|.
1839  void bmi1(byte op, Register reg, Register vreg, Operand rm);
1840  void bmi2(SIMDPrefix pp, byte op, Register reg, Register vreg, Operand rm);
1841 
1842  // record reloc info for current pc_
// |data| is optional and defaults to 0.
1843  void RecordRelocInfo(RelocInfo::Mode rmode, intptr_t data = 0);
1844 
1845  // record the position of jmp/jcc instruction
1846  void record_farjmp_position(Label* L, int pos);
1847 
// NOTE(review): |idx| is presumably an index into the recorded far-jump
// positions -- confirm in the definition.
1848  bool is_optimizable_farjmp(int idx);
1849 
1850  void AllocateAndInstallRequestedHeapObjects(Isolate* isolate);
1851 
// EnsureSpace needs access to the private GrowBuffer().
1852  friend class EnsureSpace;
1853 
1854  // Internal reference positions, required for (potential) patching in
1855  // GrowBuffer(); contains only those internal references whose labels
1856  // are already bound.
1857  std::deque<int> internal_reference_positions_;
1858 
1859  // code generation
// The writer's pos() is the limit that buffer_overflow() and
// available_space() measure pc_ against.
1860  RelocInfoWriter reloc_info_writer;
1861 
1862  // Variables for this instance of assembler
// NOTE(review): the three members below are presumably the bookkeeping used
// by record_farjmp_position() / is_optimizable_farjmp() -- confirm in the
// implementation.
1863  int farjmp_num_ = 0;
1864  std::deque<int> farjmp_positions_;
1865  std::map<Label*, std::vector<int>> label_farjmp_maps_;
1866 };
1867 
1868 
1869 // Helper class that ensures that there is enough space for generating
1870 // instructions and relocation information. The constructor makes
1871 // sure that there is enough space and (in debug mode) the destructor
1872 // checks that we did not generate too much.
1873 class EnsureSpace {
1874  public:
1875  explicit EnsureSpace(Assembler* assembler) : assembler_(assembler) {
1876  if (assembler_->buffer_overflow()) assembler_->GrowBuffer();
1877 #ifdef DEBUG
1878  space_before_ = assembler_->available_space();
1879 #endif
1880  }
1881 
1882 #ifdef DEBUG
1883  ~EnsureSpace() {
1884  int bytes_generated = space_before_ - assembler_->available_space();
1885  DCHECK(bytes_generated < assembler_->kGap);
1886  }
1887 #endif
1888 
1889  private:
1890  Assembler* assembler_;
1891 #ifdef DEBUG
1892  int space_before_;
1893 #endif
1894 };
1895 
1896 // Define {RegisterName} methods for the register types.
// One expansion per register class: Register is paired with the
// GENERAL_REGISTERS list and XMMRegister with the DOUBLE_REGISTERS list.
1897 DEFINE_REGISTER_NAMES(Register, GENERAL_REGISTERS)
1898 DEFINE_REGISTER_NAMES(XMMRegister, DOUBLE_REGISTERS)
1899 
1900 } // namespace internal
1901 } // namespace v8
1902 
1903 #endif // V8_IA32_ASSEMBLER_IA32_H_
Definition: libplatform.h:13