V8 API Reference, 7.2.502.16 (for Deno 0.2.4)
simulator-logic-arm64.cc
1 // Copyright 2016 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #if V8_TARGET_ARCH_ARM64
6 
7 #include <cmath>
8 #include "src/arm64/simulator-arm64.h"
9 
10 namespace v8 {
11 namespace internal {
12 
13 #if defined(USE_SIMULATOR)
14 
15 namespace {
16 
17 // See FPRound for a description of this function.
18 inline double FPRoundToDouble(int64_t sign, int64_t exponent, uint64_t mantissa,
19  FPRounding round_mode) {
20  uint64_t bits = FPRound<uint64_t, kDoubleExponentBits, kDoubleMantissaBits>(
21  sign, exponent, mantissa, round_mode);
22  return bit_cast<double>(bits);
23 }
24 
25 // See FPRound for a description of this function.
26 inline float FPRoundToFloat(int64_t sign, int64_t exponent, uint64_t mantissa,
27  FPRounding round_mode) {
28  uint32_t bits = FPRound<uint32_t, kFloatExponentBits, kFloatMantissaBits>(
29  sign, exponent, mantissa, round_mode);
30  return bit_cast<float>(bits);
31 }
32 
33 // See FPRound for a description of this function.
34 inline float16 FPRoundToFloat16(int64_t sign, int64_t exponent,
35  uint64_t mantissa, FPRounding round_mode) {
36  return FPRound<float16, kFloat16ExponentBits, kFloat16MantissaBits>(
37  sign, exponent, mantissa, round_mode);
38 }
39 
40 } // namespace
41 
42 double Simulator::FixedToDouble(int64_t src, int fbits, FPRounding round) {
43  if (src >= 0) {
44  return UFixedToDouble(src, fbits, round);
45  } else if (src == INT64_MIN) {
46  return -UFixedToDouble(src, fbits, round);
47  } else {
48  return -UFixedToDouble(-src, fbits, round);
49  }
50 }
51 
52 double Simulator::UFixedToDouble(uint64_t src, int fbits, FPRounding round) {
53  // An input of 0 is a special case because the result is effectively
54  // subnormal: The exponent is encoded as 0 and there is no implicit 1 bit.
55  if (src == 0) {
56  return 0.0;
57  }
58 
59  // Calculate the exponent. The highest significant bit will have the value
60  // 2^exponent.
61  const int highest_significant_bit = 63 - CountLeadingZeros(src, 64);
62  const int64_t exponent = highest_significant_bit - fbits;
63 
64  return FPRoundToDouble(0, exponent, src, round);
65 }
66 
67 float Simulator::FixedToFloat(int64_t src, int fbits, FPRounding round) {
68  if (src >= 0) {
69  return UFixedToFloat(src, fbits, round);
70  } else if (src == INT64_MIN) {
71  return -UFixedToFloat(src, fbits, round);
72  } else {
73  return -UFixedToFloat(-src, fbits, round);
74  }
75 }
76 
77 float Simulator::UFixedToFloat(uint64_t src, int fbits, FPRounding round) {
78  // An input of 0 is a special case because the result is effectively
79  // subnormal: The exponent is encoded as 0 and there is no implicit 1 bit.
80  if (src == 0) {
81  return 0.0f;
82  }
83 
84  // Calculate the exponent. The highest significant bit will have the value
85  // 2^exponent.
86  const int highest_significant_bit = 63 - CountLeadingZeros(src, 64);
87  const int32_t exponent = highest_significant_bit - fbits;
88 
89  return FPRoundToFloat(0, exponent, src, round);
90 }
91 
92 double Simulator::FPToDouble(float value) {
93  switch (std::fpclassify(value)) {
94  case FP_NAN: {
95  if (IsSignallingNaN(value)) {
96  FPProcessException();
97  }
98  if (DN()) return kFP64DefaultNaN;
99 
100  // Convert NaNs as the processor would:
101  // - The sign is propagated.
102  // - The mantissa is transferred entirely, except that the top bit is
103  // forced to '1', making the result a quiet NaN. The unused (low-order)
104  // mantissa bits are set to 0.
105  uint32_t raw = bit_cast<uint32_t>(value);
106 
107  uint64_t sign = raw >> 31;
108  uint64_t exponent = (1 << kDoubleExponentBits) - 1;
109  uint64_t mantissa = unsigned_bitextract_64(21, 0, raw);
110 
111  // Unused low-order bits remain zero.
112  mantissa <<= (kDoubleMantissaBits - kFloatMantissaBits);
113 
114  // Force a quiet NaN.
115  mantissa |= (UINT64_C(1) << (kDoubleMantissaBits - 1));
116 
117  return double_pack(sign, exponent, mantissa);
118  }
119 
120  case FP_ZERO:
121  case FP_NORMAL:
122  case FP_SUBNORMAL:
123  case FP_INFINITE: {
124  // All other inputs are preserved in a standard cast, because every value
125  // representable using an IEEE-754 float is also representable using an
126  // IEEE-754 double.
127  return static_cast<double>(value);
128  }
129  }
130 
131  UNREACHABLE();
132 }
133 
134 float Simulator::FPToFloat(float16 value) {
135  uint32_t sign = value >> 15;
136  uint32_t exponent =
137  unsigned_bitextract_32(kFloat16MantissaBits + kFloat16ExponentBits - 1,
138  kFloat16MantissaBits, value);
139  uint32_t mantissa =
140  unsigned_bitextract_32(kFloat16MantissaBits - 1, 0, value);
141 
142  switch (float16classify(value)) {
143  case FP_ZERO:
144  return (sign == 0) ? 0.0f : -0.0f;
145 
146  case FP_INFINITE:
147  return (sign == 0) ? kFP32PositiveInfinity : kFP32NegativeInfinity;
148 
149  case FP_SUBNORMAL: {
150  // Calculate shift required to put mantissa into the most-significant bits
151  // of the destination mantissa.
152  int shift = CountLeadingZeros(mantissa << (32 - 10), 32);
153 
154  // Shift mantissa and discard implicit '1'.
155  mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits) + shift + 1;
156  mantissa &= (1 << kFloatMantissaBits) - 1;
157 
158  // Adjust the exponent for the shift applied, and rebias.
159  exponent = exponent - shift + (kFloatExponentBias - kFloat16ExponentBias);
160  break;
161  }
162 
163  case FP_NAN: {
164  if (IsSignallingNaN(value)) {
165  FPProcessException();
166  }
167  if (DN()) return kFP32DefaultNaN;
168 
169  // Convert NaNs as the processor would:
170  // - The sign is propagated.
171  // - The mantissa is transferred entirely, except that the top bit is
172  // forced to '1', making the result a quiet NaN. The unused (low-order)
173  // mantissa bits are set to 0.
174  exponent = (1 << kFloatExponentBits) - 1;
175 
176  // Increase bits in mantissa, making low-order bits 0.
177  mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits);
178  mantissa |= 1 << (kFloatMantissaBits - 1); // Force a quiet NaN.
179  break;
180  }
181 
182  case FP_NORMAL: {
183  // Increase bits in mantissa, making low-order bits 0.
184  mantissa <<= (kFloatMantissaBits - kFloat16MantissaBits);
185 
186  // Change exponent bias.
187  exponent += (kFloatExponentBias - kFloat16ExponentBias);
188  break;
189  }
190 
191  default:
192  UNREACHABLE();
193  }
194  return float_pack(sign, exponent, mantissa);
195 }
196 
197 float16 Simulator::FPToFloat16(float value, FPRounding round_mode) {
198  // Only the FPTieEven rounding mode is implemented.
199  DCHECK_EQ(round_mode, FPTieEven);
200  USE(round_mode);
201 
202  int64_t sign = float_sign(value);
203  int64_t exponent =
204  static_cast<int64_t>(float_exp(value)) - kFloatExponentBias;
205  uint32_t mantissa = float_mantissa(value);
206 
207  switch (std::fpclassify(value)) {
208  case FP_NAN: {
209  if (IsSignallingNaN(value)) {
210  FPProcessException();
211  }
212  if (DN()) return kFP16DefaultNaN;
213 
214  // Convert NaNs as the processor would:
215  // - The sign is propagated.
216  // - The mantissa is transferred as much as possible, except that the top
217  // bit is forced to '1', making the result a quiet NaN.
218  float16 result =
219  (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity;
220  result |= mantissa >> (kFloatMantissaBits - kFloat16MantissaBits);
221  result |= (1 << (kFloat16MantissaBits - 1)); // Force a quiet NaN;
222  return result;
223  }
224 
225  case FP_ZERO:
226  return (sign == 0) ? 0 : 0x8000;
227 
228  case FP_INFINITE:
229  return (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity;
230 
231  case FP_NORMAL:
232  case FP_SUBNORMAL: {
233  // Convert float-to-half as the processor would, assuming that FPCR.FZ
234  // (flush-to-zero) is not set.
235 
236  // Add the implicit '1' bit to the mantissa.
237  mantissa += (1 << kFloatMantissaBits);
238  return FPRoundToFloat16(sign, exponent, mantissa, round_mode);
239  }
240  }
241 
242  UNREACHABLE();
243 }
244 
245 float16 Simulator::FPToFloat16(double value, FPRounding round_mode) {
246  // Only the FPTieEven rounding mode is implemented.
247  DCHECK_EQ(round_mode, FPTieEven);
248  USE(round_mode);
249 
250  int64_t sign = double_sign(value);
251  int64_t exponent =
252  static_cast<int64_t>(double_exp(value)) - kDoubleExponentBias;
253  uint64_t mantissa = double_mantissa(value);
254 
255  switch (std::fpclassify(value)) {
256  case FP_NAN: {
257  if (IsSignallingNaN(value)) {
258  FPProcessException();
259  }
260  if (DN()) return kFP16DefaultNaN;
261 
262  // Convert NaNs as the processor would:
263  // - The sign is propagated.
264  // - The mantissa is transferred as much as possible, except that the top
265  // bit is forced to '1', making the result a quiet NaN.
266  float16 result =
267  (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity;
268  result |= mantissa >> (kDoubleMantissaBits - kFloat16MantissaBits);
269  result |= (1 << (kFloat16MantissaBits - 1)); // Force a quiet NaN;
270  return result;
271  }
272 
273  case FP_ZERO:
274  return (sign == 0) ? 0 : 0x8000;
275 
276  case FP_INFINITE:
277  return (sign == 0) ? kFP16PositiveInfinity : kFP16NegativeInfinity;
278 
279  case FP_NORMAL:
280  case FP_SUBNORMAL: {
281  // Convert double-to-half as the processor would, assuming that FPCR.FZ
282  // (flush-to-zero) is not set.
283 
284  // Add the implicit '1' bit to the mantissa.
285  mantissa += (UINT64_C(1) << kDoubleMantissaBits);
286  return FPRoundToFloat16(sign, exponent, mantissa, round_mode);
287  }
288  }
289 
290  UNREACHABLE();
291 }
292 
293 float Simulator::FPToFloat(double value, FPRounding round_mode) {
294  // Only the FPTieEven rounding mode is implemented.
295  DCHECK((round_mode == FPTieEven) || (round_mode == FPRoundOdd));
296  USE(round_mode);
297 
298  switch (std::fpclassify(value)) {
299  case FP_NAN: {
300  if (IsSignallingNaN(value)) {
301  FPProcessException();
302  }
303  if (DN()) return kFP32DefaultNaN;
304 
305  // Convert NaNs as the processor would:
306  // - The sign is propagated.
307  // - The mantissa is transferred as much as possible, except that the
308  // top bit is forced to '1', making the result a quiet NaN.
309 
310  uint64_t raw = bit_cast<uint64_t>(value);
311 
312  uint32_t sign = raw >> 63;
313  uint32_t exponent = (1 << 8) - 1;
314  uint32_t mantissa = static_cast<uint32_t>(unsigned_bitextract_64(
315  50, kDoubleMantissaBits - kFloatMantissaBits, raw));
316  mantissa |= (1 << (kFloatMantissaBits - 1)); // Force a quiet NaN.
317 
318  return float_pack(sign, exponent, mantissa);
319  }
320 
321  case FP_ZERO:
322  case FP_INFINITE: {
323  // In a C++ cast, any value representable in the target type will be
324  // unchanged. This is always the case for +/-0.0 and infinities.
325  return static_cast<float>(value);
326  }
327 
328  case FP_NORMAL:
329  case FP_SUBNORMAL: {
330  // Convert double-to-float as the processor would, assuming that FPCR.FZ
331  // (flush-to-zero) is not set.
332  uint32_t sign = double_sign(value);
333  int64_t exponent =
334  static_cast<int64_t>(double_exp(value)) - kDoubleExponentBias;
335  uint64_t mantissa = double_mantissa(value);
336  if (std::fpclassify(value) == FP_NORMAL) {
337  // For normal FP values, add the hidden bit.
338  mantissa |= (UINT64_C(1) << kDoubleMantissaBits);
339  }
340  return FPRoundToFloat(sign, exponent, mantissa, round_mode);
341  }
342  }
343 
344  UNREACHABLE();
345 }
346 
347 void Simulator::ld1(VectorFormat vform, LogicVRegister dst, uint64_t addr) {
348  dst.ClearForWrite(vform);
349  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
350  dst.ReadUintFromMem(vform, i, addr);
351  addr += LaneSizeInBytesFromFormat(vform);
352  }
353 }
354 
355 void Simulator::ld1(VectorFormat vform, LogicVRegister dst, int index,
356  uint64_t addr) {
357  dst.ReadUintFromMem(vform, index, addr);
358 }
359 
360 void Simulator::ld1r(VectorFormat vform, LogicVRegister dst, uint64_t addr) {
361  dst.ClearForWrite(vform);
362  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
363  dst.ReadUintFromMem(vform, i, addr);
364  }
365 }
366 
367 void Simulator::ld2(VectorFormat vform, LogicVRegister dst1,
368  LogicVRegister dst2, uint64_t addr1) {
369  dst1.ClearForWrite(vform);
370  dst2.ClearForWrite(vform);
371  int esize = LaneSizeInBytesFromFormat(vform);
372  uint64_t addr2 = addr1 + esize;
373  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
374  dst1.ReadUintFromMem(vform, i, addr1);
375  dst2.ReadUintFromMem(vform, i, addr2);
376  addr1 += 2 * esize;
377  addr2 += 2 * esize;
378  }
379 }
380 
381 void Simulator::ld2(VectorFormat vform, LogicVRegister dst1,
382  LogicVRegister dst2, int index, uint64_t addr1) {
383  dst1.ClearForWrite(vform);
384  dst2.ClearForWrite(vform);
385  uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);
386  dst1.ReadUintFromMem(vform, index, addr1);
387  dst2.ReadUintFromMem(vform, index, addr2);
388 }
389 
390 void Simulator::ld2r(VectorFormat vform, LogicVRegister dst1,
391  LogicVRegister dst2, uint64_t addr) {
392  dst1.ClearForWrite(vform);
393  dst2.ClearForWrite(vform);
394  uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);
395  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
396  dst1.ReadUintFromMem(vform, i, addr);
397  dst2.ReadUintFromMem(vform, i, addr2);
398  }
399 }
400 
401 void Simulator::ld3(VectorFormat vform, LogicVRegister dst1,
402  LogicVRegister dst2, LogicVRegister dst3, uint64_t addr1) {
403  dst1.ClearForWrite(vform);
404  dst2.ClearForWrite(vform);
405  dst3.ClearForWrite(vform);
406  int esize = LaneSizeInBytesFromFormat(vform);
407  uint64_t addr2 = addr1 + esize;
408  uint64_t addr3 = addr2 + esize;
409  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
410  dst1.ReadUintFromMem(vform, i, addr1);
411  dst2.ReadUintFromMem(vform, i, addr2);
412  dst3.ReadUintFromMem(vform, i, addr3);
413  addr1 += 3 * esize;
414  addr2 += 3 * esize;
415  addr3 += 3 * esize;
416  }
417 }
418 
419 void Simulator::ld3(VectorFormat vform, LogicVRegister dst1,
420  LogicVRegister dst2, LogicVRegister dst3, int index,
421  uint64_t addr1) {
422  dst1.ClearForWrite(vform);
423  dst2.ClearForWrite(vform);
424  dst3.ClearForWrite(vform);
425  uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);
426  uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
427  dst1.ReadUintFromMem(vform, index, addr1);
428  dst2.ReadUintFromMem(vform, index, addr2);
429  dst3.ReadUintFromMem(vform, index, addr3);
430 }
431 
432 void Simulator::ld3r(VectorFormat vform, LogicVRegister dst1,
433  LogicVRegister dst2, LogicVRegister dst3, uint64_t addr) {
434  dst1.ClearForWrite(vform);
435  dst2.ClearForWrite(vform);
436  dst3.ClearForWrite(vform);
437  uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);
438  uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
439  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
440  dst1.ReadUintFromMem(vform, i, addr);
441  dst2.ReadUintFromMem(vform, i, addr2);
442  dst3.ReadUintFromMem(vform, i, addr3);
443  }
444 }
445 
446 void Simulator::ld4(VectorFormat vform, LogicVRegister dst1,
447  LogicVRegister dst2, LogicVRegister dst3,
448  LogicVRegister dst4, uint64_t addr1) {
449  dst1.ClearForWrite(vform);
450  dst2.ClearForWrite(vform);
451  dst3.ClearForWrite(vform);
452  dst4.ClearForWrite(vform);
453  int esize = LaneSizeInBytesFromFormat(vform);
454  uint64_t addr2 = addr1 + esize;
455  uint64_t addr3 = addr2 + esize;
456  uint64_t addr4 = addr3 + esize;
457  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
458  dst1.ReadUintFromMem(vform, i, addr1);
459  dst2.ReadUintFromMem(vform, i, addr2);
460  dst3.ReadUintFromMem(vform, i, addr3);
461  dst4.ReadUintFromMem(vform, i, addr4);
462  addr1 += 4 * esize;
463  addr2 += 4 * esize;
464  addr3 += 4 * esize;
465  addr4 += 4 * esize;
466  }
467 }
468 
469 void Simulator::ld4(VectorFormat vform, LogicVRegister dst1,
470  LogicVRegister dst2, LogicVRegister dst3,
471  LogicVRegister dst4, int index, uint64_t addr1) {
472  dst1.ClearForWrite(vform);
473  dst2.ClearForWrite(vform);
474  dst3.ClearForWrite(vform);
475  dst4.ClearForWrite(vform);
476  uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);
477  uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
478  uint64_t addr4 = addr3 + LaneSizeInBytesFromFormat(vform);
479  dst1.ReadUintFromMem(vform, index, addr1);
480  dst2.ReadUintFromMem(vform, index, addr2);
481  dst3.ReadUintFromMem(vform, index, addr3);
482  dst4.ReadUintFromMem(vform, index, addr4);
483 }
484 
485 void Simulator::ld4r(VectorFormat vform, LogicVRegister dst1,
486  LogicVRegister dst2, LogicVRegister dst3,
487  LogicVRegister dst4, uint64_t addr) {
488  dst1.ClearForWrite(vform);
489  dst2.ClearForWrite(vform);
490  dst3.ClearForWrite(vform);
491  dst4.ClearForWrite(vform);
492  uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);
493  uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
494  uint64_t addr4 = addr3 + LaneSizeInBytesFromFormat(vform);
495  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
496  dst1.ReadUintFromMem(vform, i, addr);
497  dst2.ReadUintFromMem(vform, i, addr2);
498  dst3.ReadUintFromMem(vform, i, addr3);
499  dst4.ReadUintFromMem(vform, i, addr4);
500  }
501 }
502 
503 void Simulator::st1(VectorFormat vform, LogicVRegister src, uint64_t addr) {
504  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
505  src.WriteUintToMem(vform, i, addr);
506  addr += LaneSizeInBytesFromFormat(vform);
507  }
508 }
509 
510 void Simulator::st1(VectorFormat vform, LogicVRegister src, int index,
511  uint64_t addr) {
512  src.WriteUintToMem(vform, index, addr);
513 }
514 
515 void Simulator::st2(VectorFormat vform, LogicVRegister dst, LogicVRegister dst2,
516  uint64_t addr) {
517  int esize = LaneSizeInBytesFromFormat(vform);
518  uint64_t addr2 = addr + esize;
519  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
520  dst.WriteUintToMem(vform, i, addr);
521  dst2.WriteUintToMem(vform, i, addr2);
522  addr += 2 * esize;
523  addr2 += 2 * esize;
524  }
525 }
526 
527 void Simulator::st2(VectorFormat vform, LogicVRegister dst, LogicVRegister dst2,
528  int index, uint64_t addr) {
529  int esize = LaneSizeInBytesFromFormat(vform);
530  dst.WriteUintToMem(vform, index, addr);
531  dst2.WriteUintToMem(vform, index, addr + 1 * esize);
532 }
533 
534 void Simulator::st3(VectorFormat vform, LogicVRegister dst, LogicVRegister dst2,
535  LogicVRegister dst3, uint64_t addr) {
536  int esize = LaneSizeInBytesFromFormat(vform);
537  uint64_t addr2 = addr + esize;
538  uint64_t addr3 = addr2 + esize;
539  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
540  dst.WriteUintToMem(vform, i, addr);
541  dst2.WriteUintToMem(vform, i, addr2);
542  dst3.WriteUintToMem(vform, i, addr3);
543  addr += 3 * esize;
544  addr2 += 3 * esize;
545  addr3 += 3 * esize;
546  }
547 }
548 
549 void Simulator::st3(VectorFormat vform, LogicVRegister dst, LogicVRegister dst2,
550  LogicVRegister dst3, int index, uint64_t addr) {
551  int esize = LaneSizeInBytesFromFormat(vform);
552  dst.WriteUintToMem(vform, index, addr);
553  dst2.WriteUintToMem(vform, index, addr + 1 * esize);
554  dst3.WriteUintToMem(vform, index, addr + 2 * esize);
555 }
556 
557 void Simulator::st4(VectorFormat vform, LogicVRegister dst, LogicVRegister dst2,
558  LogicVRegister dst3, LogicVRegister dst4, uint64_t addr) {
559  int esize = LaneSizeInBytesFromFormat(vform);
560  uint64_t addr2 = addr + esize;
561  uint64_t addr3 = addr2 + esize;
562  uint64_t addr4 = addr3 + esize;
563  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
564  dst.WriteUintToMem(vform, i, addr);
565  dst2.WriteUintToMem(vform, i, addr2);
566  dst3.WriteUintToMem(vform, i, addr3);
567  dst4.WriteUintToMem(vform, i, addr4);
568  addr += 4 * esize;
569  addr2 += 4 * esize;
570  addr3 += 4 * esize;
571  addr4 += 4 * esize;
572  }
573 }
574 
575 void Simulator::st4(VectorFormat vform, LogicVRegister dst, LogicVRegister dst2,
576  LogicVRegister dst3, LogicVRegister dst4, int index,
577  uint64_t addr) {
578  int esize = LaneSizeInBytesFromFormat(vform);
579  dst.WriteUintToMem(vform, index, addr);
580  dst2.WriteUintToMem(vform, index, addr + 1 * esize);
581  dst3.WriteUintToMem(vform, index, addr + 2 * esize);
582  dst4.WriteUintToMem(vform, index, addr + 3 * esize);
583 }
584 
585 LogicVRegister Simulator::cmp(VectorFormat vform, LogicVRegister dst,
586  const LogicVRegister& src1,
587  const LogicVRegister& src2, Condition cond) {
588  dst.ClearForWrite(vform);
589  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
590  int64_t sa = src1.Int(vform, i);
591  int64_t sb = src2.Int(vform, i);
592  uint64_t ua = src1.Uint(vform, i);
593  uint64_t ub = src2.Uint(vform, i);
594  bool result = false;
595  switch (cond) {
596  case eq:
597  result = (ua == ub);
598  break;
599  case ge:
600  result = (sa >= sb);
601  break;
602  case gt:
603  result = (sa > sb);
604  break;
605  case hi:
606  result = (ua > ub);
607  break;
608  case hs:
609  result = (ua >= ub);
610  break;
611  case lt:
612  result = (sa < sb);
613  break;
614  case le:
615  result = (sa <= sb);
616  break;
617  default:
618  UNREACHABLE();
619  }
620  dst.SetUint(vform, i, result ? MaxUintFromFormat(vform) : 0);
621  }
622  return dst;
623 }
624 
625 LogicVRegister Simulator::cmp(VectorFormat vform, LogicVRegister dst,
626  const LogicVRegister& src1, int imm,
627  Condition cond) {
628  SimVRegister temp;
629  LogicVRegister imm_reg = dup_immediate(vform, temp, imm);
630  return cmp(vform, dst, src1, imm_reg, cond);
631 }
632 
633 LogicVRegister Simulator::cmptst(VectorFormat vform, LogicVRegister dst,
634  const LogicVRegister& src1,
635  const LogicVRegister& src2) {
636  dst.ClearForWrite(vform);
637  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
638  uint64_t ua = src1.Uint(vform, i);
639  uint64_t ub = src2.Uint(vform, i);
640  dst.SetUint(vform, i, ((ua & ub) != 0) ? MaxUintFromFormat(vform) : 0);
641  }
642  return dst;
643 }
644 
645 LogicVRegister Simulator::add(VectorFormat vform, LogicVRegister dst,
646  const LogicVRegister& src1,
647  const LogicVRegister& src2) {
648  int lane_size = LaneSizeInBitsFromFormat(vform);
649  dst.ClearForWrite(vform);
650  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
651  // Test for unsigned saturation.
652  uint64_t ua = src1.UintLeftJustified(vform, i);
653  uint64_t ub = src2.UintLeftJustified(vform, i);
654  uint64_t ur = ua + ub;
655  if (ur < ua) {
656  dst.SetUnsignedSat(i, true);
657  }
658 
659  // Test for signed saturation.
660  bool pos_a = (ua >> 63) == 0;
661  bool pos_b = (ub >> 63) == 0;
662  bool pos_r = (ur >> 63) == 0;
663  // If the signs of the operands are the same, but different from the result,
664  // there was an overflow.
665  if ((pos_a == pos_b) && (pos_a != pos_r)) {
666  dst.SetSignedSat(i, pos_a);
667  }
668 
669  dst.SetInt(vform, i, ur >> (64 - lane_size));
670  }
671  return dst;
672 }
673 
674 LogicVRegister Simulator::addp(VectorFormat vform, LogicVRegister dst,
675  const LogicVRegister& src1,
676  const LogicVRegister& src2) {
677  SimVRegister temp1, temp2;
678  uzp1(vform, temp1, src1, src2);
679  uzp2(vform, temp2, src1, src2);
680  add(vform, dst, temp1, temp2);
681  return dst;
682 }
683 
684 LogicVRegister Simulator::mla(VectorFormat vform, LogicVRegister dst,
685  const LogicVRegister& src1,
686  const LogicVRegister& src2) {
687  SimVRegister temp;
688  mul(vform, temp, src1, src2);
689  add(vform, dst, dst, temp);
690  return dst;
691 }
692 
693 LogicVRegister Simulator::mls(VectorFormat vform, LogicVRegister dst,
694  const LogicVRegister& src1,
695  const LogicVRegister& src2) {
696  SimVRegister temp;
697  mul(vform, temp, src1, src2);
698  sub(vform, dst, dst, temp);
699  return dst;
700 }
701 
702 LogicVRegister Simulator::mul(VectorFormat vform, LogicVRegister dst,
703  const LogicVRegister& src1,
704  const LogicVRegister& src2) {
705  dst.ClearForWrite(vform);
706  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
707  dst.SetUint(vform, i, src1.Uint(vform, i) * src2.Uint(vform, i));
708  }
709  return dst;
710 }
711 
712 LogicVRegister Simulator::mul(VectorFormat vform, LogicVRegister dst,
713  const LogicVRegister& src1,
714  const LogicVRegister& src2, int index) {
715  SimVRegister temp;
716  VectorFormat indexform = VectorFormatFillQ(vform);
717  return mul(vform, dst, src1, dup_element(indexform, temp, src2, index));
718 }
719 
720 LogicVRegister Simulator::mla(VectorFormat vform, LogicVRegister dst,
721  const LogicVRegister& src1,
722  const LogicVRegister& src2, int index) {
723  SimVRegister temp;
724  VectorFormat indexform = VectorFormatFillQ(vform);
725  return mla(vform, dst, src1, dup_element(indexform, temp, src2, index));
726 }
727 
728 LogicVRegister Simulator::mls(VectorFormat vform, LogicVRegister dst,
729  const LogicVRegister& src1,
730  const LogicVRegister& src2, int index) {
731  SimVRegister temp;
732  VectorFormat indexform = VectorFormatFillQ(vform);
733  return mls(vform, dst, src1, dup_element(indexform, temp, src2, index));
734 }
735 
736 LogicVRegister Simulator::smull(VectorFormat vform, LogicVRegister dst,
737  const LogicVRegister& src1,
738  const LogicVRegister& src2, int index) {
739  SimVRegister temp;
740  VectorFormat indexform =
741  VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
742  return smull(vform, dst, src1, dup_element(indexform, temp, src2, index));
743 }
744 
745 LogicVRegister Simulator::smull2(VectorFormat vform, LogicVRegister dst,
746  const LogicVRegister& src1,
747  const LogicVRegister& src2, int index) {
748  SimVRegister temp;
749  VectorFormat indexform =
750  VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
751  return smull2(vform, dst, src1, dup_element(indexform, temp, src2, index));
752 }
753 
754 LogicVRegister Simulator::umull(VectorFormat vform, LogicVRegister dst,
755  const LogicVRegister& src1,
756  const LogicVRegister& src2, int index) {
757  SimVRegister temp;
758  VectorFormat indexform =
759  VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
760  return umull(vform, dst, src1, dup_element(indexform, temp, src2, index));
761 }
762 
763 LogicVRegister Simulator::umull2(VectorFormat vform, LogicVRegister dst,
764  const LogicVRegister& src1,
765  const LogicVRegister& src2, int index) {
766  SimVRegister temp;
767  VectorFormat indexform =
768  VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
769  return umull2(vform, dst, src1, dup_element(indexform, temp, src2, index));
770 }
771 
772 LogicVRegister Simulator::smlal(VectorFormat vform, LogicVRegister dst,
773  const LogicVRegister& src1,
774  const LogicVRegister& src2, int index) {
775  SimVRegister temp;
776  VectorFormat indexform =
777  VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
778  return smlal(vform, dst, src1, dup_element(indexform, temp, src2, index));
779 }
780 
781 LogicVRegister Simulator::smlal2(VectorFormat vform, LogicVRegister dst,
782  const LogicVRegister& src1,
783  const LogicVRegister& src2, int index) {
784  SimVRegister temp;
785  VectorFormat indexform =
786  VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
787  return smlal2(vform, dst, src1, dup_element(indexform, temp, src2, index));
788 }
789 
790 LogicVRegister Simulator::umlal(VectorFormat vform, LogicVRegister dst,
791  const LogicVRegister& src1,
792  const LogicVRegister& src2, int index) {
793  SimVRegister temp;
794  VectorFormat indexform =
795  VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
796  return umlal(vform, dst, src1, dup_element(indexform, temp, src2, index));
797 }
798 
799 LogicVRegister Simulator::umlal2(VectorFormat vform, LogicVRegister dst,
800  const LogicVRegister& src1,
801  const LogicVRegister& src2, int index) {
802  SimVRegister temp;
803  VectorFormat indexform =
804  VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
805  return umlal2(vform, dst, src1, dup_element(indexform, temp, src2, index));
806 }
807 
808 LogicVRegister Simulator::smlsl(VectorFormat vform, LogicVRegister dst,
809  const LogicVRegister& src1,
810  const LogicVRegister& src2, int index) {
811  SimVRegister temp;
812  VectorFormat indexform =
813  VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
814  return smlsl(vform, dst, src1, dup_element(indexform, temp, src2, index));
815 }
816 
817 LogicVRegister Simulator::smlsl2(VectorFormat vform, LogicVRegister dst,
818  const LogicVRegister& src1,
819  const LogicVRegister& src2, int index) {
820  SimVRegister temp;
821  VectorFormat indexform =
822  VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
823  return smlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index));
824 }
825 
826 LogicVRegister Simulator::umlsl(VectorFormat vform, LogicVRegister dst,
827  const LogicVRegister& src1,
828  const LogicVRegister& src2, int index) {
829  SimVRegister temp;
830  VectorFormat indexform =
831  VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
832  return umlsl(vform, dst, src1, dup_element(indexform, temp, src2, index));
833 }
834 
835 LogicVRegister Simulator::umlsl2(VectorFormat vform, LogicVRegister dst,
836  const LogicVRegister& src1,
837  const LogicVRegister& src2, int index) {
838  SimVRegister temp;
839  VectorFormat indexform =
840  VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
841  return umlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index));
842 }
843 
844 LogicVRegister Simulator::sqdmull(VectorFormat vform, LogicVRegister dst,
845  const LogicVRegister& src1,
846  const LogicVRegister& src2, int index) {
847  SimVRegister temp;
848  VectorFormat indexform =
849  VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
850  return sqdmull(vform, dst, src1, dup_element(indexform, temp, src2, index));
851 }
852 
853 LogicVRegister Simulator::sqdmull2(VectorFormat vform, LogicVRegister dst,
854  const LogicVRegister& src1,
855  const LogicVRegister& src2, int index) {
856  SimVRegister temp;
857  VectorFormat indexform =
858  VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
859  return sqdmull2(vform, dst, src1, dup_element(indexform, temp, src2, index));
860 }
861 
862 LogicVRegister Simulator::sqdmlal(VectorFormat vform, LogicVRegister dst,
863  const LogicVRegister& src1,
864  const LogicVRegister& src2, int index) {
865  SimVRegister temp;
866  VectorFormat indexform =
867  VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
868  return sqdmlal(vform, dst, src1, dup_element(indexform, temp, src2, index));
869 }
870 
871 LogicVRegister Simulator::sqdmlal2(VectorFormat vform, LogicVRegister dst,
872  const LogicVRegister& src1,
873  const LogicVRegister& src2, int index) {
874  SimVRegister temp;
875  VectorFormat indexform =
876  VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
877  return sqdmlal2(vform, dst, src1, dup_element(indexform, temp, src2, index));
878 }
879 
880 LogicVRegister Simulator::sqdmlsl(VectorFormat vform, LogicVRegister dst,
881  const LogicVRegister& src1,
882  const LogicVRegister& src2, int index) {
883  SimVRegister temp;
884  VectorFormat indexform =
885  VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
886  return sqdmlsl(vform, dst, src1, dup_element(indexform, temp, src2, index));
887 }
888 
889 LogicVRegister Simulator::sqdmlsl2(VectorFormat vform, LogicVRegister dst,
890  const LogicVRegister& src1,
891  const LogicVRegister& src2, int index) {
892  SimVRegister temp;
893  VectorFormat indexform =
894  VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
895  return sqdmlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index));
896 }
897 
898 LogicVRegister Simulator::sqdmulh(VectorFormat vform, LogicVRegister dst,
899  const LogicVRegister& src1,
900  const LogicVRegister& src2, int index) {
901  SimVRegister temp;
902  VectorFormat indexform = VectorFormatFillQ(vform);
903  return sqdmulh(vform, dst, src1, dup_element(indexform, temp, src2, index));
904 }
905 
906 LogicVRegister Simulator::sqrdmulh(VectorFormat vform, LogicVRegister dst,
907  const LogicVRegister& src1,
908  const LogicVRegister& src2, int index) {
909  SimVRegister temp;
910  VectorFormat indexform = VectorFormatFillQ(vform);
911  return sqrdmulh(vform, dst, src1, dup_element(indexform, temp, src2, index));
912 }
913 
914 uint16_t Simulator::PolynomialMult(uint8_t op1, uint8_t op2) {
915  uint16_t result = 0;
916  uint16_t extended_op2 = op2;
917  for (int i = 0; i < 8; ++i) {
918  if ((op1 >> i) & 1) {
919  result = result ^ (extended_op2 << i);
920  }
921  }
922  return result;
923 }
924 
925 LogicVRegister Simulator::pmul(VectorFormat vform, LogicVRegister dst,
926  const LogicVRegister& src1,
927  const LogicVRegister& src2) {
928  dst.ClearForWrite(vform);
929  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
930  dst.SetUint(vform, i,
931  PolynomialMult(src1.Uint(vform, i), src2.Uint(vform, i)));
932  }
933  return dst;
934 }
935 
936 LogicVRegister Simulator::pmull(VectorFormat vform, LogicVRegister dst,
937  const LogicVRegister& src1,
938  const LogicVRegister& src2) {
939  VectorFormat vform_src = VectorFormatHalfWidth(vform);
940  dst.ClearForWrite(vform);
941  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
942  dst.SetUint(
943  vform, i,
944  PolynomialMult(src1.Uint(vform_src, i), src2.Uint(vform_src, i)));
945  }
946  return dst;
947 }
948 
949 LogicVRegister Simulator::pmull2(VectorFormat vform, LogicVRegister dst,
950  const LogicVRegister& src1,
951  const LogicVRegister& src2) {
952  VectorFormat vform_src = VectorFormatHalfWidthDoubleLanes(vform);
953  dst.ClearForWrite(vform);
954  int lane_count = LaneCountFromFormat(vform);
955  for (int i = 0; i < lane_count; i++) {
956  dst.SetUint(vform, i,
957  PolynomialMult(src1.Uint(vform_src, lane_count + i),
958  src2.Uint(vform_src, lane_count + i)));
959  }
960  return dst;
961 }
962 
963 LogicVRegister Simulator::sub(VectorFormat vform, LogicVRegister dst,
964  const LogicVRegister& src1,
965  const LogicVRegister& src2) {
966  int lane_size = LaneSizeInBitsFromFormat(vform);
967  dst.ClearForWrite(vform);
968  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
969  // Test for unsigned saturation.
970  uint64_t ua = src1.UintLeftJustified(vform, i);
971  uint64_t ub = src2.UintLeftJustified(vform, i);
972  uint64_t ur = ua - ub;
973  if (ub > ua) {
974  dst.SetUnsignedSat(i, false);
975  }
976 
977  // Test for signed saturation.
978  bool pos_a = (ua >> 63) == 0;
979  bool pos_b = (ub >> 63) == 0;
980  bool pos_r = (ur >> 63) == 0;
981  // If the signs of the operands are different, and the sign of the first
982  // operand doesn't match the result, there was an overflow.
983  if ((pos_a != pos_b) && (pos_a != pos_r)) {
984  dst.SetSignedSat(i, pos_a);
985  }
986 
987  dst.SetInt(vform, i, ur >> (64 - lane_size));
988  }
989  return dst;
990 }
991 
992 LogicVRegister Simulator::and_(VectorFormat vform, LogicVRegister dst,
993  const LogicVRegister& src1,
994  const LogicVRegister& src2) {
995  dst.ClearForWrite(vform);
996  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
997  dst.SetUint(vform, i, src1.Uint(vform, i) & src2.Uint(vform, i));
998  }
999  return dst;
1000 }
1001 
1002 LogicVRegister Simulator::orr(VectorFormat vform, LogicVRegister dst,
1003  const LogicVRegister& src1,
1004  const LogicVRegister& src2) {
1005  dst.ClearForWrite(vform);
1006  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1007  dst.SetUint(vform, i, src1.Uint(vform, i) | src2.Uint(vform, i));
1008  }
1009  return dst;
1010 }
1011 
1012 LogicVRegister Simulator::orn(VectorFormat vform, LogicVRegister dst,
1013  const LogicVRegister& src1,
1014  const LogicVRegister& src2) {
1015  dst.ClearForWrite(vform);
1016  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1017  dst.SetUint(vform, i, src1.Uint(vform, i) | ~src2.Uint(vform, i));
1018  }
1019  return dst;
1020 }
1021 
1022 LogicVRegister Simulator::eor(VectorFormat vform, LogicVRegister dst,
1023  const LogicVRegister& src1,
1024  const LogicVRegister& src2) {
1025  dst.ClearForWrite(vform);
1026  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1027  dst.SetUint(vform, i, src1.Uint(vform, i) ^ src2.Uint(vform, i));
1028  }
1029  return dst;
1030 }
1031 
1032 LogicVRegister Simulator::bic(VectorFormat vform, LogicVRegister dst,
1033  const LogicVRegister& src1,
1034  const LogicVRegister& src2) {
1035  dst.ClearForWrite(vform);
1036  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1037  dst.SetUint(vform, i, src1.Uint(vform, i) & ~src2.Uint(vform, i));
1038  }
1039  return dst;
1040 }
1041 
1042 LogicVRegister Simulator::bic(VectorFormat vform, LogicVRegister dst,
1043  const LogicVRegister& src, uint64_t imm) {
1044  uint64_t result[16];
1045  int laneCount = LaneCountFromFormat(vform);
1046  for (int i = 0; i < laneCount; ++i) {
1047  result[i] = src.Uint(vform, i) & ~imm;
1048  }
1049  dst.SetUintArray(vform, result);
1050  return dst;
1051 }
1052 
1053 LogicVRegister Simulator::bif(VectorFormat vform, LogicVRegister dst,
1054  const LogicVRegister& src1,
1055  const LogicVRegister& src2) {
1056  dst.ClearForWrite(vform);
1057  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1058  uint64_t operand1 = dst.Uint(vform, i);
1059  uint64_t operand2 = ~src2.Uint(vform, i);
1060  uint64_t operand3 = src1.Uint(vform, i);
1061  uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2);
1062  dst.SetUint(vform, i, result);
1063  }
1064  return dst;
1065 }
1066 
1067 LogicVRegister Simulator::bit(VectorFormat vform, LogicVRegister dst,
1068  const LogicVRegister& src1,
1069  const LogicVRegister& src2) {
1070  dst.ClearForWrite(vform);
1071  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1072  uint64_t operand1 = dst.Uint(vform, i);
1073  uint64_t operand2 = src2.Uint(vform, i);
1074  uint64_t operand3 = src1.Uint(vform, i);
1075  uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2);
1076  dst.SetUint(vform, i, result);
1077  }
1078  return dst;
1079 }
1080 
1081 LogicVRegister Simulator::bsl(VectorFormat vform, LogicVRegister dst,
1082  const LogicVRegister& src1,
1083  const LogicVRegister& src2) {
1084  dst.ClearForWrite(vform);
1085  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1086  uint64_t operand1 = src2.Uint(vform, i);
1087  uint64_t operand2 = dst.Uint(vform, i);
1088  uint64_t operand3 = src1.Uint(vform, i);
1089  uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2);
1090  dst.SetUint(vform, i, result);
1091  }
1092  return dst;
1093 }
1094 
1095 LogicVRegister Simulator::SMinMax(VectorFormat vform, LogicVRegister dst,
1096  const LogicVRegister& src1,
1097  const LogicVRegister& src2, bool max) {
1098  dst.ClearForWrite(vform);
1099  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1100  int64_t src1_val = src1.Int(vform, i);
1101  int64_t src2_val = src2.Int(vform, i);
1102  int64_t dst_val;
1103  if (max) {
1104  dst_val = (src1_val > src2_val) ? src1_val : src2_val;
1105  } else {
1106  dst_val = (src1_val < src2_val) ? src1_val : src2_val;
1107  }
1108  dst.SetInt(vform, i, dst_val);
1109  }
1110  return dst;
1111 }
1112 
1113 LogicVRegister Simulator::smax(VectorFormat vform, LogicVRegister dst,
1114  const LogicVRegister& src1,
1115  const LogicVRegister& src2) {
1116  return SMinMax(vform, dst, src1, src2, true);
1117 }
1118 
1119 LogicVRegister Simulator::smin(VectorFormat vform, LogicVRegister dst,
1120  const LogicVRegister& src1,
1121  const LogicVRegister& src2) {
1122  return SMinMax(vform, dst, src1, src2, false);
1123 }
1124 
1125 LogicVRegister Simulator::SMinMaxP(VectorFormat vform, LogicVRegister dst,
1126  const LogicVRegister& src1,
1127  const LogicVRegister& src2, bool max) {
1128  int lanes = LaneCountFromFormat(vform);
1129  int64_t result[kMaxLanesPerVector];
1130  const LogicVRegister* src = &src1;
1131  for (int j = 0; j < 2; j++) {
1132  for (int i = 0; i < lanes; i += 2) {
1133  int64_t first_val = src->Int(vform, i);
1134  int64_t second_val = src->Int(vform, i + 1);
1135  int64_t dst_val;
1136  if (max) {
1137  dst_val = (first_val > second_val) ? first_val : second_val;
1138  } else {
1139  dst_val = (first_val < second_val) ? first_val : second_val;
1140  }
1141  DCHECK_LT((i >> 1) + (j * lanes / 2), kMaxLanesPerVector);
1142  result[(i >> 1) + (j * lanes / 2)] = dst_val;
1143  }
1144  src = &src2;
1145  }
1146  dst.SetIntArray(vform, result);
1147  return dst;
1148 }
1149 
1150 LogicVRegister Simulator::smaxp(VectorFormat vform, LogicVRegister dst,
1151  const LogicVRegister& src1,
1152  const LogicVRegister& src2) {
1153  return SMinMaxP(vform, dst, src1, src2, true);
1154 }
1155 
1156 LogicVRegister Simulator::sminp(VectorFormat vform, LogicVRegister dst,
1157  const LogicVRegister& src1,
1158  const LogicVRegister& src2) {
1159  return SMinMaxP(vform, dst, src1, src2, false);
1160 }
1161 
1162 LogicVRegister Simulator::addp(VectorFormat vform, LogicVRegister dst,
1163  const LogicVRegister& src) {
1164  DCHECK_EQ(vform, kFormatD);
1165 
1166  uint64_t dst_val = src.Uint(kFormat2D, 0) + src.Uint(kFormat2D, 1);
1167  dst.ClearForWrite(vform);
1168  dst.SetUint(vform, 0, dst_val);
1169  return dst;
1170 }
1171 
1172 LogicVRegister Simulator::addv(VectorFormat vform, LogicVRegister dst,
1173  const LogicVRegister& src) {
1174  VectorFormat vform_dst =
1175  ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform));
1176 
1177  int64_t dst_val = 0;
1178  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1179  dst_val += src.Int(vform, i);
1180  }
1181 
1182  dst.ClearForWrite(vform_dst);
1183  dst.SetInt(vform_dst, 0, dst_val);
1184  return dst;
1185 }
1186 
1187 LogicVRegister Simulator::saddlv(VectorFormat vform, LogicVRegister dst,
1188  const LogicVRegister& src) {
1189  VectorFormat vform_dst =
1190  ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform) * 2);
1191 
1192  int64_t dst_val = 0;
1193  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1194  dst_val += src.Int(vform, i);
1195  }
1196 
1197  dst.ClearForWrite(vform_dst);
1198  dst.SetInt(vform_dst, 0, dst_val);
1199  return dst;
1200 }
1201 
1202 LogicVRegister Simulator::uaddlv(VectorFormat vform, LogicVRegister dst,
1203  const LogicVRegister& src) {
1204  VectorFormat vform_dst =
1205  ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform) * 2);
1206 
1207  uint64_t dst_val = 0;
1208  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1209  dst_val += src.Uint(vform, i);
1210  }
1211 
1212  dst.ClearForWrite(vform_dst);
1213  dst.SetUint(vform_dst, 0, dst_val);
1214  return dst;
1215 }
1216 
1217 LogicVRegister Simulator::SMinMaxV(VectorFormat vform, LogicVRegister dst,
1218  const LogicVRegister& src, bool max) {
1219  int64_t dst_val = max ? INT64_MIN : INT64_MAX;
1220  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1221  int64_t src_val = src.Int(vform, i);
1222  if (max) {
1223  dst_val = (src_val > dst_val) ? src_val : dst_val;
1224  } else {
1225  dst_val = (src_val < dst_val) ? src_val : dst_val;
1226  }
1227  }
1228  dst.ClearForWrite(ScalarFormatFromFormat(vform));
1229  dst.SetInt(vform, 0, dst_val);
1230  return dst;
1231 }
1232 
1233 LogicVRegister Simulator::smaxv(VectorFormat vform, LogicVRegister dst,
1234  const LogicVRegister& src) {
1235  SMinMaxV(vform, dst, src, true);
1236  return dst;
1237 }
1238 
1239 LogicVRegister Simulator::sminv(VectorFormat vform, LogicVRegister dst,
1240  const LogicVRegister& src) {
1241  SMinMaxV(vform, dst, src, false);
1242  return dst;
1243 }
1244 
1245 LogicVRegister Simulator::UMinMax(VectorFormat vform, LogicVRegister dst,
1246  const LogicVRegister& src1,
1247  const LogicVRegister& src2, bool max) {
1248  dst.ClearForWrite(vform);
1249  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1250  uint64_t src1_val = src1.Uint(vform, i);
1251  uint64_t src2_val = src2.Uint(vform, i);
1252  uint64_t dst_val;
1253  if (max) {
1254  dst_val = (src1_val > src2_val) ? src1_val : src2_val;
1255  } else {
1256  dst_val = (src1_val < src2_val) ? src1_val : src2_val;
1257  }
1258  dst.SetUint(vform, i, dst_val);
1259  }
1260  return dst;
1261 }
1262 
1263 LogicVRegister Simulator::umax(VectorFormat vform, LogicVRegister dst,
1264  const LogicVRegister& src1,
1265  const LogicVRegister& src2) {
1266  return UMinMax(vform, dst, src1, src2, true);
1267 }
1268 
1269 LogicVRegister Simulator::umin(VectorFormat vform, LogicVRegister dst,
1270  const LogicVRegister& src1,
1271  const LogicVRegister& src2) {
1272  return UMinMax(vform, dst, src1, src2, false);
1273 }
1274 
1275 LogicVRegister Simulator::UMinMaxP(VectorFormat vform, LogicVRegister dst,
1276  const LogicVRegister& src1,
1277  const LogicVRegister& src2, bool max) {
1278  int lanes = LaneCountFromFormat(vform);
1279  uint64_t result[kMaxLanesPerVector];
1280  const LogicVRegister* src = &src1;
1281  for (int j = 0; j < 2; j++) {
1282  for (int i = 0; i < LaneCountFromFormat(vform); i += 2) {
1283  uint64_t first_val = src->Uint(vform, i);
1284  uint64_t second_val = src->Uint(vform, i + 1);
1285  uint64_t dst_val;
1286  if (max) {
1287  dst_val = (first_val > second_val) ? first_val : second_val;
1288  } else {
1289  dst_val = (first_val < second_val) ? first_val : second_val;
1290  }
1291  DCHECK_LT((i >> 1) + (j * lanes / 2), kMaxLanesPerVector);
1292  result[(i >> 1) + (j * lanes / 2)] = dst_val;
1293  }
1294  src = &src2;
1295  }
1296  dst.SetUintArray(vform, result);
1297  return dst;
1298 }
1299 
1300 LogicVRegister Simulator::umaxp(VectorFormat vform, LogicVRegister dst,
1301  const LogicVRegister& src1,
1302  const LogicVRegister& src2) {
1303  return UMinMaxP(vform, dst, src1, src2, true);
1304 }
1305 
1306 LogicVRegister Simulator::uminp(VectorFormat vform, LogicVRegister dst,
1307  const LogicVRegister& src1,
1308  const LogicVRegister& src2) {
1309  return UMinMaxP(vform, dst, src1, src2, false);
1310 }
1311 
1312 LogicVRegister Simulator::UMinMaxV(VectorFormat vform, LogicVRegister dst,
1313  const LogicVRegister& src, bool max) {
1314  uint64_t dst_val = max ? 0 : UINT64_MAX;
1315  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1316  uint64_t src_val = src.Uint(vform, i);
1317  if (max) {
1318  dst_val = (src_val > dst_val) ? src_val : dst_val;
1319  } else {
1320  dst_val = (src_val < dst_val) ? src_val : dst_val;
1321  }
1322  }
1323  dst.ClearForWrite(ScalarFormatFromFormat(vform));
1324  dst.SetUint(vform, 0, dst_val);
1325  return dst;
1326 }
1327 
1328 LogicVRegister Simulator::umaxv(VectorFormat vform, LogicVRegister dst,
1329  const LogicVRegister& src) {
1330  UMinMaxV(vform, dst, src, true);
1331  return dst;
1332 }
1333 
1334 LogicVRegister Simulator::uminv(VectorFormat vform, LogicVRegister dst,
1335  const LogicVRegister& src) {
1336  UMinMaxV(vform, dst, src, false);
1337  return dst;
1338 }
1339 
1340 LogicVRegister Simulator::shl(VectorFormat vform, LogicVRegister dst,
1341  const LogicVRegister& src, int shift) {
1342  DCHECK_GE(shift, 0);
1343  SimVRegister temp;
1344  LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
1345  return ushl(vform, dst, src, shiftreg);
1346 }
1347 
1348 LogicVRegister Simulator::sshll(VectorFormat vform, LogicVRegister dst,
1349  const LogicVRegister& src, int shift) {
1350  DCHECK_GE(shift, 0);
1351  SimVRegister temp1, temp2;
1352  LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
1353  LogicVRegister extendedreg = sxtl(vform, temp2, src);
1354  return sshl(vform, dst, extendedreg, shiftreg);
1355 }
1356 
1357 LogicVRegister Simulator::sshll2(VectorFormat vform, LogicVRegister dst,
1358  const LogicVRegister& src, int shift) {
1359  DCHECK_GE(shift, 0);
1360  SimVRegister temp1, temp2;
1361  LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
1362  LogicVRegister extendedreg = sxtl2(vform, temp2, src);
1363  return sshl(vform, dst, extendedreg, shiftreg);
1364 }
1365 
1366 LogicVRegister Simulator::shll(VectorFormat vform, LogicVRegister dst,
1367  const LogicVRegister& src) {
1368  int shift = LaneSizeInBitsFromFormat(vform) / 2;
1369  return sshll(vform, dst, src, shift);
1370 }
1371 
1372 LogicVRegister Simulator::shll2(VectorFormat vform, LogicVRegister dst,
1373  const LogicVRegister& src) {
1374  int shift = LaneSizeInBitsFromFormat(vform) / 2;
1375  return sshll2(vform, dst, src, shift);
1376 }
1377 
1378 LogicVRegister Simulator::ushll(VectorFormat vform, LogicVRegister dst,
1379  const LogicVRegister& src, int shift) {
1380  DCHECK_GE(shift, 0);
1381  SimVRegister temp1, temp2;
1382  LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
1383  LogicVRegister extendedreg = uxtl(vform, temp2, src);
1384  return ushl(vform, dst, extendedreg, shiftreg);
1385 }
1386 
1387 LogicVRegister Simulator::ushll2(VectorFormat vform, LogicVRegister dst,
1388  const LogicVRegister& src, int shift) {
1389  DCHECK_GE(shift, 0);
1390  SimVRegister temp1, temp2;
1391  LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
1392  LogicVRegister extendedreg = uxtl2(vform, temp2, src);
1393  return ushl(vform, dst, extendedreg, shiftreg);
1394 }
1395 
1396 LogicVRegister Simulator::sli(VectorFormat vform, LogicVRegister dst,
1397  const LogicVRegister& src, int shift) {
1398  dst.ClearForWrite(vform);
1399  int laneCount = LaneCountFromFormat(vform);
1400  for (int i = 0; i < laneCount; i++) {
1401  uint64_t src_lane = src.Uint(vform, i);
1402  uint64_t dst_lane = dst.Uint(vform, i);
1403  uint64_t shifted = src_lane << shift;
1404  uint64_t mask = MaxUintFromFormat(vform) << shift;
1405  dst.SetUint(vform, i, (dst_lane & ~mask) | shifted);
1406  }
1407  return dst;
1408 }
1409 
1410 LogicVRegister Simulator::sqshl(VectorFormat vform, LogicVRegister dst,
1411  const LogicVRegister& src, int shift) {
1412  DCHECK_GE(shift, 0);
1413  SimVRegister temp;
1414  LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
1415  return sshl(vform, dst, src, shiftreg).SignedSaturate(vform);
1416 }
1417 
1418 LogicVRegister Simulator::uqshl(VectorFormat vform, LogicVRegister dst,
1419  const LogicVRegister& src, int shift) {
1420  DCHECK_GE(shift, 0);
1421  SimVRegister temp;
1422  LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
1423  return ushl(vform, dst, src, shiftreg).UnsignedSaturate(vform);
1424 }
1425 
1426 LogicVRegister Simulator::sqshlu(VectorFormat vform, LogicVRegister dst,
1427  const LogicVRegister& src, int shift) {
1428  DCHECK_GE(shift, 0);
1429  SimVRegister temp;
1430  LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
1431  return sshl(vform, dst, src, shiftreg).UnsignedSaturate(vform);
1432 }
1433 
1434 LogicVRegister Simulator::sri(VectorFormat vform, LogicVRegister dst,
1435  const LogicVRegister& src, int shift) {
1436  dst.ClearForWrite(vform);
1437  int laneCount = LaneCountFromFormat(vform);
1438  DCHECK((shift > 0) &&
1439  (shift <= static_cast<int>(LaneSizeInBitsFromFormat(vform))));
1440  for (int i = 0; i < laneCount; i++) {
1441  uint64_t src_lane = src.Uint(vform, i);
1442  uint64_t dst_lane = dst.Uint(vform, i);
1443  uint64_t shifted;
1444  uint64_t mask;
1445  if (shift == 64) {
1446  shifted = 0;
1447  mask = 0;
1448  } else {
1449  shifted = src_lane >> shift;
1450  mask = MaxUintFromFormat(vform) >> shift;
1451  }
1452  dst.SetUint(vform, i, (dst_lane & ~mask) | shifted);
1453  }
1454  return dst;
1455 }
1456 
1457 LogicVRegister Simulator::ushr(VectorFormat vform, LogicVRegister dst,
1458  const LogicVRegister& src, int shift) {
1459  DCHECK_GE(shift, 0);
1460  SimVRegister temp;
1461  LogicVRegister shiftreg = dup_immediate(vform, temp, -shift);
1462  return ushl(vform, dst, src, shiftreg);
1463 }
1464 
1465 LogicVRegister Simulator::sshr(VectorFormat vform, LogicVRegister dst,
1466  const LogicVRegister& src, int shift) {
1467  DCHECK_GE(shift, 0);
1468  SimVRegister temp;
1469  LogicVRegister shiftreg = dup_immediate(vform, temp, -shift);
1470  return sshl(vform, dst, src, shiftreg);
1471 }
1472 
1473 LogicVRegister Simulator::ssra(VectorFormat vform, LogicVRegister dst,
1474  const LogicVRegister& src, int shift) {
1475  SimVRegister temp;
1476  LogicVRegister shifted_reg = sshr(vform, temp, src, shift);
1477  return add(vform, dst, dst, shifted_reg);
1478 }
1479 
1480 LogicVRegister Simulator::usra(VectorFormat vform, LogicVRegister dst,
1481  const LogicVRegister& src, int shift) {
1482  SimVRegister temp;
1483  LogicVRegister shifted_reg = ushr(vform, temp, src, shift);
1484  return add(vform, dst, dst, shifted_reg);
1485 }
1486 
1487 LogicVRegister Simulator::srsra(VectorFormat vform, LogicVRegister dst,
1488  const LogicVRegister& src, int shift) {
1489  SimVRegister temp;
1490  LogicVRegister shifted_reg = sshr(vform, temp, src, shift).Round(vform);
1491  return add(vform, dst, dst, shifted_reg);
1492 }
1493 
1494 LogicVRegister Simulator::ursra(VectorFormat vform, LogicVRegister dst,
1495  const LogicVRegister& src, int shift) {
1496  SimVRegister temp;
1497  LogicVRegister shifted_reg = ushr(vform, temp, src, shift).Round(vform);
1498  return add(vform, dst, dst, shifted_reg);
1499 }
1500 
1501 LogicVRegister Simulator::cls(VectorFormat vform, LogicVRegister dst,
1502  const LogicVRegister& src) {
1503  uint64_t result[16];
1504  int laneSizeInBits = LaneSizeInBitsFromFormat(vform);
1505  int laneCount = LaneCountFromFormat(vform);
1506  for (int i = 0; i < laneCount; i++) {
1507  result[i] = CountLeadingSignBits(src.Int(vform, i), laneSizeInBits);
1508  }
1509 
1510  dst.SetUintArray(vform, result);
1511  return dst;
1512 }
1513 
1514 LogicVRegister Simulator::clz(VectorFormat vform, LogicVRegister dst,
1515  const LogicVRegister& src) {
1516  uint64_t result[16];
1517  int laneSizeInBits = LaneSizeInBitsFromFormat(vform);
1518  int laneCount = LaneCountFromFormat(vform);
1519  for (int i = 0; i < laneCount; i++) {
1520  result[i] = CountLeadingZeros(src.Uint(vform, i), laneSizeInBits);
1521  }
1522 
1523  dst.SetUintArray(vform, result);
1524  return dst;
1525 }
1526 
1527 LogicVRegister Simulator::cnt(VectorFormat vform, LogicVRegister dst,
1528  const LogicVRegister& src) {
1529  uint64_t result[16];
1530  int laneSizeInBits = LaneSizeInBitsFromFormat(vform);
1531  int laneCount = LaneCountFromFormat(vform);
1532  for (int i = 0; i < laneCount; i++) {
1533  uint64_t value = src.Uint(vform, i);
1534  result[i] = 0;
1535  for (int j = 0; j < laneSizeInBits; j++) {
1536  result[i] += (value & 1);
1537  value >>= 1;
1538  }
1539  }
1540 
1541  dst.SetUintArray(vform, result);
1542  return dst;
1543 }
1544 
1545 LogicVRegister Simulator::sshl(VectorFormat vform, LogicVRegister dst,
1546  const LogicVRegister& src1,
1547  const LogicVRegister& src2) {
1548  dst.ClearForWrite(vform);
1549  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1550  int8_t shift_val = src2.Int(vform, i);
1551  int64_t lj_src_val = src1.IntLeftJustified(vform, i);
1552 
1553  // Set signed saturation state.
1554  if ((shift_val > CountLeadingSignBits(lj_src_val, 64)) &&
1555  (lj_src_val != 0)) {
1556  dst.SetSignedSat(i, lj_src_val >= 0);
1557  }
1558 
1559  // Set unsigned saturation state.
1560  if (lj_src_val < 0) {
1561  dst.SetUnsignedSat(i, false);
1562  } else if ((shift_val > CountLeadingZeros(lj_src_val, 64)) &&
1563  (lj_src_val != 0)) {
1564  dst.SetUnsignedSat(i, true);
1565  }
1566 
1567  int64_t src_val = src1.Int(vform, i);
1568  bool src_is_negative = src_val < 0;
1569  if (shift_val > 63) {
1570  dst.SetInt(vform, i, 0);
1571  } else if (shift_val < -63) {
1572  dst.SetRounding(i, src_is_negative);
1573  dst.SetInt(vform, i, src_is_negative ? -1 : 0);
1574  } else {
1575  // Use unsigned types for shifts, as behaviour is undefined for signed
1576  // lhs.
1577  uint64_t usrc_val = static_cast<uint64_t>(src_val);
1578 
1579  if (shift_val < 0) {
1580  // Convert to right shift.
1581  shift_val = -shift_val;
1582 
1583  // Set rounding state by testing most-significant bit shifted out.
1584  // Rounding only needed on right shifts.
1585  if (((usrc_val >> (shift_val - 1)) & 1) == 1) {
1586  dst.SetRounding(i, true);
1587  }
1588 
1589  usrc_val >>= shift_val;
1590 
1591  if (src_is_negative) {
1592  // Simulate sign-extension.
1593  usrc_val |= (~UINT64_C(0) << (64 - shift_val));
1594  }
1595  } else {
1596  usrc_val <<= shift_val;
1597  }
1598  dst.SetUint(vform, i, usrc_val);
1599  }
1600  }
1601  return dst;
1602 }
1603 
1604 LogicVRegister Simulator::ushl(VectorFormat vform, LogicVRegister dst,
1605  const LogicVRegister& src1,
1606  const LogicVRegister& src2) {
1607  dst.ClearForWrite(vform);
1608  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1609  int8_t shift_val = src2.Int(vform, i);
1610  uint64_t lj_src_val = src1.UintLeftJustified(vform, i);
1611 
1612  // Set saturation state.
1613  if ((shift_val > CountLeadingZeros(lj_src_val, 64)) && (lj_src_val != 0)) {
1614  dst.SetUnsignedSat(i, true);
1615  }
1616 
1617  uint64_t src_val = src1.Uint(vform, i);
1618  if ((shift_val > 63) || (shift_val < -64)) {
1619  dst.SetUint(vform, i, 0);
1620  } else {
1621  if (shift_val < 0) {
1622  // Set rounding state. Rounding only needed on right shifts.
1623  if (((src_val >> (-shift_val - 1)) & 1) == 1) {
1624  dst.SetRounding(i, true);
1625  }
1626 
1627  if (shift_val == -64) {
1628  src_val = 0;
1629  } else {
1630  src_val >>= -shift_val;
1631  }
1632  } else {
1633  src_val <<= shift_val;
1634  }
1635  dst.SetUint(vform, i, src_val);
1636  }
1637  }
1638  return dst;
1639 }
1640 
1641 LogicVRegister Simulator::neg(VectorFormat vform, LogicVRegister dst,
1642  const LogicVRegister& src) {
1643  dst.ClearForWrite(vform);
1644  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1645  // Test for signed saturation.
1646  int64_t sa = src.Int(vform, i);
1647  if (sa == MinIntFromFormat(vform)) {
1648  dst.SetSignedSat(i, true);
1649  }
1650  dst.SetInt(vform, i, (sa == INT64_MIN) ? sa : -sa);
1651  }
1652  return dst;
1653 }
1654 
1655 LogicVRegister Simulator::suqadd(VectorFormat vform, LogicVRegister dst,
1656  const LogicVRegister& src) {
1657  dst.ClearForWrite(vform);
1658  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1659  int64_t sa = dst.IntLeftJustified(vform, i);
1660  uint64_t ub = src.UintLeftJustified(vform, i);
1661  uint64_t ur = sa + ub;
1662 
1663  int64_t sr = bit_cast<int64_t>(ur);
1664  if (sr < sa) { // Test for signed positive saturation.
1665  dst.SetInt(vform, i, MaxIntFromFormat(vform));
1666  } else {
1667  dst.SetUint(vform, i, dst.Int(vform, i) + src.Uint(vform, i));
1668  }
1669  }
1670  return dst;
1671 }
1672 
1673 LogicVRegister Simulator::usqadd(VectorFormat vform, LogicVRegister dst,
1674  const LogicVRegister& src) {
1675  dst.ClearForWrite(vform);
1676  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1677  uint64_t ua = dst.UintLeftJustified(vform, i);
1678  int64_t sb = src.IntLeftJustified(vform, i);
1679  uint64_t ur = ua + sb;
1680 
1681  if ((sb > 0) && (ur <= ua)) {
1682  dst.SetUint(vform, i, MaxUintFromFormat(vform)); // Positive saturation.
1683  } else if ((sb < 0) && (ur >= ua)) {
1684  dst.SetUint(vform, i, 0); // Negative saturation.
1685  } else {
1686  dst.SetUint(vform, i, dst.Uint(vform, i) + src.Int(vform, i));
1687  }
1688  }
1689  return dst;
1690 }
1691 
1692 LogicVRegister Simulator::abs(VectorFormat vform, LogicVRegister dst,
1693  const LogicVRegister& src) {
1694  dst.ClearForWrite(vform);
1695  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1696  // Test for signed saturation.
1697  int64_t sa = src.Int(vform, i);
1698  if (sa == MinIntFromFormat(vform)) {
1699  dst.SetSignedSat(i, true);
1700  }
1701  if (sa < 0) {
1702  dst.SetInt(vform, i, (sa == INT64_MIN) ? sa : -sa);
1703  } else {
1704  dst.SetInt(vform, i, sa);
1705  }
1706  }
1707  return dst;
1708 }
1709 
1710 LogicVRegister Simulator::ExtractNarrow(VectorFormat dstform,
1711  LogicVRegister dst, bool dstIsSigned,
1712  const LogicVRegister& src,
1713  bool srcIsSigned) {
1714  bool upperhalf = false;
1715  VectorFormat srcform = kFormatUndefined;
1716  int64_t ssrc[8];
1717  uint64_t usrc[8];
1718 
1719  switch (dstform) {
1720  case kFormat8B:
1721  upperhalf = false;
1722  srcform = kFormat8H;
1723  break;
1724  case kFormat16B:
1725  upperhalf = true;
1726  srcform = kFormat8H;
1727  break;
1728  case kFormat4H:
1729  upperhalf = false;
1730  srcform = kFormat4S;
1731  break;
1732  case kFormat8H:
1733  upperhalf = true;
1734  srcform = kFormat4S;
1735  break;
1736  case kFormat2S:
1737  upperhalf = false;
1738  srcform = kFormat2D;
1739  break;
1740  case kFormat4S:
1741  upperhalf = true;
1742  srcform = kFormat2D;
1743  break;
1744  case kFormatB:
1745  upperhalf = false;
1746  srcform = kFormatH;
1747  break;
1748  case kFormatH:
1749  upperhalf = false;
1750  srcform = kFormatS;
1751  break;
1752  case kFormatS:
1753  upperhalf = false;
1754  srcform = kFormatD;
1755  break;
1756  default:
1757  UNIMPLEMENTED();
1758  }
1759 
1760  for (int i = 0; i < LaneCountFromFormat(srcform); i++) {
1761  ssrc[i] = src.Int(srcform, i);
1762  usrc[i] = src.Uint(srcform, i);
1763  }
1764 
1765  int offset;
1766  if (upperhalf) {
1767  offset = LaneCountFromFormat(dstform) / 2;
1768  } else {
1769  offset = 0;
1770  dst.ClearForWrite(dstform);
1771  }
1772 
1773  for (int i = 0; i < LaneCountFromFormat(srcform); i++) {
1774  // Test for signed saturation
1775  if (ssrc[i] > MaxIntFromFormat(dstform)) {
1776  dst.SetSignedSat(offset + i, true);
1777  } else if (ssrc[i] < MinIntFromFormat(dstform)) {
1778  dst.SetSignedSat(offset + i, false);
1779  }
1780 
1781  // Test for unsigned saturation
1782  if (srcIsSigned) {
1783  if (ssrc[i] > static_cast<int64_t>(MaxUintFromFormat(dstform))) {
1784  dst.SetUnsignedSat(offset + i, true);
1785  } else if (ssrc[i] < 0) {
1786  dst.SetUnsignedSat(offset + i, false);
1787  }
1788  } else {
1789  if (usrc[i] > MaxUintFromFormat(dstform)) {
1790  dst.SetUnsignedSat(offset + i, true);
1791  }
1792  }
1793 
1794  int64_t result;
1795  if (srcIsSigned) {
1796  result = ssrc[i] & MaxUintFromFormat(dstform);
1797  } else {
1798  result = usrc[i] & MaxUintFromFormat(dstform);
1799  }
1800 
1801  if (dstIsSigned) {
1802  dst.SetInt(dstform, offset + i, result);
1803  } else {
1804  dst.SetUint(dstform, offset + i, result);
1805  }
1806  }
1807  return dst;
1808 }
1809 
1810 LogicVRegister Simulator::xtn(VectorFormat vform, LogicVRegister dst,
1811  const LogicVRegister& src) {
1812  return ExtractNarrow(vform, dst, true, src, true);
1813 }
1814 
1815 LogicVRegister Simulator::sqxtn(VectorFormat vform, LogicVRegister dst,
1816  const LogicVRegister& src) {
1817  return ExtractNarrow(vform, dst, true, src, true).SignedSaturate(vform);
1818 }
1819 
1820 LogicVRegister Simulator::sqxtun(VectorFormat vform, LogicVRegister dst,
1821  const LogicVRegister& src) {
1822  return ExtractNarrow(vform, dst, false, src, true).UnsignedSaturate(vform);
1823 }
1824 
1825 LogicVRegister Simulator::uqxtn(VectorFormat vform, LogicVRegister dst,
1826  const LogicVRegister& src) {
1827  return ExtractNarrow(vform, dst, false, src, false).UnsignedSaturate(vform);
1828 }
1829 
1830 LogicVRegister Simulator::AbsDiff(VectorFormat vform, LogicVRegister dst,
1831  const LogicVRegister& src1,
1832  const LogicVRegister& src2, bool issigned) {
1833  dst.ClearForWrite(vform);
1834  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1835  if (issigned) {
1836  int64_t sr = src1.Int(vform, i) - src2.Int(vform, i);
1837  sr = sr > 0 ? sr : -sr;
1838  dst.SetInt(vform, i, sr);
1839  } else {
1840  int64_t sr = src1.Uint(vform, i) - src2.Uint(vform, i);
1841  sr = sr > 0 ? sr : -sr;
1842  dst.SetUint(vform, i, sr);
1843  }
1844  }
1845  return dst;
1846 }
1847 
1848 LogicVRegister Simulator::saba(VectorFormat vform, LogicVRegister dst,
1849  const LogicVRegister& src1,
1850  const LogicVRegister& src2) {
1851  SimVRegister temp;
1852  dst.ClearForWrite(vform);
1853  AbsDiff(vform, temp, src1, src2, true);
1854  add(vform, dst, dst, temp);
1855  return dst;
1856 }
1857 
1858 LogicVRegister Simulator::uaba(VectorFormat vform, LogicVRegister dst,
1859  const LogicVRegister& src1,
1860  const LogicVRegister& src2) {
1861  SimVRegister temp;
1862  dst.ClearForWrite(vform);
1863  AbsDiff(vform, temp, src1, src2, false);
1864  add(vform, dst, dst, temp);
1865  return dst;
1866 }
1867 
1868 LogicVRegister Simulator::not_(VectorFormat vform, LogicVRegister dst,
1869  const LogicVRegister& src) {
1870  dst.ClearForWrite(vform);
1871  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1872  dst.SetUint(vform, i, ~src.Uint(vform, i));
1873  }
1874  return dst;
1875 }
1876 
1877 LogicVRegister Simulator::rbit(VectorFormat vform, LogicVRegister dst,
1878  const LogicVRegister& src) {
1879  uint64_t result[16];
1880  int laneCount = LaneCountFromFormat(vform);
1881  int laneSizeInBits = LaneSizeInBitsFromFormat(vform);
1882  uint64_t reversed_value;
1883  uint64_t value;
1884  for (int i = 0; i < laneCount; i++) {
1885  value = src.Uint(vform, i);
1886  reversed_value = 0;
1887  for (int j = 0; j < laneSizeInBits; j++) {
1888  reversed_value = (reversed_value << 1) | (value & 1);
1889  value >>= 1;
1890  }
1891  result[i] = reversed_value;
1892  }
1893 
1894  dst.SetUintArray(vform, result);
1895  return dst;
1896 }
1897 
1898 LogicVRegister Simulator::rev(VectorFormat vform, LogicVRegister dst,
1899  const LogicVRegister& src, int revSize) {
1900  uint64_t result[16];
1901  int laneCount = LaneCountFromFormat(vform);
1902  int laneSize = LaneSizeInBytesFromFormat(vform);
1903  int lanesPerLoop = revSize / laneSize;
1904  for (int i = 0; i < laneCount; i += lanesPerLoop) {
1905  for (int j = 0; j < lanesPerLoop; j++) {
1906  result[i + lanesPerLoop - 1 - j] = src.Uint(vform, i + j);
1907  }
1908  }
1909  dst.SetUintArray(vform, result);
1910  return dst;
1911 }
1912 
1913 LogicVRegister Simulator::rev16(VectorFormat vform, LogicVRegister dst,
1914  const LogicVRegister& src) {
1915  return rev(vform, dst, src, 2);
1916 }
1917 
1918 LogicVRegister Simulator::rev32(VectorFormat vform, LogicVRegister dst,
1919  const LogicVRegister& src) {
1920  return rev(vform, dst, src, 4);
1921 }
1922 
1923 LogicVRegister Simulator::rev64(VectorFormat vform, LogicVRegister dst,
1924  const LogicVRegister& src) {
1925  return rev(vform, dst, src, 8);
1926 }
1927 
1928 LogicVRegister Simulator::addlp(VectorFormat vform, LogicVRegister dst,
1929  const LogicVRegister& src, bool is_signed,
1930  bool do_accumulate) {
1931  VectorFormat vformsrc = VectorFormatHalfWidthDoubleLanes(vform);
1932  DCHECK_LE(LaneSizeInBitsFromFormat(vformsrc), 32U);
1933  DCHECK_LE(LaneCountFromFormat(vform), 8);
1934 
1935  uint64_t result[8];
1936  int lane_count = LaneCountFromFormat(vform);
1937  for (int i = 0; i < lane_count; i++) {
1938  if (is_signed) {
1939  result[i] = static_cast<uint64_t>(src.Int(vformsrc, 2 * i) +
1940  src.Int(vformsrc, 2 * i + 1));
1941  } else {
1942  result[i] = src.Uint(vformsrc, 2 * i) + src.Uint(vformsrc, 2 * i + 1);
1943  }
1944  }
1945 
1946  dst.ClearForWrite(vform);
1947  for (int i = 0; i < lane_count; ++i) {
1948  if (do_accumulate) {
1949  result[i] += dst.Uint(vform, i);
1950  }
1951  dst.SetUint(vform, i, result[i]);
1952  }
1953 
1954  return dst;
1955 }
1956 
1957 LogicVRegister Simulator::saddlp(VectorFormat vform, LogicVRegister dst,
1958  const LogicVRegister& src) {
1959  return addlp(vform, dst, src, true, false);
1960 }
1961 
1962 LogicVRegister Simulator::uaddlp(VectorFormat vform, LogicVRegister dst,
1963  const LogicVRegister& src) {
1964  return addlp(vform, dst, src, false, false);
1965 }
1966 
1967 LogicVRegister Simulator::sadalp(VectorFormat vform, LogicVRegister dst,
1968  const LogicVRegister& src) {
1969  return addlp(vform, dst, src, true, true);
1970 }
1971 
1972 LogicVRegister Simulator::uadalp(VectorFormat vform, LogicVRegister dst,
1973  const LogicVRegister& src) {
1974  return addlp(vform, dst, src, false, true);
1975 }
1976 
1977 LogicVRegister Simulator::ext(VectorFormat vform, LogicVRegister dst,
1978  const LogicVRegister& src1,
1979  const LogicVRegister& src2, int index) {
1980  uint8_t result[16];
1981  int laneCount = LaneCountFromFormat(vform);
1982  for (int i = 0; i < laneCount - index; ++i) {
1983  result[i] = src1.Uint(vform, i + index);
1984  }
1985  for (int i = 0; i < index; ++i) {
1986  result[laneCount - index + i] = src2.Uint(vform, i);
1987  }
1988  dst.ClearForWrite(vform);
1989  for (int i = 0; i < laneCount; ++i) {
1990  dst.SetUint(vform, i, result[i]);
1991  }
1992  return dst;
1993 }
1994 
1995 LogicVRegister Simulator::dup_element(VectorFormat vform, LogicVRegister dst,
1996  const LogicVRegister& src,
1997  int src_index) {
1998  int laneCount = LaneCountFromFormat(vform);
1999  uint64_t value = src.Uint(vform, src_index);
2000  dst.ClearForWrite(vform);
2001  for (int i = 0; i < laneCount; ++i) {
2002  dst.SetUint(vform, i, value);
2003  }
2004  return dst;
2005 }
2006 
2007 LogicVRegister Simulator::dup_immediate(VectorFormat vform, LogicVRegister dst,
2008  uint64_t imm) {
2009  int laneCount = LaneCountFromFormat(vform);
2010  uint64_t value = imm & MaxUintFromFormat(vform);
2011  dst.ClearForWrite(vform);
2012  for (int i = 0; i < laneCount; ++i) {
2013  dst.SetUint(vform, i, value);
2014  }
2015  return dst;
2016 }
2017 
2018 LogicVRegister Simulator::ins_element(VectorFormat vform, LogicVRegister dst,
2019  int dst_index, const LogicVRegister& src,
2020  int src_index) {
2021  dst.SetUint(vform, dst_index, src.Uint(vform, src_index));
2022  return dst;
2023 }
2024 
2025 LogicVRegister Simulator::ins_immediate(VectorFormat vform, LogicVRegister dst,
2026  int dst_index, uint64_t imm) {
2027  uint64_t value = imm & MaxUintFromFormat(vform);
2028  dst.SetUint(vform, dst_index, value);
2029  return dst;
2030 }
2031 
2032 LogicVRegister Simulator::movi(VectorFormat vform, LogicVRegister dst,
2033  uint64_t imm) {
2034  int laneCount = LaneCountFromFormat(vform);
2035  dst.ClearForWrite(vform);
2036  for (int i = 0; i < laneCount; ++i) {
2037  dst.SetUint(vform, i, imm);
2038  }
2039  return dst;
2040 }
2041 
2042 LogicVRegister Simulator::mvni(VectorFormat vform, LogicVRegister dst,
2043  uint64_t imm) {
2044  int laneCount = LaneCountFromFormat(vform);
2045  dst.ClearForWrite(vform);
2046  for (int i = 0; i < laneCount; ++i) {
2047  dst.SetUint(vform, i, ~imm);
2048  }
2049  return dst;
2050 }
2051 
2052 LogicVRegister Simulator::orr(VectorFormat vform, LogicVRegister dst,
2053  const LogicVRegister& src, uint64_t imm) {
2054  uint64_t result[16];
2055  int laneCount = LaneCountFromFormat(vform);
2056  for (int i = 0; i < laneCount; ++i) {
2057  result[i] = src.Uint(vform, i) | imm;
2058  }
2059  dst.SetUintArray(vform, result);
2060  return dst;
2061 }
2062 
2063 LogicVRegister Simulator::uxtl(VectorFormat vform, LogicVRegister dst,
2064  const LogicVRegister& src) {
2065  VectorFormat vform_half = VectorFormatHalfWidth(vform);
2066 
2067  dst.ClearForWrite(vform);
2068  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2069  dst.SetUint(vform, i, src.Uint(vform_half, i));
2070  }
2071  return dst;
2072 }
2073 
2074 LogicVRegister Simulator::sxtl(VectorFormat vform, LogicVRegister dst,
2075  const LogicVRegister& src) {
2076  VectorFormat vform_half = VectorFormatHalfWidth(vform);
2077 
2078  dst.ClearForWrite(vform);
2079  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2080  dst.SetInt(vform, i, src.Int(vform_half, i));
2081  }
2082  return dst;
2083 }
2084 
2085 LogicVRegister Simulator::uxtl2(VectorFormat vform, LogicVRegister dst,
2086  const LogicVRegister& src) {
2087  VectorFormat vform_half = VectorFormatHalfWidth(vform);
2088  int lane_count = LaneCountFromFormat(vform);
2089 
2090  dst.ClearForWrite(vform);
2091  for (int i = 0; i < lane_count; i++) {
2092  dst.SetUint(vform, i, src.Uint(vform_half, lane_count + i));
2093  }
2094  return dst;
2095 }
2096 
2097 LogicVRegister Simulator::sxtl2(VectorFormat vform, LogicVRegister dst,
2098  const LogicVRegister& src) {
2099  VectorFormat vform_half = VectorFormatHalfWidth(vform);
2100  int lane_count = LaneCountFromFormat(vform);
2101 
2102  dst.ClearForWrite(vform);
2103  for (int i = 0; i < lane_count; i++) {
2104  dst.SetInt(vform, i, src.Int(vform_half, lane_count + i));
2105  }
2106  return dst;
2107 }
2108 
2109 LogicVRegister Simulator::shrn(VectorFormat vform, LogicVRegister dst,
2110  const LogicVRegister& src, int shift) {
2111  SimVRegister temp;
2112  VectorFormat vform_src = VectorFormatDoubleWidth(vform);
2113  VectorFormat vform_dst = vform;
2114  LogicVRegister shifted_src = ushr(vform_src, temp, src, shift);
2115  return ExtractNarrow(vform_dst, dst, false, shifted_src, false);
2116 }
2117 
2118 LogicVRegister Simulator::shrn2(VectorFormat vform, LogicVRegister dst,
2119  const LogicVRegister& src, int shift) {
2120  SimVRegister temp;
2121  VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
2122  VectorFormat vformdst = vform;
2123  LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift);
2124  return ExtractNarrow(vformdst, dst, false, shifted_src, false);
2125 }
2126 
2127 LogicVRegister Simulator::rshrn(VectorFormat vform, LogicVRegister dst,
2128  const LogicVRegister& src, int shift) {
2129  SimVRegister temp;
2130  VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
2131  VectorFormat vformdst = vform;
2132  LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift).Round(vformsrc);
2133  return ExtractNarrow(vformdst, dst, false, shifted_src, false);
2134 }
2135 
2136 LogicVRegister Simulator::rshrn2(VectorFormat vform, LogicVRegister dst,
2137  const LogicVRegister& src, int shift) {
2138  SimVRegister temp;
2139  VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
2140  VectorFormat vformdst = vform;
2141  LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift).Round(vformsrc);
2142  return ExtractNarrow(vformdst, dst, false, shifted_src, false);
2143 }
2144 
2145 LogicVRegister Simulator::Table(VectorFormat vform, LogicVRegister dst,
2146  const LogicVRegister& ind,
2147  bool zero_out_of_bounds,
2148  const LogicVRegister* tab1,
2149  const LogicVRegister* tab2,
2150  const LogicVRegister* tab3,
2151  const LogicVRegister* tab4) {
2152  DCHECK_NOT_NULL(tab1);
2153  const LogicVRegister* tab[4] = {tab1, tab2, tab3, tab4};
2154  uint64_t result[kMaxLanesPerVector];
2155  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2156  result[i] = zero_out_of_bounds ? 0 : dst.Uint(kFormat16B, i);
2157  }
2158  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2159  uint64_t j = ind.Uint(vform, i);
2160  int tab_idx = static_cast<int>(j >> 4);
2161  int j_idx = static_cast<int>(j & 15);
2162  if ((tab_idx < 4) && (tab[tab_idx] != nullptr)) {
2163  result[i] = tab[tab_idx]->Uint(kFormat16B, j_idx);
2164  }
2165  }
2166  dst.SetUintArray(vform, result);
2167  return dst;
2168 }
2169 
2170 LogicVRegister Simulator::tbl(VectorFormat vform, LogicVRegister dst,
2171  const LogicVRegister& tab,
2172  const LogicVRegister& ind) {
2173  return Table(vform, dst, ind, true, &tab);
2174 }
2175 
2176 LogicVRegister Simulator::tbl(VectorFormat vform, LogicVRegister dst,
2177  const LogicVRegister& tab,
2178  const LogicVRegister& tab2,
2179  const LogicVRegister& ind) {
2180  return Table(vform, dst, ind, true, &tab, &tab2);
2181 }
2182 
2183 LogicVRegister Simulator::tbl(VectorFormat vform, LogicVRegister dst,
2184  const LogicVRegister& tab,
2185  const LogicVRegister& tab2,
2186  const LogicVRegister& tab3,
2187  const LogicVRegister& ind) {
2188  return Table(vform, dst, ind, true, &tab, &tab2, &tab3);
2189 }
2190 
2191 LogicVRegister Simulator::tbl(VectorFormat vform, LogicVRegister dst,
2192  const LogicVRegister& tab,
2193  const LogicVRegister& tab2,
2194  const LogicVRegister& tab3,
2195  const LogicVRegister& tab4,
2196  const LogicVRegister& ind) {
2197  return Table(vform, dst, ind, true, &tab, &tab2, &tab3, &tab4);
2198 }
2199 
2200 LogicVRegister Simulator::tbx(VectorFormat vform, LogicVRegister dst,
2201  const LogicVRegister& tab,
2202  const LogicVRegister& ind) {
2203  return Table(vform, dst, ind, false, &tab);
2204 }
2205 
2206 LogicVRegister Simulator::tbx(VectorFormat vform, LogicVRegister dst,
2207  const LogicVRegister& tab,
2208  const LogicVRegister& tab2,
2209  const LogicVRegister& ind) {
2210  return Table(vform, dst, ind, false, &tab, &tab2);
2211 }
2212 
2213 LogicVRegister Simulator::tbx(VectorFormat vform, LogicVRegister dst,
2214  const LogicVRegister& tab,
2215  const LogicVRegister& tab2,
2216  const LogicVRegister& tab3,
2217  const LogicVRegister& ind) {
2218  return Table(vform, dst, ind, false, &tab, &tab2, &tab3);
2219 }
2220 
2221 LogicVRegister Simulator::tbx(VectorFormat vform, LogicVRegister dst,
2222  const LogicVRegister& tab,
2223  const LogicVRegister& tab2,
2224  const LogicVRegister& tab3,
2225  const LogicVRegister& tab4,
2226  const LogicVRegister& ind) {
2227  return Table(vform, dst, ind, false, &tab, &tab2, &tab3, &tab4);
2228 }
2229 
2230 LogicVRegister Simulator::uqshrn(VectorFormat vform, LogicVRegister dst,
2231  const LogicVRegister& src, int shift) {
2232  return shrn(vform, dst, src, shift).UnsignedSaturate(vform);
2233 }
2234 
2235 LogicVRegister Simulator::uqshrn2(VectorFormat vform, LogicVRegister dst,
2236  const LogicVRegister& src, int shift) {
2237  return shrn2(vform, dst, src, shift).UnsignedSaturate(vform);
2238 }
2239 
2240 LogicVRegister Simulator::uqrshrn(VectorFormat vform, LogicVRegister dst,
2241  const LogicVRegister& src, int shift) {
2242  return rshrn(vform, dst, src, shift).UnsignedSaturate(vform);
2243 }
2244 
2245 LogicVRegister Simulator::uqrshrn2(VectorFormat vform, LogicVRegister dst,
2246  const LogicVRegister& src, int shift) {
2247  return rshrn2(vform, dst, src, shift).UnsignedSaturate(vform);
2248 }
2249 
2250 LogicVRegister Simulator::sqshrn(VectorFormat vform, LogicVRegister dst,
2251  const LogicVRegister& src, int shift) {
2252  SimVRegister temp;
2253  VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
2254  VectorFormat vformdst = vform;
2255  LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
2256  return sqxtn(vformdst, dst, shifted_src);
2257 }
2258 
2259 LogicVRegister Simulator::sqshrn2(VectorFormat vform, LogicVRegister dst,
2260  const LogicVRegister& src, int shift) {
2261  SimVRegister temp;
2262  VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
2263  VectorFormat vformdst = vform;
2264  LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
2265  return sqxtn(vformdst, dst, shifted_src);
2266 }
2267 
2268 LogicVRegister Simulator::sqrshrn(VectorFormat vform, LogicVRegister dst,
2269  const LogicVRegister& src, int shift) {
2270  SimVRegister temp;
2271  VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
2272  VectorFormat vformdst = vform;
2273  LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
2274  return sqxtn(vformdst, dst, shifted_src);
2275 }
2276 
2277 LogicVRegister Simulator::sqrshrn2(VectorFormat vform, LogicVRegister dst,
2278  const LogicVRegister& src, int shift) {
2279  SimVRegister temp;
2280  VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
2281  VectorFormat vformdst = vform;
2282  LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
2283  return sqxtn(vformdst, dst, shifted_src);
2284 }
2285 
2286 LogicVRegister Simulator::sqshrun(VectorFormat vform, LogicVRegister dst,
2287  const LogicVRegister& src, int shift) {
2288  SimVRegister temp;
2289  VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
2290  VectorFormat vformdst = vform;
2291  LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
2292  return sqxtun(vformdst, dst, shifted_src);
2293 }
2294 
2295 LogicVRegister Simulator::sqshrun2(VectorFormat vform, LogicVRegister dst,
2296  const LogicVRegister& src, int shift) {
2297  SimVRegister temp;
2298  VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
2299  VectorFormat vformdst = vform;
2300  LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
2301  return sqxtun(vformdst, dst, shifted_src);
2302 }
2303 
2304 LogicVRegister Simulator::sqrshrun(VectorFormat vform, LogicVRegister dst,
2305  const LogicVRegister& src, int shift) {
2306  SimVRegister temp;
2307  VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
2308  VectorFormat vformdst = vform;
2309  LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
2310  return sqxtun(vformdst, dst, shifted_src);
2311 }
2312 
2313 LogicVRegister Simulator::sqrshrun2(VectorFormat vform, LogicVRegister dst,
2314  const LogicVRegister& src, int shift) {
2315  SimVRegister temp;
2316  VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
2317  VectorFormat vformdst = vform;
2318  LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
2319  return sqxtun(vformdst, dst, shifted_src);
2320 }
2321 
2322 LogicVRegister Simulator::uaddl(VectorFormat vform, LogicVRegister dst,
2323  const LogicVRegister& src1,
2324  const LogicVRegister& src2) {
2325  SimVRegister temp1, temp2;
2326  uxtl(vform, temp1, src1);
2327  uxtl(vform, temp2, src2);
2328  add(vform, dst, temp1, temp2);
2329  return dst;
2330 }
2331 
2332 LogicVRegister Simulator::uaddl2(VectorFormat vform, LogicVRegister dst,
2333  const LogicVRegister& src1,
2334  const LogicVRegister& src2) {
2335  SimVRegister temp1, temp2;
2336  uxtl2(vform, temp1, src1);
2337  uxtl2(vform, temp2, src2);
2338  add(vform, dst, temp1, temp2);
2339  return dst;
2340 }
2341 
2342 LogicVRegister Simulator::uaddw(VectorFormat vform, LogicVRegister dst,
2343  const LogicVRegister& src1,
2344  const LogicVRegister& src2) {
2345  SimVRegister temp;
2346  uxtl(vform, temp, src2);
2347  add(vform, dst, src1, temp);
2348  return dst;
2349 }
2350 
2351 LogicVRegister Simulator::uaddw2(VectorFormat vform, LogicVRegister dst,
2352  const LogicVRegister& src1,
2353  const LogicVRegister& src2) {
2354  SimVRegister temp;
2355  uxtl2(vform, temp, src2);
2356  add(vform, dst, src1, temp);
2357  return dst;
2358 }
2359 
2360 LogicVRegister Simulator::saddl(VectorFormat vform, LogicVRegister dst,
2361  const LogicVRegister& src1,
2362  const LogicVRegister& src2) {
2363  SimVRegister temp1, temp2;
2364  sxtl(vform, temp1, src1);
2365  sxtl(vform, temp2, src2);
2366  add(vform, dst, temp1, temp2);
2367  return dst;
2368 }
2369 
2370 LogicVRegister Simulator::saddl2(VectorFormat vform, LogicVRegister dst,
2371  const LogicVRegister& src1,
2372  const LogicVRegister& src2) {
2373  SimVRegister temp1, temp2;
2374  sxtl2(vform, temp1, src1);
2375  sxtl2(vform, temp2, src2);
2376  add(vform, dst, temp1, temp2);
2377  return dst;
2378 }
2379 
2380 LogicVRegister Simulator::saddw(VectorFormat vform, LogicVRegister dst,
2381  const LogicVRegister& src1,
2382  const LogicVRegister& src2) {
2383  SimVRegister temp;
2384  sxtl(vform, temp, src2);
2385  add(vform, dst, src1, temp);
2386  return dst;
2387 }
2388 
2389 LogicVRegister Simulator::saddw2(VectorFormat vform, LogicVRegister dst,
2390  const LogicVRegister& src1,
2391  const LogicVRegister& src2) {
2392  SimVRegister temp;
2393  sxtl2(vform, temp, src2);
2394  add(vform, dst, src1, temp);
2395  return dst;
2396 }
2397 
2398 LogicVRegister Simulator::usubl(VectorFormat vform, LogicVRegister dst,
2399  const LogicVRegister& src1,
2400  const LogicVRegister& src2) {
2401  SimVRegister temp1, temp2;
2402  uxtl(vform, temp1, src1);
2403  uxtl(vform, temp2, src2);
2404  sub(vform, dst, temp1, temp2);
2405  return dst;
2406 }
2407 
2408 LogicVRegister Simulator::usubl2(VectorFormat vform, LogicVRegister dst,
2409  const LogicVRegister& src1,
2410  const LogicVRegister& src2) {
2411  SimVRegister temp1, temp2;
2412  uxtl2(vform, temp1, src1);
2413  uxtl2(vform, temp2, src2);
2414  sub(vform, dst, temp1, temp2);
2415  return dst;
2416 }
2417 
2418 LogicVRegister Simulator::usubw(VectorFormat vform, LogicVRegister dst,
2419  const LogicVRegister& src1,
2420  const LogicVRegister& src2) {
2421  SimVRegister temp;
2422  uxtl(vform, temp, src2);
2423  sub(vform, dst, src1, temp);
2424  return dst;
2425 }
2426 
2427 LogicVRegister Simulator::usubw2(VectorFormat vform, LogicVRegister dst,
2428  const LogicVRegister& src1,
2429  const LogicVRegister& src2) {
2430  SimVRegister temp;
2431  uxtl2(vform, temp, src2);
2432  sub(vform, dst, src1, temp);
2433  return dst;
2434 }
2435 
2436 LogicVRegister Simulator::ssubl(VectorFormat vform, LogicVRegister dst,
2437  const LogicVRegister& src1,
2438  const LogicVRegister& src2) {
2439  SimVRegister temp1, temp2;
2440  sxtl(vform, temp1, src1);
2441  sxtl(vform, temp2, src2);
2442  sub(vform, dst, temp1, temp2);
2443  return dst;
2444 }
2445 
2446 LogicVRegister Simulator::ssubl2(VectorFormat vform, LogicVRegister dst,
2447  const LogicVRegister& src1,
2448  const LogicVRegister& src2) {
2449  SimVRegister temp1, temp2;
2450  sxtl2(vform, temp1, src1);
2451  sxtl2(vform, temp2, src2);
2452  sub(vform, dst, temp1, temp2);
2453  return dst;
2454 }
2455 
2456 LogicVRegister Simulator::ssubw(VectorFormat vform, LogicVRegister dst,
2457  const LogicVRegister& src1,
2458  const LogicVRegister& src2) {
2459  SimVRegister temp;
2460  sxtl(vform, temp, src2);
2461  sub(vform, dst, src1, temp);
2462  return dst;
2463 }
2464 
2465 LogicVRegister Simulator::ssubw2(VectorFormat vform, LogicVRegister dst,
2466  const LogicVRegister& src1,
2467  const LogicVRegister& src2) {
2468  SimVRegister temp;
2469  sxtl2(vform, temp, src2);
2470  sub(vform, dst, src1, temp);
2471  return dst;
2472 }
2473 
2474 LogicVRegister Simulator::uabal(VectorFormat vform, LogicVRegister dst,
2475  const LogicVRegister& src1,
2476  const LogicVRegister& src2) {
2477  SimVRegister temp1, temp2;
2478  uxtl(vform, temp1, src1);
2479  uxtl(vform, temp2, src2);
2480  uaba(vform, dst, temp1, temp2);
2481  return dst;
2482 }
2483 
2484 LogicVRegister Simulator::uabal2(VectorFormat vform, LogicVRegister dst,
2485  const LogicVRegister& src1,
2486  const LogicVRegister& src2) {
2487  SimVRegister temp1, temp2;
2488  uxtl2(vform, temp1, src1);
2489  uxtl2(vform, temp2, src2);
2490  uaba(vform, dst, temp1, temp2);
2491  return dst;
2492 }
2493 
2494 LogicVRegister Simulator::sabal(VectorFormat vform, LogicVRegister dst,
2495  const LogicVRegister& src1,
2496  const LogicVRegister& src2) {
2497  SimVRegister temp1, temp2;
2498  sxtl(vform, temp1, src1);
2499  sxtl(vform, temp2, src2);
2500  saba(vform, dst, temp1, temp2);
2501  return dst;
2502 }
2503 
2504 LogicVRegister Simulator::sabal2(VectorFormat vform, LogicVRegister dst,
2505  const LogicVRegister& src1,
2506  const LogicVRegister& src2) {
2507  SimVRegister temp1, temp2;
2508  sxtl2(vform, temp1, src1);
2509  sxtl2(vform, temp2, src2);
2510  saba(vform, dst, temp1, temp2);
2511  return dst;
2512 }
2513 
2514 LogicVRegister Simulator::uabdl(VectorFormat vform, LogicVRegister dst,
2515  const LogicVRegister& src1,
2516  const LogicVRegister& src2) {
2517  SimVRegister temp1, temp2;
2518  uxtl(vform, temp1, src1);
2519  uxtl(vform, temp2, src2);
2520  AbsDiff(vform, dst, temp1, temp2, false);
2521  return dst;
2522 }
2523 
2524 LogicVRegister Simulator::uabdl2(VectorFormat vform, LogicVRegister dst,
2525  const LogicVRegister& src1,
2526  const LogicVRegister& src2) {
2527  SimVRegister temp1, temp2;
2528  uxtl2(vform, temp1, src1);
2529  uxtl2(vform, temp2, src2);
2530  AbsDiff(vform, dst, temp1, temp2, false);
2531  return dst;
2532 }
2533 
2534 LogicVRegister Simulator::sabdl(VectorFormat vform, LogicVRegister dst,
2535  const LogicVRegister& src1,
2536  const LogicVRegister& src2) {
2537  SimVRegister temp1, temp2;
2538  sxtl(vform, temp1, src1);
2539  sxtl(vform, temp2, src2);
2540  AbsDiff(vform, dst, temp1, temp2, true);
2541  return dst;
2542 }
2543 
2544 LogicVRegister Simulator::sabdl2(VectorFormat vform, LogicVRegister dst,
2545  const LogicVRegister& src1,
2546  const LogicVRegister& src2) {
2547  SimVRegister temp1, temp2;
2548  sxtl2(vform, temp1, src1);
2549  sxtl2(vform, temp2, src2);
2550  AbsDiff(vform, dst, temp1, temp2, true);
2551  return dst;
2552 }
2553 
2554 LogicVRegister Simulator::umull(VectorFormat vform, LogicVRegister dst,
2555  const LogicVRegister& src1,
2556  const LogicVRegister& src2) {
2557  SimVRegister temp1, temp2;
2558  uxtl(vform, temp1, src1);
2559  uxtl(vform, temp2, src2);
2560  mul(vform, dst, temp1, temp2);
2561  return dst;
2562 }
2563 
2564 LogicVRegister Simulator::umull2(VectorFormat vform, LogicVRegister dst,
2565  const LogicVRegister& src1,
2566  const LogicVRegister& src2) {
2567  SimVRegister temp1, temp2;
2568  uxtl2(vform, temp1, src1);
2569  uxtl2(vform, temp2, src2);
2570  mul(vform, dst, temp1, temp2);
2571  return dst;
2572 }
2573 
2574 LogicVRegister Simulator::smull(VectorFormat vform, LogicVRegister dst,
2575  const LogicVRegister& src1,
2576  const LogicVRegister& src2) {
2577  SimVRegister temp1, temp2;
2578  sxtl(vform, temp1, src1);
2579  sxtl(vform, temp2, src2);
2580  mul(vform, dst, temp1, temp2);
2581  return dst;
2582 }
2583 
2584 LogicVRegister Simulator::smull2(VectorFormat vform, LogicVRegister dst,
2585  const LogicVRegister& src1,
2586  const LogicVRegister& src2) {
2587  SimVRegister temp1, temp2;
2588  sxtl2(vform, temp1, src1);
2589  sxtl2(vform, temp2, src2);
2590  mul(vform, dst, temp1, temp2);
2591  return dst;
2592 }
2593 
2594 LogicVRegister Simulator::umlsl(VectorFormat vform, LogicVRegister dst,
2595  const LogicVRegister& src1,
2596  const LogicVRegister& src2) {
2597  SimVRegister temp1, temp2;
2598  uxtl(vform, temp1, src1);
2599  uxtl(vform, temp2, src2);
2600  mls(vform, dst, temp1, temp2);
2601  return dst;
2602 }
2603 
2604 LogicVRegister Simulator::umlsl2(VectorFormat vform, LogicVRegister dst,
2605  const LogicVRegister& src1,
2606  const LogicVRegister& src2) {
2607  SimVRegister temp1, temp2;
2608  uxtl2(vform, temp1, src1);
2609  uxtl2(vform, temp2, src2);
2610  mls(vform, dst, temp1, temp2);
2611  return dst;
2612 }
2613 
2614 LogicVRegister Simulator::smlsl(VectorFormat vform, LogicVRegister dst,
2615  const LogicVRegister& src1,
2616  const LogicVRegister& src2) {
2617  SimVRegister temp1, temp2;
2618  sxtl(vform, temp1, src1);
2619  sxtl(vform, temp2, src2);
2620  mls(vform, dst, temp1, temp2);
2621  return dst;
2622 }
2623 
2624 LogicVRegister Simulator::smlsl2(VectorFormat vform, LogicVRegister dst,
2625  const LogicVRegister& src1,
2626  const LogicVRegister& src2) {
2627  SimVRegister temp1, temp2;
2628  sxtl2(vform, temp1, src1);
2629  sxtl2(vform, temp2, src2);
2630  mls(vform, dst, temp1, temp2);
2631  return dst;
2632 }
2633 
2634 LogicVRegister Simulator::umlal(VectorFormat vform, LogicVRegister dst,
2635  const LogicVRegister& src1,
2636  const LogicVRegister& src2) {
2637  SimVRegister temp1, temp2;
2638  uxtl(vform, temp1, src1);
2639  uxtl(vform, temp2, src2);
2640  mla(vform, dst, temp1, temp2);
2641  return dst;
2642 }
2643 
2644 LogicVRegister Simulator::umlal2(VectorFormat vform, LogicVRegister dst,
2645  const LogicVRegister& src1,
2646  const LogicVRegister& src2) {
2647  SimVRegister temp1, temp2;
2648  uxtl2(vform, temp1, src1);
2649  uxtl2(vform, temp2, src2);
2650  mla(vform, dst, temp1, temp2);
2651  return dst;
2652 }
2653 
2654 LogicVRegister Simulator::smlal(VectorFormat vform, LogicVRegister dst,
2655  const LogicVRegister& src1,
2656  const LogicVRegister& src2) {
2657  SimVRegister temp1, temp2;
2658  sxtl(vform, temp1, src1);
2659  sxtl(vform, temp2, src2);
2660  mla(vform, dst, temp1, temp2);
2661  return dst;
2662 }
2663 
2664 LogicVRegister Simulator::smlal2(VectorFormat vform, LogicVRegister dst,
2665  const LogicVRegister& src1,
2666  const LogicVRegister& src2) {
2667  SimVRegister temp1, temp2;
2668  sxtl2(vform, temp1, src1);
2669  sxtl2(vform, temp2, src2);
2670  mla(vform, dst, temp1, temp2);
2671  return dst;
2672 }
2673 
2674 LogicVRegister Simulator::sqdmlal(VectorFormat vform, LogicVRegister dst,
2675  const LogicVRegister& src1,
2676  const LogicVRegister& src2) {
2677  SimVRegister temp;
2678  LogicVRegister product = sqdmull(vform, temp, src1, src2);
2679  return add(vform, dst, dst, product).SignedSaturate(vform);
2680 }
2681 
2682 LogicVRegister Simulator::sqdmlal2(VectorFormat vform, LogicVRegister dst,
2683  const LogicVRegister& src1,
2684  const LogicVRegister& src2) {
2685  SimVRegister temp;
2686  LogicVRegister product = sqdmull2(vform, temp, src1, src2);
2687  return add(vform, dst, dst, product).SignedSaturate(vform);
2688 }
2689 
2690 LogicVRegister Simulator::sqdmlsl(VectorFormat vform, LogicVRegister dst,
2691  const LogicVRegister& src1,
2692  const LogicVRegister& src2) {
2693  SimVRegister temp;
2694  LogicVRegister product = sqdmull(vform, temp, src1, src2);
2695  return sub(vform, dst, dst, product).SignedSaturate(vform);
2696 }
2697 
2698 LogicVRegister Simulator::sqdmlsl2(VectorFormat vform, LogicVRegister dst,
2699  const LogicVRegister& src1,
2700  const LogicVRegister& src2) {
2701  SimVRegister temp;
2702  LogicVRegister product = sqdmull2(vform, temp, src1, src2);
2703  return sub(vform, dst, dst, product).SignedSaturate(vform);
2704 }
2705 
2706 LogicVRegister Simulator::sqdmull(VectorFormat vform, LogicVRegister dst,
2707  const LogicVRegister& src1,
2708  const LogicVRegister& src2) {
2709  SimVRegister temp;
2710  LogicVRegister product = smull(vform, temp, src1, src2);
2711  return add(vform, dst, product, product).SignedSaturate(vform);
2712 }
2713 
2714 LogicVRegister Simulator::sqdmull2(VectorFormat vform, LogicVRegister dst,
2715  const LogicVRegister& src1,
2716  const LogicVRegister& src2) {
2717  SimVRegister temp;
2718  LogicVRegister product = smull2(vform, temp, src1, src2);
2719  return add(vform, dst, product, product).SignedSaturate(vform);
2720 }
2721 
2722 LogicVRegister Simulator::sqrdmulh(VectorFormat vform, LogicVRegister dst,
2723  const LogicVRegister& src1,
2724  const LogicVRegister& src2, bool round) {
2725  // 2 * INT_32_MIN * INT_32_MIN causes int64_t to overflow.
2726  // To avoid this, we use (src1 * src2 + 1 << (esize - 2)) >> (esize - 1)
2727  // which is same as (2 * src1 * src2 + 1 << (esize - 1)) >> esize.
2728 
2729  int esize = LaneSizeInBitsFromFormat(vform);
2730  int round_const = round ? (1 << (esize - 2)) : 0;
2731  int64_t product;
2732 
2733  dst.ClearForWrite(vform);
2734  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2735  product = src1.Int(vform, i) * src2.Int(vform, i);
2736  product += round_const;
2737  product = product >> (esize - 1);
2738 
2739  if (product > MaxIntFromFormat(vform)) {
2740  product = MaxIntFromFormat(vform);
2741  } else if (product < MinIntFromFormat(vform)) {
2742  product = MinIntFromFormat(vform);
2743  }
2744  dst.SetInt(vform, i, product);
2745  }
2746  return dst;
2747 }
2748 
2749 LogicVRegister Simulator::sqdmulh(VectorFormat vform, LogicVRegister dst,
2750  const LogicVRegister& src1,
2751  const LogicVRegister& src2) {
2752  return sqrdmulh(vform, dst, src1, src2, false);
2753 }
2754 
2755 LogicVRegister Simulator::addhn(VectorFormat vform, LogicVRegister dst,
2756  const LogicVRegister& src1,
2757  const LogicVRegister& src2) {
2758  SimVRegister temp;
2759  add(VectorFormatDoubleWidth(vform), temp, src1, src2);
2760  shrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
2761  return dst;
2762 }
2763 
2764 LogicVRegister Simulator::addhn2(VectorFormat vform, LogicVRegister dst,
2765  const LogicVRegister& src1,
2766  const LogicVRegister& src2) {
2767  SimVRegister temp;
2768  add(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
2769  shrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
2770  return dst;
2771 }
2772 
2773 LogicVRegister Simulator::raddhn(VectorFormat vform, LogicVRegister dst,
2774  const LogicVRegister& src1,
2775  const LogicVRegister& src2) {
2776  SimVRegister temp;
2777  add(VectorFormatDoubleWidth(vform), temp, src1, src2);
2778  rshrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
2779  return dst;
2780 }
2781 
2782 LogicVRegister Simulator::raddhn2(VectorFormat vform, LogicVRegister dst,
2783  const LogicVRegister& src1,
2784  const LogicVRegister& src2) {
2785  SimVRegister temp;
2786  add(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
2787  rshrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
2788  return dst;
2789 }
2790 
2791 LogicVRegister Simulator::subhn(VectorFormat vform, LogicVRegister dst,
2792  const LogicVRegister& src1,
2793  const LogicVRegister& src2) {
2794  SimVRegister temp;
2795  sub(VectorFormatDoubleWidth(vform), temp, src1, src2);
2796  shrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
2797  return dst;
2798 }
2799 
2800 LogicVRegister Simulator::subhn2(VectorFormat vform, LogicVRegister dst,
2801  const LogicVRegister& src1,
2802  const LogicVRegister& src2) {
2803  SimVRegister temp;
2804  sub(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
2805  shrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
2806  return dst;
2807 }
2808 
2809 LogicVRegister Simulator::rsubhn(VectorFormat vform, LogicVRegister dst,
2810  const LogicVRegister& src1,
2811  const LogicVRegister& src2) {
2812  SimVRegister temp;
2813  sub(VectorFormatDoubleWidth(vform), temp, src1, src2);
2814  rshrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
2815  return dst;
2816 }
2817 
2818 LogicVRegister Simulator::rsubhn2(VectorFormat vform, LogicVRegister dst,
2819  const LogicVRegister& src1,
2820  const LogicVRegister& src2) {
2821  SimVRegister temp;
2822  sub(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
2823  rshrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
2824  return dst;
2825 }
2826 
2827 LogicVRegister Simulator::trn1(VectorFormat vform, LogicVRegister dst,
2828  const LogicVRegister& src1,
2829  const LogicVRegister& src2) {
2830  uint64_t result[16];
2831  int laneCount = LaneCountFromFormat(vform);
2832  int pairs = laneCount / 2;
2833  for (int i = 0; i < pairs; ++i) {
2834  result[2 * i] = src1.Uint(vform, 2 * i);
2835  result[(2 * i) + 1] = src2.Uint(vform, 2 * i);
2836  }
2837 
2838  dst.SetUintArray(vform, result);
2839  return dst;
2840 }
2841 
2842 LogicVRegister Simulator::trn2(VectorFormat vform, LogicVRegister dst,
2843  const LogicVRegister& src1,
2844  const LogicVRegister& src2) {
2845  uint64_t result[16];
2846  int laneCount = LaneCountFromFormat(vform);
2847  int pairs = laneCount / 2;
2848  for (int i = 0; i < pairs; ++i) {
2849  result[2 * i] = src1.Uint(vform, (2 * i) + 1);
2850  result[(2 * i) + 1] = src2.Uint(vform, (2 * i) + 1);
2851  }
2852 
2853  dst.SetUintArray(vform, result);
2854  return dst;
2855 }
2856 
2857 LogicVRegister Simulator::zip1(VectorFormat vform, LogicVRegister dst,
2858  const LogicVRegister& src1,
2859  const LogicVRegister& src2) {
2860  uint64_t result[16];
2861  int laneCount = LaneCountFromFormat(vform);
2862  int pairs = laneCount / 2;
2863  for (int i = 0; i < pairs; ++i) {
2864  result[2 * i] = src1.Uint(vform, i);
2865  result[(2 * i) + 1] = src2.Uint(vform, i);
2866  }
2867 
2868  dst.SetUintArray(vform, result);
2869  return dst;
2870 }
2871 
2872 LogicVRegister Simulator::zip2(VectorFormat vform, LogicVRegister dst,
2873  const LogicVRegister& src1,
2874  const LogicVRegister& src2) {
2875  uint64_t result[16];
2876  int laneCount = LaneCountFromFormat(vform);
2877  int pairs = laneCount / 2;
2878  for (int i = 0; i < pairs; ++i) {
2879  result[2 * i] = src1.Uint(vform, pairs + i);
2880  result[(2 * i) + 1] = src2.Uint(vform, pairs + i);
2881  }
2882 
2883  dst.SetUintArray(vform, result);
2884  return dst;
2885 }
2886 
2887 LogicVRegister Simulator::uzp1(VectorFormat vform, LogicVRegister dst,
2888  const LogicVRegister& src1,
2889  const LogicVRegister& src2) {
2890  uint64_t result[32];
2891  int laneCount = LaneCountFromFormat(vform);
2892  for (int i = 0; i < laneCount; ++i) {
2893  result[i] = src1.Uint(vform, i);
2894  result[laneCount + i] = src2.Uint(vform, i);
2895  }
2896 
2897  dst.ClearForWrite(vform);
2898  for (int i = 0; i < laneCount; ++i) {
2899  dst.SetUint(vform, i, result[2 * i]);
2900  }
2901  return dst;
2902 }
2903 
2904 LogicVRegister Simulator::uzp2(VectorFormat vform, LogicVRegister dst,
2905  const LogicVRegister& src1,
2906  const LogicVRegister& src2) {
2907  uint64_t result[32];
2908  int laneCount = LaneCountFromFormat(vform);
2909  for (int i = 0; i < laneCount; ++i) {
2910  result[i] = src1.Uint(vform, i);
2911  result[laneCount + i] = src2.Uint(vform, i);
2912  }
2913 
2914  dst.ClearForWrite(vform);
2915  for (int i = 0; i < laneCount; ++i) {
2916  dst.SetUint(vform, i, result[(2 * i) + 1]);
2917  }
2918  return dst;
2919 }
2920 
2921 template <typename T>
2922 T Simulator::FPAdd(T op1, T op2) {
2923  T result = FPProcessNaNs(op1, op2);
2924  if (std::isnan(result)) return result;
2925 
2926  if (std::isinf(op1) && std::isinf(op2) && (op1 != op2)) {
2927  // inf + -inf returns the default NaN.
2928  FPProcessException();
2929  return FPDefaultNaN<T>();
2930  } else {
2931  // Other cases should be handled by standard arithmetic.
2932  return op1 + op2;
2933  }
2934 }
2935 
2936 template <typename T>
2937 T Simulator::FPSub(T op1, T op2) {
2938  // NaNs should be handled elsewhere.
2939  DCHECK(!std::isnan(op1) && !std::isnan(op2));
2940 
2941  if (std::isinf(op1) && std::isinf(op2) && (op1 == op2)) {
2942  // inf - inf returns the default NaN.
2943  FPProcessException();
2944  return FPDefaultNaN<T>();
2945  } else {
2946  // Other cases should be handled by standard arithmetic.
2947  return op1 - op2;
2948  }
2949 }
2950 
2951 template <typename T>
2952 T Simulator::FPMul(T op1, T op2) {
2953  // NaNs should be handled elsewhere.
2954  DCHECK(!std::isnan(op1) && !std::isnan(op2));
2955 
2956  if ((std::isinf(op1) && (op2 == 0.0)) || (std::isinf(op2) && (op1 == 0.0))) {
2957  // inf * 0.0 returns the default NaN.
2958  FPProcessException();
2959  return FPDefaultNaN<T>();
2960  } else {
2961  // Other cases should be handled by standard arithmetic.
2962  return op1 * op2;
2963  }
2964 }
2965 
2966 template <typename T>
2967 T Simulator::FPMulx(T op1, T op2) {
2968  if ((std::isinf(op1) && (op2 == 0.0)) || (std::isinf(op2) && (op1 == 0.0))) {
2969  // inf * 0.0 returns +/-2.0.
2970  T two = 2.0;
2971  return copysign(1.0, op1) * copysign(1.0, op2) * two;
2972  }
2973  return FPMul(op1, op2);
2974 }
2975 
2976 template <typename T>
2977 T Simulator::FPMulAdd(T a, T op1, T op2) {
2978  T result = FPProcessNaNs3(a, op1, op2);
2979 
2980  T sign_a = copysign(1.0, a);
2981  T sign_prod = copysign(1.0, op1) * copysign(1.0, op2);
2982  bool isinf_prod = std::isinf(op1) || std::isinf(op2);
2983  bool operation_generates_nan =
2984  (std::isinf(op1) && (op2 == 0.0)) || // inf * 0.0
2985  (std::isinf(op2) && (op1 == 0.0)) || // 0.0 * inf
2986  (std::isinf(a) && isinf_prod && (sign_a != sign_prod)); // inf - inf
2987 
2988  if (std::isnan(result)) {
2989  // Generated NaNs override quiet NaNs propagated from a.
2990  if (operation_generates_nan && IsQuietNaN(a)) {
2991  FPProcessException();
2992  return FPDefaultNaN<T>();
2993  } else {
2994  return result;
2995  }
2996  }
2997 
2998  // If the operation would produce a NaN, return the default NaN.
2999  if (operation_generates_nan) {
3000  FPProcessException();
3001  return FPDefaultNaN<T>();
3002  }
3003 
3004  // Work around broken fma implementations for exact zero results: The sign of
3005  // exact 0.0 results is positive unless both a and op1 * op2 are negative.
3006  if (((op1 == 0.0) || (op2 == 0.0)) && (a == 0.0)) {
3007  return ((sign_a < 0) && (sign_prod < 0)) ? -0.0 : 0.0;
3008  }
3009 
3010  result = FusedMultiplyAdd(op1, op2, a);
3011  DCHECK(!std::isnan(result));
3012 
3013  // Work around broken fma implementations for rounded zero results: If a is
3014  // 0.0, the sign of the result is the sign of op1 * op2 before rounding.
3015  if ((a == 0.0) && (result == 0.0)) {
3016  return copysign(0.0, sign_prod);
3017  }
3018 
3019  return result;
3020 }
3021 
3022 template <typename T>
3023 T Simulator::FPDiv(T op1, T op2) {
3024  // NaNs should be handled elsewhere.
3025  DCHECK(!std::isnan(op1) && !std::isnan(op2));
3026 
3027  if ((std::isinf(op1) && std::isinf(op2)) || ((op1 == 0.0) && (op2 == 0.0))) {
3028  // inf / inf and 0.0 / 0.0 return the default NaN.
3029  FPProcessException();
3030  return FPDefaultNaN<T>();
3031  } else {
3032  if (op2 == 0.0) {
3033  FPProcessException();
3034  if (!std::isnan(op1)) {
3035  double op1_sign = copysign(1.0, op1);
3036  double op2_sign = copysign(1.0, op2);
3037  return static_cast<T>(op1_sign * op2_sign * kFP64PositiveInfinity);
3038  }
3039  }
3040 
3041  // Other cases should be handled by standard arithmetic.
3042  return op1 / op2;
3043  }
3044 }
3045 
3046 template <typename T>
3047 T Simulator::FPSqrt(T op) {
3048  if (std::isnan(op)) {
3049  return FPProcessNaN(op);
3050  } else if (op < 0.0) {
3051  FPProcessException();
3052  return FPDefaultNaN<T>();
3053  } else {
3054  return sqrt(op);
3055  }
3056 }
3057 
3058 template <typename T>
3059 T Simulator::FPMax(T a, T b) {
3060  T result = FPProcessNaNs(a, b);
3061  if (std::isnan(result)) return result;
3062 
3063  if ((a == 0.0) && (b == 0.0) && (copysign(1.0, a) != copysign(1.0, b))) {
3064  // a and b are zero, and the sign differs: return +0.0.
3065  return 0.0;
3066  } else {
3067  return (a > b) ? a : b;
3068  }
3069 }
3070 
3071 template <typename T>
3072 T Simulator::FPMaxNM(T a, T b) {
3073  if (IsQuietNaN(a) && !IsQuietNaN(b)) {
3074  a = kFP64NegativeInfinity;
3075  } else if (!IsQuietNaN(a) && IsQuietNaN(b)) {
3076  b = kFP64NegativeInfinity;
3077  }
3078 
3079  T result = FPProcessNaNs(a, b);
3080  return std::isnan(result) ? result : FPMax(a, b);
3081 }
3082 
3083 template <typename T>
3084 T Simulator::FPMin(T a, T b) {
3085  T result = FPProcessNaNs(a, b);
3086  if (std::isnan(result)) return result;
3087 
3088  if ((a == 0.0) && (b == 0.0) && (copysign(1.0, a) != copysign(1.0, b))) {
3089  // a and b are zero, and the sign differs: return -0.0.
3090  return -0.0;
3091  } else {
3092  return (a < b) ? a : b;
3093  }
3094 }
3095 
3096 template <typename T>
3097 T Simulator::FPMinNM(T a, T b) {
3098  if (IsQuietNaN(a) && !IsQuietNaN(b)) {
3099  a = kFP64PositiveInfinity;
3100  } else if (!IsQuietNaN(a) && IsQuietNaN(b)) {
3101  b = kFP64PositiveInfinity;
3102  }
3103 
3104  T result = FPProcessNaNs(a, b);
3105  return std::isnan(result) ? result : FPMin(a, b);
3106 }
3107 
3108 template <typename T>
3109 T Simulator::FPRecipStepFused(T op1, T op2) {
3110  const T two = 2.0;
3111  if ((std::isinf(op1) && (op2 == 0.0)) ||
3112  ((op1 == 0.0) && (std::isinf(op2)))) {
3113  return two;
3114  } else if (std::isinf(op1) || std::isinf(op2)) {
3115  // Return +inf if signs match, otherwise -inf.
3116  return ((op1 >= 0.0) == (op2 >= 0.0)) ? kFP64PositiveInfinity
3117  : kFP64NegativeInfinity;
3118  } else {
3119  return FusedMultiplyAdd(op1, op2, two);
3120  }
3121 }
3122 
3123 template <typename T>
3124 T Simulator::FPRSqrtStepFused(T op1, T op2) {
3125  const T one_point_five = 1.5;
3126  const T two = 2.0;
3127 
3128  if ((std::isinf(op1) && (op2 == 0.0)) ||
3129  ((op1 == 0.0) && (std::isinf(op2)))) {
3130  return one_point_five;
3131  } else if (std::isinf(op1) || std::isinf(op2)) {
3132  // Return +inf if signs match, otherwise -inf.
3133  return ((op1 >= 0.0) == (op2 >= 0.0)) ? kFP64PositiveInfinity
3134  : kFP64NegativeInfinity;
3135  } else {
3136  // The multiply-add-halve operation must be fully fused, so avoid interim
3137  // rounding by checking which operand can be losslessly divided by two
3138  // before doing the multiply-add.
3139  if (std::isnormal(op1 / two)) {
3140  return FusedMultiplyAdd(op1 / two, op2, one_point_five);
3141  } else if (std::isnormal(op2 / two)) {
3142  return FusedMultiplyAdd(op1, op2 / two, one_point_five);
3143  } else {
3144  // Neither operand is normal after halving: the result is dominated by
3145  // the addition term, so just return that.
3146  return one_point_five;
3147  }
3148  }
3149 }
3150 
3151 double Simulator::FPRoundInt(double value, FPRounding round_mode) {
3152  if ((value == 0.0) || (value == kFP64PositiveInfinity) ||
3153  (value == kFP64NegativeInfinity)) {
3154  return value;
3155  } else if (std::isnan(value)) {
3156  return FPProcessNaN(value);
3157  }
3158 
3159  double int_result = std::floor(value);
3160  double error = value - int_result;
3161  switch (round_mode) {
3162  case FPTieAway: {
3163  // Take care of correctly handling the range ]-0.5, -0.0], which must
3164  // yield -0.0.
3165  if ((-0.5 < value) && (value < 0.0)) {
3166  int_result = -0.0;
3167 
3168  } else if ((error > 0.5) || ((error == 0.5) && (int_result >= 0.0))) {
3169  // If the error is greater than 0.5, or is equal to 0.5 and the integer
3170  // result is positive, round up.
3171  int_result++;
3172  }
3173  break;
3174  }
3175  case FPTieEven: {
3176  // Take care of correctly handling the range [-0.5, -0.0], which must
3177  // yield -0.0.
3178  if ((-0.5 <= value) && (value < 0.0)) {
3179  int_result = -0.0;
3180 
3181  // If the error is greater than 0.5, or is equal to 0.5 and the integer
3182  // result is odd, round up.
3183  } else if ((error > 0.5) ||
3184  ((error == 0.5) && (std::fmod(int_result, 2) != 0))) {
3185  int_result++;
3186  }
3187  break;
3188  }
3189  case FPZero: {
3190  // If value>0 then we take floor(value)
3191  // otherwise, ceil(value).
3192  if (value < 0) {
3193  int_result = ceil(value);
3194  }
3195  break;
3196  }
3197  case FPNegativeInfinity: {
3198  // We always use floor(value).
3199  break;
3200  }
3201  case FPPositiveInfinity: {
3202  // Take care of correctly handling the range ]-1.0, -0.0], which must
3203  // yield -0.0.
3204  if ((-1.0 < value) && (value < 0.0)) {
3205  int_result = -0.0;
3206 
3207  // If the error is non-zero, round up.
3208  } else if (error > 0.0) {
3209  int_result++;
3210  }
3211  break;
3212  }
3213  default:
3214  UNIMPLEMENTED();
3215  }
3216  return int_result;
3217 }
3218 
3219 int32_t Simulator::FPToInt32(double value, FPRounding rmode) {
3220  value = FPRoundInt(value, rmode);
3221  if (value >= kWMaxInt) {
3222  return kWMaxInt;
3223  } else if (value < kWMinInt) {
3224  return kWMinInt;
3225  }
3226  return std::isnan(value) ? 0 : static_cast<int32_t>(value);
3227 }
3228 
3229 int64_t Simulator::FPToInt64(double value, FPRounding rmode) {
3230  value = FPRoundInt(value, rmode);
3231  if (value >= kXMaxInt) {
3232  return kXMaxInt;
3233  } else if (value < kXMinInt) {
3234  return kXMinInt;
3235  }
3236  return std::isnan(value) ? 0 : static_cast<int64_t>(value);
3237 }
3238 
3239 uint32_t Simulator::FPToUInt32(double value, FPRounding rmode) {
3240  value = FPRoundInt(value, rmode);
3241  if (value >= kWMaxUInt) {
3242  return kWMaxUInt;
3243  } else if (value < 0.0) {
3244  return 0;
3245  }
3246  return std::isnan(value) ? 0 : static_cast<uint32_t>(value);
3247 }
3248 
3249 uint64_t Simulator::FPToUInt64(double value, FPRounding rmode) {
3250  value = FPRoundInt(value, rmode);
3251  if (value >= kXMaxUInt) {
3252  return kXMaxUInt;
3253  } else if (value < 0.0) {
3254  return 0;
3255  }
3256  return std::isnan(value) ? 0 : static_cast<uint64_t>(value);
3257 }
3258 
3259 #define DEFINE_NEON_FP_VECTOR_OP(FN, OP, PROCNAN) \
3260  template <typename T> \
3261  LogicVRegister Simulator::FN(VectorFormat vform, LogicVRegister dst, \
3262  const LogicVRegister& src1, \
3263  const LogicVRegister& src2) { \
3264  dst.ClearForWrite(vform); \
3265  for (int i = 0; i < LaneCountFromFormat(vform); i++) { \
3266  T op1 = src1.Float<T>(i); \
3267  T op2 = src2.Float<T>(i); \
3268  T result; \
3269  if (PROCNAN) { \
3270  result = FPProcessNaNs(op1, op2); \
3271  if (!std::isnan(result)) { \
3272  result = OP(op1, op2); \
3273  } \
3274  } else { \
3275  result = OP(op1, op2); \
3276  } \
3277  dst.SetFloat(i, result); \
3278  } \
3279  return dst; \
3280  } \
3281  \
3282  LogicVRegister Simulator::FN(VectorFormat vform, LogicVRegister dst, \
3283  const LogicVRegister& src1, \
3284  const LogicVRegister& src2) { \
3285  if (LaneSizeInBytesFromFormat(vform) == kSRegSize) { \
3286  FN<float>(vform, dst, src1, src2); \
3287  } else { \
3288  DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize); \
3289  FN<double>(vform, dst, src1, src2); \
3290  } \
3291  return dst; \
3292  }
3293 NEON_FP3SAME_LIST(DEFINE_NEON_FP_VECTOR_OP)
3294 #undef DEFINE_NEON_FP_VECTOR_OP
3295 
3296 LogicVRegister Simulator::fnmul(VectorFormat vform, LogicVRegister dst,
3297  const LogicVRegister& src1,
3298  const LogicVRegister& src2) {
3299  SimVRegister temp;
3300  LogicVRegister product = fmul(vform, temp, src1, src2);
3301  return fneg(vform, dst, product);
3302 }
3303 
3304 template <typename T>
3305 LogicVRegister Simulator::frecps(VectorFormat vform, LogicVRegister dst,
3306  const LogicVRegister& src1,
3307  const LogicVRegister& src2) {
3308  dst.ClearForWrite(vform);
3309  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3310  T op1 = -src1.Float<T>(i);
3311  T op2 = src2.Float<T>(i);
3312  T result = FPProcessNaNs(op1, op2);
3313  dst.SetFloat(i, std::isnan(result) ? result : FPRecipStepFused(op1, op2));
3314  }
3315  return dst;
3316 }
3317 
3318 LogicVRegister Simulator::frecps(VectorFormat vform, LogicVRegister dst,
3319  const LogicVRegister& src1,
3320  const LogicVRegister& src2) {
3321  if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3322  frecps<float>(vform, dst, src1, src2);
3323  } else {
3324  DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3325  frecps<double>(vform, dst, src1, src2);
3326  }
3327  return dst;
3328 }
3329 
3330 template <typename T>
3331 LogicVRegister Simulator::frsqrts(VectorFormat vform, LogicVRegister dst,
3332  const LogicVRegister& src1,
3333  const LogicVRegister& src2) {
3334  dst.ClearForWrite(vform);
3335  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3336  T op1 = -src1.Float<T>(i);
3337  T op2 = src2.Float<T>(i);
3338  T result = FPProcessNaNs(op1, op2);
3339  dst.SetFloat(i, std::isnan(result) ? result : FPRSqrtStepFused(op1, op2));
3340  }
3341  return dst;
3342 }
3343 
3344 LogicVRegister Simulator::frsqrts(VectorFormat vform, LogicVRegister dst,
3345  const LogicVRegister& src1,
3346  const LogicVRegister& src2) {
3347  if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3348  frsqrts<float>(vform, dst, src1, src2);
3349  } else {
3350  DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3351  frsqrts<double>(vform, dst, src1, src2);
3352  }
3353  return dst;
3354 }
3355 
3356 template <typename T>
3357 LogicVRegister Simulator::fcmp(VectorFormat vform, LogicVRegister dst,
3358  const LogicVRegister& src1,
3359  const LogicVRegister& src2, Condition cond) {
3360  dst.ClearForWrite(vform);
3361  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3362  bool result = false;
3363  T op1 = src1.Float<T>(i);
3364  T op2 = src2.Float<T>(i);
3365  T nan_result = FPProcessNaNs(op1, op2);
3366  if (!std::isnan(nan_result)) {
3367  switch (cond) {
3368  case eq:
3369  result = (op1 == op2);
3370  break;
3371  case ge:
3372  result = (op1 >= op2);
3373  break;
3374  case gt:
3375  result = (op1 > op2);
3376  break;
3377  case le:
3378  result = (op1 <= op2);
3379  break;
3380  case lt:
3381  result = (op1 < op2);
3382  break;
3383  default:
3384  UNREACHABLE();
3385  }
3386  }
3387  dst.SetUint(vform, i, result ? MaxUintFromFormat(vform) : 0);
3388  }
3389  return dst;
3390 }
3391 
3392 LogicVRegister Simulator::fcmp(VectorFormat vform, LogicVRegister dst,
3393  const LogicVRegister& src1,
3394  const LogicVRegister& src2, Condition cond) {
3395  if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3396  fcmp<float>(vform, dst, src1, src2, cond);
3397  } else {
3398  DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3399  fcmp<double>(vform, dst, src1, src2, cond);
3400  }
3401  return dst;
3402 }
3403 
3404 LogicVRegister Simulator::fcmp_zero(VectorFormat vform, LogicVRegister dst,
3405  const LogicVRegister& src, Condition cond) {
3406  SimVRegister temp;
3407  if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3408  LogicVRegister zero_reg =
3409  dup_immediate(vform, temp, bit_cast<uint32_t>(0.0f));
3410  fcmp<float>(vform, dst, src, zero_reg, cond);
3411  } else {
3412  DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3413  LogicVRegister zero_reg =
3414  dup_immediate(vform, temp, bit_cast<uint64_t>(0.0));
3415  fcmp<double>(vform, dst, src, zero_reg, cond);
3416  }
3417  return dst;
3418 }
3419 
3420 LogicVRegister Simulator::fabscmp(VectorFormat vform, LogicVRegister dst,
3421  const LogicVRegister& src1,
3422  const LogicVRegister& src2, Condition cond) {
3423  SimVRegister temp1, temp2;
3424  if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3425  LogicVRegister abs_src1 = fabs_<float>(vform, temp1, src1);
3426  LogicVRegister abs_src2 = fabs_<float>(vform, temp2, src2);
3427  fcmp<float>(vform, dst, abs_src1, abs_src2, cond);
3428  } else {
3429  DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3430  LogicVRegister abs_src1 = fabs_<double>(vform, temp1, src1);
3431  LogicVRegister abs_src2 = fabs_<double>(vform, temp2, src2);
3432  fcmp<double>(vform, dst, abs_src1, abs_src2, cond);
3433  }
3434  return dst;
3435 }
3436 
3437 template <typename T>
3438 LogicVRegister Simulator::fmla(VectorFormat vform, LogicVRegister dst,
3439  const LogicVRegister& src1,
3440  const LogicVRegister& src2) {
3441  dst.ClearForWrite(vform);
3442  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3443  T op1 = src1.Float<T>(i);
3444  T op2 = src2.Float<T>(i);
3445  T acc = dst.Float<T>(i);
3446  T result = FPMulAdd(acc, op1, op2);
3447  dst.SetFloat(i, result);
3448  }
3449  return dst;
3450 }
3451 
3452 LogicVRegister Simulator::fmla(VectorFormat vform, LogicVRegister dst,
3453  const LogicVRegister& src1,
3454  const LogicVRegister& src2) {
3455  if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3456  fmla<float>(vform, dst, src1, src2);
3457  } else {
3458  DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3459  fmla<double>(vform, dst, src1, src2);
3460  }
3461  return dst;
3462 }
3463 
3464 template <typename T>
3465 LogicVRegister Simulator::fmls(VectorFormat vform, LogicVRegister dst,
3466  const LogicVRegister& src1,
3467  const LogicVRegister& src2) {
3468  dst.ClearForWrite(vform);
3469  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3470  T op1 = -src1.Float<T>(i);
3471  T op2 = src2.Float<T>(i);
3472  T acc = dst.Float<T>(i);
3473  T result = FPMulAdd(acc, op1, op2);
3474  dst.SetFloat(i, result);
3475  }
3476  return dst;
3477 }
3478 
3479 LogicVRegister Simulator::fmls(VectorFormat vform, LogicVRegister dst,
3480  const LogicVRegister& src1,
3481  const LogicVRegister& src2) {
3482  if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3483  fmls<float>(vform, dst, src1, src2);
3484  } else {
3485  DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3486  fmls<double>(vform, dst, src1, src2);
3487  }
3488  return dst;
3489 }
3490 
3491 template <typename T>
3492 LogicVRegister Simulator::fneg(VectorFormat vform, LogicVRegister dst,
3493  const LogicVRegister& src) {
3494  dst.ClearForWrite(vform);
3495  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3496  T op = src.Float<T>(i);
3497  op = -op;
3498  dst.SetFloat(i, op);
3499  }
3500  return dst;
3501 }
3502 
3503 LogicVRegister Simulator::fneg(VectorFormat vform, LogicVRegister dst,
3504  const LogicVRegister& src) {
3505  if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3506  fneg<float>(vform, dst, src);
3507  } else {
3508  DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3509  fneg<double>(vform, dst, src);
3510  }
3511  return dst;
3512 }
3513 
3514 template <typename T>
3515 LogicVRegister Simulator::fabs_(VectorFormat vform, LogicVRegister dst,
3516  const LogicVRegister& src) {
3517  dst.ClearForWrite(vform);
3518  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3519  T op = src.Float<T>(i);
3520  if (copysign(1.0, op) < 0.0) {
3521  op = -op;
3522  }
3523  dst.SetFloat(i, op);
3524  }
3525  return dst;
3526 }
3527 
3528 LogicVRegister Simulator::fabs_(VectorFormat vform, LogicVRegister dst,
3529  const LogicVRegister& src) {
3530  if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3531  fabs_<float>(vform, dst, src);
3532  } else {
3533  DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3534  fabs_<double>(vform, dst, src);
3535  }
3536  return dst;
3537 }
3538 
3539 LogicVRegister Simulator::fabd(VectorFormat vform, LogicVRegister dst,
3540  const LogicVRegister& src1,
3541  const LogicVRegister& src2) {
3542  SimVRegister temp;
3543  fsub(vform, temp, src1, src2);
3544  fabs_(vform, dst, temp);
3545  return dst;
3546 }
3547 
3548 LogicVRegister Simulator::fsqrt(VectorFormat vform, LogicVRegister dst,
3549  const LogicVRegister& src) {
3550  dst.ClearForWrite(vform);
3551  if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3552  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3553  float result = FPSqrt(src.Float<float>(i));
3554  dst.SetFloat(i, result);
3555  }
3556  } else {
3557  DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3558  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3559  double result = FPSqrt(src.Float<double>(i));
3560  dst.SetFloat(i, result);
3561  }
3562  }
3563  return dst;
3564 }
3565 
3566 #define DEFINE_NEON_FP_PAIR_OP(FNP, FN, OP) \
3567  LogicVRegister Simulator::FNP(VectorFormat vform, LogicVRegister dst, \
3568  const LogicVRegister& src1, \
3569  const LogicVRegister& src2) { \
3570  SimVRegister temp1, temp2; \
3571  uzp1(vform, temp1, src1, src2); \
3572  uzp2(vform, temp2, src1, src2); \
3573  FN(vform, dst, temp1, temp2); \
3574  return dst; \
3575  } \
3576  \
3577  LogicVRegister Simulator::FNP(VectorFormat vform, LogicVRegister dst, \
3578  const LogicVRegister& src) { \
3579  if (vform == kFormatS) { \
3580  float result = OP(src.Float<float>(0), src.Float<float>(1)); \
3581  dst.SetFloat(0, result); \
3582  } else { \
3583  DCHECK_EQ(vform, kFormatD); \
3584  double result = OP(src.Float<double>(0), src.Float<double>(1)); \
3585  dst.SetFloat(0, result); \
3586  } \
3587  dst.ClearForWrite(vform); \
3588  return dst; \
3589  }
3590 NEON_FPPAIRWISE_LIST(DEFINE_NEON_FP_PAIR_OP)
3591 #undef DEFINE_NEON_FP_PAIR_OP
3592 
3593 LogicVRegister Simulator::FMinMaxV(VectorFormat vform, LogicVRegister dst,
3594  const LogicVRegister& src, FPMinMaxOp Op) {
3595  DCHECK_EQ(vform, kFormat4S);
3596  USE(vform);
3597  float result1 = (this->*Op)(src.Float<float>(0), src.Float<float>(1));
3598  float result2 = (this->*Op)(src.Float<float>(2), src.Float<float>(3));
3599  float result = (this->*Op)(result1, result2);
3600  dst.ClearForWrite(kFormatS);
3601  dst.SetFloat<float>(0, result);
3602  return dst;
3603 }
3604 
3605 LogicVRegister Simulator::fmaxv(VectorFormat vform, LogicVRegister dst,
3606  const LogicVRegister& src) {
3607  return FMinMaxV(vform, dst, src, &Simulator::FPMax);
3608 }
3609 
3610 LogicVRegister Simulator::fminv(VectorFormat vform, LogicVRegister dst,
3611  const LogicVRegister& src) {
3612  return FMinMaxV(vform, dst, src, &Simulator::FPMin);
3613 }
3614 
3615 LogicVRegister Simulator::fmaxnmv(VectorFormat vform, LogicVRegister dst,
3616  const LogicVRegister& src) {
3617  return FMinMaxV(vform, dst, src, &Simulator::FPMaxNM);
3618 }
3619 
3620 LogicVRegister Simulator::fminnmv(VectorFormat vform, LogicVRegister dst,
3621  const LogicVRegister& src) {
3622  return FMinMaxV(vform, dst, src, &Simulator::FPMinNM);
3623 }
3624 
3625 LogicVRegister Simulator::fmul(VectorFormat vform, LogicVRegister dst,
3626  const LogicVRegister& src1,
3627  const LogicVRegister& src2, int index) {
3628  dst.ClearForWrite(vform);
3629  SimVRegister temp;
3630  if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3631  LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
3632  fmul<float>(vform, dst, src1, index_reg);
3633  } else {
3634  DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3635  LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
3636  fmul<double>(vform, dst, src1, index_reg);
3637  }
3638  return dst;
3639 }
3640 
3641 LogicVRegister Simulator::fmla(VectorFormat vform, LogicVRegister dst,
3642  const LogicVRegister& src1,
3643  const LogicVRegister& src2, int index) {
3644  dst.ClearForWrite(vform);
3645  SimVRegister temp;
3646  if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3647  LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
3648  fmla<float>(vform, dst, src1, index_reg);
3649  } else {
3650  DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3651  LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
3652  fmla<double>(vform, dst, src1, index_reg);
3653  }
3654  return dst;
3655 }
3656 
3657 LogicVRegister Simulator::fmls(VectorFormat vform, LogicVRegister dst,
3658  const LogicVRegister& src1,
3659  const LogicVRegister& src2, int index) {
3660  dst.ClearForWrite(vform);
3661  SimVRegister temp;
3662  if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3663  LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
3664  fmls<float>(vform, dst, src1, index_reg);
3665  } else {
3666  DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3667  LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
3668  fmls<double>(vform, dst, src1, index_reg);
3669  }
3670  return dst;
3671 }
3672 
3673 LogicVRegister Simulator::fmulx(VectorFormat vform, LogicVRegister dst,
3674  const LogicVRegister& src1,
3675  const LogicVRegister& src2, int index) {
3676  dst.ClearForWrite(vform);
3677  SimVRegister temp;
3678  if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3679  LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
3680  fmulx<float>(vform, dst, src1, index_reg);
3681 
3682  } else {
3683  DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3684  LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
3685  fmulx<double>(vform, dst, src1, index_reg);
3686  }
3687  return dst;
3688 }
3689 
3690 LogicVRegister Simulator::frint(VectorFormat vform, LogicVRegister dst,
3691  const LogicVRegister& src,
3692  FPRounding rounding_mode,
3693  bool inexact_exception) {
3694  dst.ClearForWrite(vform);
3695  if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3696  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3697  float input = src.Float<float>(i);
3698  float rounded = FPRoundInt(input, rounding_mode);
3699  if (inexact_exception && !std::isnan(input) && (input != rounded)) {
3700  FPProcessException();
3701  }
3702  dst.SetFloat<float>(i, rounded);
3703  }
3704  } else {
3705  DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3706  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3707  double input = src.Float<double>(i);
3708  double rounded = FPRoundInt(input, rounding_mode);
3709  if (inexact_exception && !std::isnan(input) && (input != rounded)) {
3710  FPProcessException();
3711  }
3712  dst.SetFloat<double>(i, rounded);
3713  }
3714  }
3715  return dst;
3716 }
3717 
3718 LogicVRegister Simulator::fcvts(VectorFormat vform, LogicVRegister dst,
3719  const LogicVRegister& src,
3720  FPRounding rounding_mode, int fbits) {
3721  dst.ClearForWrite(vform);
3722  if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3723  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3724  float op = src.Float<float>(i) * std::pow(2.0f, fbits);
3725  dst.SetInt(vform, i, FPToInt32(op, rounding_mode));
3726  }
3727  } else {
3728  DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3729  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3730  double op = src.Float<double>(i) * std::pow(2.0, fbits);
3731  dst.SetInt(vform, i, FPToInt64(op, rounding_mode));
3732  }
3733  }
3734  return dst;
3735 }
3736 
3737 LogicVRegister Simulator::fcvtu(VectorFormat vform, LogicVRegister dst,
3738  const LogicVRegister& src,
3739  FPRounding rounding_mode, int fbits) {
3740  dst.ClearForWrite(vform);
3741  if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3742  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3743  float op = src.Float<float>(i) * std::pow(2.0f, fbits);
3744  dst.SetUint(vform, i, FPToUInt32(op, rounding_mode));
3745  }
3746  } else {
3747  DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3748  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3749  double op = src.Float<double>(i) * std::pow(2.0, fbits);
3750  dst.SetUint(vform, i, FPToUInt64(op, rounding_mode));
3751  }
3752  }
3753  return dst;
3754 }
3755 
3756 LogicVRegister Simulator::fcvtl(VectorFormat vform, LogicVRegister dst,
3757  const LogicVRegister& src) {
3758  if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3759  for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) {
3760  dst.SetFloat(i, FPToFloat(src.Float<float16>(i)));
3761  }
3762  } else {
3763  DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3764  for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) {
3765  dst.SetFloat(i, FPToDouble(src.Float<float>(i)));
3766  }
3767  }
3768  return dst;
3769 }
3770 
3771 LogicVRegister Simulator::fcvtl2(VectorFormat vform, LogicVRegister dst,
3772  const LogicVRegister& src) {
3773  int lane_count = LaneCountFromFormat(vform);
3774  if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3775  for (int i = 0; i < lane_count; i++) {
3776  dst.SetFloat(i, FPToFloat(src.Float<float16>(i + lane_count)));
3777  }
3778  } else {
3779  DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3780  for (int i = 0; i < lane_count; i++) {
3781  dst.SetFloat(i, FPToDouble(src.Float<float>(i + lane_count)));
3782  }
3783  }
3784  return dst;
3785 }
3786 
3787 LogicVRegister Simulator::fcvtn(VectorFormat vform, LogicVRegister dst,
3788  const LogicVRegister& src) {
3789  if (LaneSizeInBytesFromFormat(vform) == kHRegSize) {
3790  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3791  dst.SetFloat(i, FPToFloat16(src.Float<float>(i), FPTieEven));
3792  }
3793  } else {
3794  DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kSRegSize);
3795  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3796  dst.SetFloat(i, FPToFloat(src.Float<double>(i), FPTieEven));
3797  }
3798  }
3799  return dst;
3800 }
3801 
3802 LogicVRegister Simulator::fcvtn2(VectorFormat vform, LogicVRegister dst,
3803  const LogicVRegister& src) {
3804  int lane_count = LaneCountFromFormat(vform) / 2;
3805  if (LaneSizeInBytesFromFormat(vform) == kHRegSize) {
3806  for (int i = lane_count - 1; i >= 0; i--) {
3807  dst.SetFloat(i + lane_count, FPToFloat16(src.Float<float>(i), FPTieEven));
3808  }
3809  } else {
3810  DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kSRegSize);
3811  for (int i = lane_count - 1; i >= 0; i--) {
3812  dst.SetFloat(i + lane_count, FPToFloat(src.Float<double>(i), FPTieEven));
3813  }
3814  }
3815  return dst;
3816 }
3817 
3818 LogicVRegister Simulator::fcvtxn(VectorFormat vform, LogicVRegister dst,
3819  const LogicVRegister& src) {
3820  dst.ClearForWrite(vform);
3821  DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kSRegSize);
3822  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3823  dst.SetFloat(i, FPToFloat(src.Float<double>(i), FPRoundOdd));
3824  }
3825  return dst;
3826 }
3827 
3828 LogicVRegister Simulator::fcvtxn2(VectorFormat vform, LogicVRegister dst,
3829  const LogicVRegister& src) {
3830  DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kSRegSize);
3831  int lane_count = LaneCountFromFormat(vform) / 2;
3832  for (int i = lane_count - 1; i >= 0; i--) {
3833  dst.SetFloat(i + lane_count, FPToFloat(src.Float<double>(i), FPRoundOdd));
3834  }
3835  return dst;
3836 }
3837 
3838 // Based on reference C function recip_sqrt_estimate from ARM ARM.
3839 double Simulator::recip_sqrt_estimate(double a) {
3840  int q0, q1, s;
3841  double r;
3842  if (a < 0.5) {
3843  q0 = static_cast<int>(a * 512.0);
3844  r = 1.0 / sqrt((static_cast<double>(q0) + 0.5) / 512.0);
3845  } else {
3846  q1 = static_cast<int>(a * 256.0);
3847  r = 1.0 / sqrt((static_cast<double>(q1) + 0.5) / 256.0);
3848  }
3849  s = static_cast<int>(256.0 * r + 0.5);
3850  return static_cast<double>(s) / 256.0;
3851 }
3852 
3853 namespace {
3854 
3855 inline uint64_t Bits(uint64_t val, int start_bit, int end_bit) {
3856  return unsigned_bitextract_64(start_bit, end_bit, val);
3857 }
3858 
3859 } // anonymous namespace
3860 
3861 template <typename T>
3862 T Simulator::FPRecipSqrtEstimate(T op) {
3863  static_assert(std::is_same<float, T>::value || std::is_same<double, T>::value,
3864  "T must be a float or double");
3865 
3866  if (std::isnan(op)) {
3867  return FPProcessNaN(op);
3868  } else if (op == 0.0) {
3869  if (copysign(1.0, op) < 0.0) {
3870  return kFP64NegativeInfinity;
3871  } else {
3872  return kFP64PositiveInfinity;
3873  }
3874  } else if (copysign(1.0, op) < 0.0) {
3875  FPProcessException();
3876  return FPDefaultNaN<T>();
3877  } else if (std::isinf(op)) {
3878  return 0.0;
3879  } else {
3880  uint64_t fraction;
3881  int32_t exp, result_exp;
3882 
3883  if (sizeof(T) == sizeof(float)) {
3884  exp = static_cast<int32_t>(float_exp(op));
3885  fraction = float_mantissa(op);
3886  fraction <<= 29;
3887  } else {
3888  exp = static_cast<int32_t>(double_exp(op));
3889  fraction = double_mantissa(op);
3890  }
3891 
3892  if (exp == 0) {
3893  while (Bits(fraction, 51, 51) == 0) {
3894  fraction = Bits(fraction, 50, 0) << 1;
3895  exp -= 1;
3896  }
3897  fraction = Bits(fraction, 50, 0) << 1;
3898  }
3899 
3900  double scaled;
3901  if (Bits(exp, 0, 0) == 0) {
3902  scaled = double_pack(0, 1022, Bits(fraction, 51, 44) << 44);
3903  } else {
3904  scaled = double_pack(0, 1021, Bits(fraction, 51, 44) << 44);
3905  }
3906 
3907  if (sizeof(T) == sizeof(float)) {
3908  result_exp = (380 - exp) / 2;
3909  } else {
3910  result_exp = (3068 - exp) / 2;
3911  }
3912 
3913  uint64_t estimate = bit_cast<uint64_t>(recip_sqrt_estimate(scaled));
3914 
3915  if (sizeof(T) == sizeof(float)) {
3916  uint32_t exp_bits = static_cast<uint32_t>(Bits(result_exp, 7, 0));
3917  uint32_t est_bits = static_cast<uint32_t>(Bits(estimate, 51, 29));
3918  return float_pack(0, exp_bits, est_bits);
3919  } else {
3920  return double_pack(0, Bits(result_exp, 10, 0), Bits(estimate, 51, 0));
3921  }
3922  }
3923 }
3924 
3925 LogicVRegister Simulator::frsqrte(VectorFormat vform, LogicVRegister dst,
3926  const LogicVRegister& src) {
3927  dst.ClearForWrite(vform);
3928  if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
3929  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3930  float input = src.Float<float>(i);
3931  dst.SetFloat(i, FPRecipSqrtEstimate<float>(input));
3932  }
3933  } else {
3934  DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
3935  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3936  double input = src.Float<double>(i);
3937  dst.SetFloat(i, FPRecipSqrtEstimate<double>(input));
3938  }
3939  }
3940  return dst;
3941 }
3942 
3943 template <typename T>
3944 T Simulator::FPRecipEstimate(T op, FPRounding rounding) {
3945  static_assert(std::is_same<float, T>::value || std::is_same<double, T>::value,
3946  "T must be a float or double");
3947  uint32_t sign;
3948 
3949  if (sizeof(T) == sizeof(float)) {
3950  sign = float_sign(op);
3951  } else {
3952  sign = double_sign(op);
3953  }
3954 
3955  if (std::isnan(op)) {
3956  return FPProcessNaN(op);
3957  } else if (std::isinf(op)) {
3958  return (sign == 1) ? -0.0 : 0.0;
3959  } else if (op == 0.0) {
3960  FPProcessException(); // FPExc_DivideByZero exception.
3961  return (sign == 1) ? kFP64NegativeInfinity : kFP64PositiveInfinity;
3962  } else if (((sizeof(T) == sizeof(float)) &&
3963  (std::fabs(op) < std::pow(2.0, -128.0))) ||
3964  ((sizeof(T) == sizeof(double)) &&
3965  (std::fabs(op) < std::pow(2.0, -1024.0)))) {
3966  bool overflow_to_inf = false;
3967  switch (rounding) {
3968  case FPTieEven:
3969  overflow_to_inf = true;
3970  break;
3971  case FPPositiveInfinity:
3972  overflow_to_inf = (sign == 0);
3973  break;
3974  case FPNegativeInfinity:
3975  overflow_to_inf = (sign == 1);
3976  break;
3977  case FPZero:
3978  overflow_to_inf = false;
3979  break;
3980  default:
3981  break;
3982  }
3983  FPProcessException(); // FPExc_Overflow and FPExc_Inexact.
3984  if (overflow_to_inf) {
3985  return (sign == 1) ? kFP64NegativeInfinity : kFP64PositiveInfinity;
3986  } else {
3987  // Return FPMaxNormal(sign).
3988  if (sizeof(T) == sizeof(float)) {
3989  return float_pack(sign, 0xFE, 0x07FFFFF);
3990  } else {
3991  return double_pack(sign, 0x7FE, 0x0FFFFFFFFFFFFFl);
3992  }
3993  }
3994  } else {
3995  uint64_t fraction;
3996  int32_t exp, result_exp;
3997  uint32_t sign;
3998 
3999  if (sizeof(T) == sizeof(float)) {
4000  sign = float_sign(op);
4001  exp = static_cast<int32_t>(float_exp(op));
4002  fraction = float_mantissa(op);
4003  fraction <<= 29;
4004  } else {
4005  sign = double_sign(op);
4006  exp = static_cast<int32_t>(double_exp(op));
4007  fraction = double_mantissa(op);
4008  }
4009 
4010  if (exp == 0) {
4011  if (Bits(fraction, 51, 51) == 0) {
4012  exp -= 1;
4013  fraction = Bits(fraction, 49, 0) << 2;
4014  } else {
4015  fraction = Bits(fraction, 50, 0) << 1;
4016  }
4017  }
4018 
4019  double scaled = double_pack(0, 1022, Bits(fraction, 51, 44) << 44);
4020 
4021  if (sizeof(T) == sizeof(float)) {
4022  result_exp = 253 - exp;
4023  } else {
4024  result_exp = 2045 - exp;
4025  }
4026 
4027  double estimate = recip_estimate(scaled);
4028 
4029  fraction = double_mantissa(estimate);
4030  if (result_exp == 0) {
4031  fraction = (UINT64_C(1) << 51) | Bits(fraction, 51, 1);
4032  } else if (result_exp == -1) {
4033  fraction = (UINT64_C(1) << 50) | Bits(fraction, 51, 2);
4034  result_exp = 0;
4035  }
4036  if (sizeof(T) == sizeof(float)) {
4037  uint32_t exp_bits = static_cast<uint32_t>(Bits(result_exp, 7, 0));
4038  uint32_t frac_bits = static_cast<uint32_t>(Bits(fraction, 51, 29));
4039  return float_pack(sign, exp_bits, frac_bits);
4040  } else {
4041  return double_pack(sign, Bits(result_exp, 10, 0), Bits(fraction, 51, 0));
4042  }
4043  }
4044 }
4045 
4046 LogicVRegister Simulator::frecpe(VectorFormat vform, LogicVRegister dst,
4047  const LogicVRegister& src, FPRounding round) {
4048  dst.ClearForWrite(vform);
4049  if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
4050  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4051  float input = src.Float<float>(i);
4052  dst.SetFloat(i, FPRecipEstimate<float>(input, round));
4053  }
4054  } else {
4055  DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
4056  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4057  double input = src.Float<double>(i);
4058  dst.SetFloat(i, FPRecipEstimate<double>(input, round));
4059  }
4060  }
4061  return dst;
4062 }
4063 
4064 LogicVRegister Simulator::ursqrte(VectorFormat vform, LogicVRegister dst,
4065  const LogicVRegister& src) {
4066  dst.ClearForWrite(vform);
4067  uint64_t operand;
4068  uint32_t result;
4069  double dp_operand, dp_result;
4070  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4071  operand = src.Uint(vform, i);
4072  if (operand <= 0x3FFFFFFF) {
4073  result = 0xFFFFFFFF;
4074  } else {
4075  dp_operand = operand * std::pow(2.0, -32);
4076  dp_result = recip_sqrt_estimate(dp_operand) * std::pow(2.0, 31);
4077  result = static_cast<uint32_t>(dp_result);
4078  }
4079  dst.SetUint(vform, i, result);
4080  }
4081  return dst;
4082 }
4083 
4084 // Based on reference C function recip_estimate from ARM ARM.
4085 double Simulator::recip_estimate(double a) {
4086  int q, s;
4087  double r;
4088  q = static_cast<int>(a * 512.0);
4089  r = 1.0 / ((static_cast<double>(q) + 0.5) / 512.0);
4090  s = static_cast<int>(256.0 * r + 0.5);
4091  return static_cast<double>(s) / 256.0;
4092 }
4093 
4094 LogicVRegister Simulator::urecpe(VectorFormat vform, LogicVRegister dst,
4095  const LogicVRegister& src) {
4096  dst.ClearForWrite(vform);
4097  uint64_t operand;
4098  uint32_t result;
4099  double dp_operand, dp_result;
4100  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4101  operand = src.Uint(vform, i);
4102  if (operand <= 0x7FFFFFFF) {
4103  result = 0xFFFFFFFF;
4104  } else {
4105  dp_operand = operand * std::pow(2.0, -32);
4106  dp_result = recip_estimate(dp_operand) * std::pow(2.0, 31);
4107  result = static_cast<uint32_t>(dp_result);
4108  }
4109  dst.SetUint(vform, i, result);
4110  }
4111  return dst;
4112 }
4113 
4114 template <typename T>
4115 LogicVRegister Simulator::frecpx(VectorFormat vform, LogicVRegister dst,
4116  const LogicVRegister& src) {
4117  dst.ClearForWrite(vform);
4118  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4119  T op = src.Float<T>(i);
4120  T result;
4121  if (std::isnan(op)) {
4122  result = FPProcessNaN(op);
4123  } else {
4124  int exp;
4125  uint32_t sign;
4126  if (sizeof(T) == sizeof(float)) {
4127  sign = float_sign(op);
4128  exp = static_cast<int>(float_exp(op));
4129  exp = (exp == 0) ? (0xFF - 1) : static_cast<int>(Bits(~exp, 7, 0));
4130  result = float_pack(sign, exp, 0);
4131  } else {
4132  sign = double_sign(op);
4133  exp = static_cast<int>(double_exp(op));
4134  exp = (exp == 0) ? (0x7FF - 1) : static_cast<int>(Bits(~exp, 10, 0));
4135  result = double_pack(sign, exp, 0);
4136  }
4137  }
4138  dst.SetFloat(i, result);
4139  }
4140  return dst;
4141 }
4142 
4143 LogicVRegister Simulator::frecpx(VectorFormat vform, LogicVRegister dst,
4144  const LogicVRegister& src) {
4145  if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
4146  frecpx<float>(vform, dst, src);
4147  } else {
4148  DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
4149  frecpx<double>(vform, dst, src);
4150  }
4151  return dst;
4152 }
4153 
4154 LogicVRegister Simulator::scvtf(VectorFormat vform, LogicVRegister dst,
4155  const LogicVRegister& src, int fbits,
4156  FPRounding round) {
4157  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4158  if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
4159  float result = FixedToFloat(src.Int(kFormatS, i), fbits, round);
4160  dst.SetFloat<float>(i, result);
4161  } else {
4162  DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
4163  double result = FixedToDouble(src.Int(kFormatD, i), fbits, round);
4164  dst.SetFloat<double>(i, result);
4165  }
4166  }
4167  return dst;
4168 }
4169 
4170 LogicVRegister Simulator::ucvtf(VectorFormat vform, LogicVRegister dst,
4171  const LogicVRegister& src, int fbits,
4172  FPRounding round) {
4173  for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4174  if (LaneSizeInBytesFromFormat(vform) == kSRegSize) {
4175  float result = UFixedToFloat(src.Uint(kFormatS, i), fbits, round);
4176  dst.SetFloat<float>(i, result);
4177  } else {
4178  DCHECK_EQ(LaneSizeInBytesFromFormat(vform), kDRegSize);
4179  double result = UFixedToDouble(src.Uint(kFormatD, i), fbits, round);
4180  dst.SetFloat<double>(i, result);
4181  }
4182  }
4183  return dst;
4184 }
4185 
4186 #endif // USE_SIMULATOR
4187 
4188 } // namespace internal
4189 } // namespace v8
4190 
4191 #endif // V8_TARGET_ARCH_ARM64
Definition: libplatform.h:13