V8 API Reference, 7.2.502.16 (for Deno 0.2.4)
codegen-mips64.cc
1 // Copyright 2012 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #if V8_TARGET_ARCH_MIPS64
6 
7 #include <memory>
8 
9 #include "src/codegen.h"
10 #include "src/macro-assembler.h"
11 #include "src/mips64/simulator-mips64.h"
12 
13 namespace v8 {
14 namespace internal {
15 
16 #define __ masm.
17 
18 #if defined(V8_HOST_ARCH_MIPS)
19 
20 MemCopyUint8Function CreateMemCopyUint8Function(MemCopyUint8Function stub) {
21 #if defined(USE_SIMULATOR)
22  return stub;
23 #else
24  v8::PageAllocator* page_allocator = GetPlatformPageAllocator();
25  size_t allocated = 0;
26  byte* buffer = AllocatePage(page_allocator,
27  page_allocator->GetRandomMmapAddr(), &allocated);
28  if (buffer == nullptr) return stub;
29 
30  MacroAssembler masm(AssemblerOptions{}, buffer, static_cast<int>(allocated));
31 
32  // This code assumes that cache lines are 32 bytes; if the cache line is
33  // larger, it will not work correctly.
34  {
35  Label lastb, unaligned, aligned, chkw,
36  loop16w, chk1w, wordCopy_loop, skip_pref, lastbloop,
37  leave, ua_chk16w, ua_loop16w, ua_skip_pref, ua_chkw,
38  ua_chk1w, ua_wordCopy_loop, ua_smallCopy, ua_smallCopy_loop;
39 
40  // The size of each prefetch.
41  uint32_t pref_chunk = 32;
42  // The maximum size of a prefetch; it must not be less than pref_chunk.
43  // If the real size of a prefetch is greater than max_pref_size and
44  // the kPrefHintPrepareForStore hint is used, the code will not work
45  // correctly.
46  uint32_t max_pref_size = 128;
47  DCHECK(pref_chunk < max_pref_size);
48 
49  // pref_limit is set based on the fact that we never use an offset
50  // greater than 5 on a store pref and that a single pref can
51  // never be larger than max_pref_size.
52  uint32_t pref_limit = (5 * pref_chunk) + max_pref_size;
53  int32_t pref_hint_load = kPrefHintLoadStreamed;
54  int32_t pref_hint_store = kPrefHintPrepareForStore;
55  uint32_t loadstore_chunk = 4;
56 
57  // The initial prefetches may fetch bytes that are before the buffer being
58  // copied. Start copies with an offset of 4 to avoid this situation when
59  // using kPrefHintPrepareForStore.
60  DCHECK(pref_hint_store != kPrefHintPrepareForStore ||
61  pref_chunk * 4 >= max_pref_size);
62  // If the size is less than 8, go to lastb. Regardless of size,
63  // copy the dst pointer to v0 for the return value.
64  __ slti(a6, a2, 2 * loadstore_chunk);
65  __ bne(a6, zero_reg, &lastb);
66  __ mov(v0, a0); // In delay slot.
67 
68  // If src and dst have different alignments, go to unaligned, if they
69  // have the same alignment (but are not actually aligned) do a partial
70  // load/store to make them aligned. If they are both already aligned
71  // we can start copying at aligned.
72  __ xor_(t8, a1, a0);
73  __ andi(t8, t8, loadstore_chunk - 1); // t8 is a0/a1 word-displacement.
74  __ bne(t8, zero_reg, &unaligned);
75  __ subu(a3, zero_reg, a0); // In delay slot.
76 
77  __ andi(a3, a3, loadstore_chunk - 1); // Copy a3 bytes to align a0/a1.
78  __ beq(a3, zero_reg, &aligned); // Already aligned.
79  __ subu(a2, a2, a3); // In delay slot. a2 is the remaining byte count.
80 
81  if (kArchEndian == kLittle) {
82  __ lwr(t8, MemOperand(a1));
83  __ addu(a1, a1, a3);
84  __ swr(t8, MemOperand(a0));
85  __ addu(a0, a0, a3);
86  } else {
87  __ lwl(t8, MemOperand(a1));
88  __ addu(a1, a1, a3);
89  __ swl(t8, MemOperand(a0));
90  __ addu(a0, a0, a3);
91  }
92 
93  // Now dst/src are both word-aligned. Set a2 to
94  // count how many bytes we have to copy after all the 64 byte chunks are
95  // copied and a3 to the dst pointer after all the 64 byte chunks have been
96  // copied. We will loop, incrementing a0 and a1 until a0 equals a3.
97  __ bind(&aligned);
98  __ andi(t8, a2, 0x3F);
99  __ beq(a2, t8, &chkw); // Less than 64?
100  __ subu(a3, a2, t8); // In delay slot.
101  __ addu(a3, a0, a3); // Now a3 is the final dst after loop.
102 
103  // When in the loop we prefetch with the kPrefHintPrepareForStore hint,
104  // a0+x should not be past the "a4-32" address. This means: for x=128
105  // the last "safe" a0 address is "a4-160". Alternatively, for x=64 the
106  // last "safe" a0 address is "a4-96". In the current version we will
107  // use "pref hint, 128(a0)", so "a4-160" is the limit.
108  if (pref_hint_store == kPrefHintPrepareForStore) {
109  __ addu(a4, a0, a2); // a4 is the "past the end" address.
110  __ Subu(t9, a4, pref_limit); // t9 is the "last safe pref" address.
111  }
112 
113  __ Pref(pref_hint_load, MemOperand(a1, 0 * pref_chunk));
114  __ Pref(pref_hint_load, MemOperand(a1, 1 * pref_chunk));
115  __ Pref(pref_hint_load, MemOperand(a1, 2 * pref_chunk));
116  __ Pref(pref_hint_load, MemOperand(a1, 3 * pref_chunk));
117 
118  if (pref_hint_store != kPrefHintPrepareForStore) {
119  __ Pref(pref_hint_store, MemOperand(a0, 1 * pref_chunk));
120  __ Pref(pref_hint_store, MemOperand(a0, 2 * pref_chunk));
121  __ Pref(pref_hint_store, MemOperand(a0, 3 * pref_chunk));
122  }
123  __ bind(&loop16w);
124  __ Lw(a4, MemOperand(a1));
125 
126  if (pref_hint_store == kPrefHintPrepareForStore) {
127  __ sltu(v1, t9, a0); // If a0 > t9, don't use next prefetch.
128  __ Branch(USE_DELAY_SLOT, &skip_pref, gt, v1, Operand(zero_reg));
129  }
130  __ Lw(a5, MemOperand(a1, 1, loadstore_chunk)); // Maybe in delay slot.
131 
132  __ Pref(pref_hint_store, MemOperand(a0, 4 * pref_chunk));
133  __ Pref(pref_hint_store, MemOperand(a0, 5 * pref_chunk));
134 
135  __ bind(&skip_pref);
136  __ Lw(a6, MemOperand(a1, 2, loadstore_chunk));
137  __ Lw(a7, MemOperand(a1, 3, loadstore_chunk));
138  __ Lw(t0, MemOperand(a1, 4, loadstore_chunk));
139  __ Lw(t1, MemOperand(a1, 5, loadstore_chunk));
140  __ Lw(t2, MemOperand(a1, 6, loadstore_chunk));
141  __ Lw(t3, MemOperand(a1, 7, loadstore_chunk));
142  __ Pref(pref_hint_load, MemOperand(a1, 4 * pref_chunk));
143 
144  __ Sw(a4, MemOperand(a0));
145  __ Sw(a5, MemOperand(a0, 1, loadstore_chunk));
146  __ Sw(a6, MemOperand(a0, 2, loadstore_chunk));
147  __ Sw(a7, MemOperand(a0, 3, loadstore_chunk));
148  __ Sw(t0, MemOperand(a0, 4, loadstore_chunk));
149  __ Sw(t1, MemOperand(a0, 5, loadstore_chunk));
150  __ Sw(t2, MemOperand(a0, 6, loadstore_chunk));
151  __ Sw(t3, MemOperand(a0, 7, loadstore_chunk));
152 
153  __ Lw(a4, MemOperand(a1, 8, loadstore_chunk));
154  __ Lw(a5, MemOperand(a1, 9, loadstore_chunk));
155  __ Lw(a6, MemOperand(a1, 10, loadstore_chunk));
156  __ Lw(a7, MemOperand(a1, 11, loadstore_chunk));
157  __ Lw(t0, MemOperand(a1, 12, loadstore_chunk));
158  __ Lw(t1, MemOperand(a1, 13, loadstore_chunk));
159  __ Lw(t2, MemOperand(a1, 14, loadstore_chunk));
160  __ Lw(t3, MemOperand(a1, 15, loadstore_chunk));
161  __ Pref(pref_hint_load, MemOperand(a1, 5 * pref_chunk));
162 
163  __ Sw(a4, MemOperand(a0, 8, loadstore_chunk));
164  __ Sw(a5, MemOperand(a0, 9, loadstore_chunk));
165  __ Sw(a6, MemOperand(a0, 10, loadstore_chunk));
166  __ Sw(a7, MemOperand(a0, 11, loadstore_chunk));
167  __ Sw(t0, MemOperand(a0, 12, loadstore_chunk));
168  __ Sw(t1, MemOperand(a0, 13, loadstore_chunk));
169  __ Sw(t2, MemOperand(a0, 14, loadstore_chunk));
170  __ Sw(t3, MemOperand(a0, 15, loadstore_chunk));
171  __ addiu(a0, a0, 16 * loadstore_chunk);
172  __ bne(a0, a3, &loop16w);
173  __ addiu(a1, a1, 16 * loadstore_chunk); // In delay slot.
174  __ mov(a2, t8);
175 
176  // Here we have src and dest word-aligned but less than 64 bytes to go.
177  // Check for a 32-byte chunk and copy if there is one. Otherwise jump
178  // down to chk1w to handle the tail end of the copy.
179  __ bind(&chkw);
180  __ Pref(pref_hint_load, MemOperand(a1, 0 * pref_chunk));
181  __ andi(t8, a2, 0x1F);
182  __ beq(a2, t8, &chk1w); // Less than 32?
183  __ nop(); // In delay slot.
184  __ Lw(a4, MemOperand(a1));
185  __ Lw(a5, MemOperand(a1, 1, loadstore_chunk));
186  __ Lw(a6, MemOperand(a1, 2, loadstore_chunk));
187  __ Lw(a7, MemOperand(a1, 3, loadstore_chunk));
188  __ Lw(t0, MemOperand(a1, 4, loadstore_chunk));
189  __ Lw(t1, MemOperand(a1, 5, loadstore_chunk));
190  __ Lw(t2, MemOperand(a1, 6, loadstore_chunk));
191  __ Lw(t3, MemOperand(a1, 7, loadstore_chunk));
192  __ addiu(a1, a1, 8 * loadstore_chunk);
193  __ Sw(a4, MemOperand(a0));
194  __ Sw(a5, MemOperand(a0, 1, loadstore_chunk));
195  __ Sw(a6, MemOperand(a0, 2, loadstore_chunk));
196  __ Sw(a7, MemOperand(a0, 3, loadstore_chunk));
197  __ Sw(t0, MemOperand(a0, 4, loadstore_chunk));
198  __ Sw(t1, MemOperand(a0, 5, loadstore_chunk));
199  __ Sw(t2, MemOperand(a0, 6, loadstore_chunk));
200  __ Sw(t3, MemOperand(a0, 7, loadstore_chunk));
201  __ addiu(a0, a0, 8 * loadstore_chunk);
202 
203  // Here we have less than 32 bytes to copy. Set up for a loop to copy
204  // one word at a time. Set a2 to count how many bytes we have to copy
205  // after all the word chunks are copied and a3 to the dst pointer after
206  // all the word chunks have been copied. We will loop, incrementing a0
207  // and a1 until a0 equals a3.
208  __ bind(&chk1w);
209  __ andi(a2, t8, loadstore_chunk - 1);
210  __ beq(a2, t8, &lastb);
211  __ subu(a3, t8, a2); // In delay slot.
212  __ addu(a3, a0, a3);
213 
214  __ bind(&wordCopy_loop);
215  __ Lw(a7, MemOperand(a1));
216  __ addiu(a0, a0, loadstore_chunk);
217  __ addiu(a1, a1, loadstore_chunk);
218  __ bne(a0, a3, &wordCopy_loop);
219  __ Sw(a7, MemOperand(a0, -1, loadstore_chunk)); // In delay slot.
220 
221  __ bind(&lastb);
222  __ Branch(&leave, le, a2, Operand(zero_reg));
223  __ addu(a3, a0, a2);
224 
225  __ bind(&lastbloop);
226  __ Lb(v1, MemOperand(a1));
227  __ addiu(a0, a0, 1);
228  __ addiu(a1, a1, 1);
229  __ bne(a0, a3, &lastbloop);
230  __ Sb(v1, MemOperand(a0, -1)); // In delay slot.
231 
232  __ bind(&leave);
233  __ jr(ra);
234  __ nop();
235 
236  // Unaligned case. Only the dst gets aligned so we need to do partial
237  // loads of the source followed by normal stores to the dst (once we
238  // have aligned the destination).
239  __ bind(&unaligned);
240  __ andi(a3, a3, loadstore_chunk - 1); // Copy a3 bytes to align a0/a1.
241  __ beq(a3, zero_reg, &ua_chk16w);
242  __ subu(a2, a2, a3); // In delay slot.
243 
244  if (kArchEndian == kLittle) {
245  __ lwr(v1, MemOperand(a1));
246  __ lwl(v1,
247  MemOperand(a1, 1, loadstore_chunk, MemOperand::offset_minus_one));
248  __ addu(a1, a1, a3);
249  __ swr(v1, MemOperand(a0));
250  __ addu(a0, a0, a3);
251  } else {
252  __ lwl(v1, MemOperand(a1));
253  __ lwr(v1,
254  MemOperand(a1, 1, loadstore_chunk, MemOperand::offset_minus_one));
255  __ addu(a1, a1, a3);
256  __ swl(v1, MemOperand(a0));
257  __ addu(a0, a0, a3);
258  }
259 
260  // Now the dst (but not the source) is aligned. Set a2 to count how many
261  // bytes we have to copy after all the 64 byte chunks are copied and a3 to
262  // the dst pointer after all the 64 byte chunks have been copied. We will
263  // loop, incrementing a0 and a1 until a0 equals a3.
264  __ bind(&ua_chk16w);
265  __ andi(t8, a2, 0x3F);
266  __ beq(a2, t8, &ua_chkw);
267  __ subu(a3, a2, t8); // In delay slot.
268  __ addu(a3, a0, a3);
269 
270  if (pref_hint_store == kPrefHintPrepareForStore) {
271  __ addu(a4, a0, a2);
272  __ Subu(t9, a4, pref_limit);
273  }
274 
275  __ Pref(pref_hint_load, MemOperand(a1, 0 * pref_chunk));
276  __ Pref(pref_hint_load, MemOperand(a1, 1 * pref_chunk));
277  __ Pref(pref_hint_load, MemOperand(a1, 2 * pref_chunk));
278 
279  if (pref_hint_store != kPrefHintPrepareForStore) {
280  __ Pref(pref_hint_store, MemOperand(a0, 1 * pref_chunk));
281  __ Pref(pref_hint_store, MemOperand(a0, 2 * pref_chunk));
282  __ Pref(pref_hint_store, MemOperand(a0, 3 * pref_chunk));
283  }
284 
285  __ bind(&ua_loop16w);
286  if (kArchEndian == kLittle) {
287  __ Pref(pref_hint_load, MemOperand(a1, 3 * pref_chunk));
288  __ lwr(a4, MemOperand(a1));
289  __ lwr(a5, MemOperand(a1, 1, loadstore_chunk));
290  __ lwr(a6, MemOperand(a1, 2, loadstore_chunk));
291 
292  if (pref_hint_store == kPrefHintPrepareForStore) {
293  __ sltu(v1, t9, a0);
294  __ Branch(USE_DELAY_SLOT, &ua_skip_pref, gt, v1, Operand(zero_reg));
295  }
296  __ lwr(a7, MemOperand(a1, 3, loadstore_chunk)); // Maybe in delay slot.
297 
298  __ Pref(pref_hint_store, MemOperand(a0, 4 * pref_chunk));
299  __ Pref(pref_hint_store, MemOperand(a0, 5 * pref_chunk));
300 
301  __ bind(&ua_skip_pref);
302  __ lwr(t0, MemOperand(a1, 4, loadstore_chunk));
303  __ lwr(t1, MemOperand(a1, 5, loadstore_chunk));
304  __ lwr(t2, MemOperand(a1, 6, loadstore_chunk));
305  __ lwr(t3, MemOperand(a1, 7, loadstore_chunk));
306  __ lwl(a4,
307  MemOperand(a1, 1, loadstore_chunk, MemOperand::offset_minus_one));
308  __ lwl(a5,
309  MemOperand(a1, 2, loadstore_chunk, MemOperand::offset_minus_one));
310  __ lwl(a6,
311  MemOperand(a1, 3, loadstore_chunk, MemOperand::offset_minus_one));
312  __ lwl(a7,
313  MemOperand(a1, 4, loadstore_chunk, MemOperand::offset_minus_one));
314  __ lwl(t0,
315  MemOperand(a1, 5, loadstore_chunk, MemOperand::offset_minus_one));
316  __ lwl(t1,
317  MemOperand(a1, 6, loadstore_chunk, MemOperand::offset_minus_one));
318  __ lwl(t2,
319  MemOperand(a1, 7, loadstore_chunk, MemOperand::offset_minus_one));
320  __ lwl(t3,
321  MemOperand(a1, 8, loadstore_chunk, MemOperand::offset_minus_one));
322  } else {
323  __ Pref(pref_hint_load, MemOperand(a1, 3 * pref_chunk));
324  __ lwl(a4, MemOperand(a1));
325  __ lwl(a5, MemOperand(a1, 1, loadstore_chunk));
326  __ lwl(a6, MemOperand(a1, 2, loadstore_chunk));
327 
328  if (pref_hint_store == kPrefHintPrepareForStore) {
329  __ sltu(v1, t9, a0);
330  __ Branch(USE_DELAY_SLOT, &ua_skip_pref, gt, v1, Operand(zero_reg));
331  }
332  __ lwl(a7, MemOperand(a1, 3, loadstore_chunk)); // Maybe in delay slot.
333 
334  __ Pref(pref_hint_store, MemOperand(a0, 4 * pref_chunk));
335  __ Pref(pref_hint_store, MemOperand(a0, 5 * pref_chunk));
336 
337  __ bind(&ua_skip_pref);
338  __ lwl(t0, MemOperand(a1, 4, loadstore_chunk));
339  __ lwl(t1, MemOperand(a1, 5, loadstore_chunk));
340  __ lwl(t2, MemOperand(a1, 6, loadstore_chunk));
341  __ lwl(t3, MemOperand(a1, 7, loadstore_chunk));
342  __ lwr(a4,
343  MemOperand(a1, 1, loadstore_chunk, MemOperand::offset_minus_one));
344  __ lwr(a5,
345  MemOperand(a1, 2, loadstore_chunk, MemOperand::offset_minus_one));
346  __ lwr(a6,
347  MemOperand(a1, 3, loadstore_chunk, MemOperand::offset_minus_one));
348  __ lwr(a7,
349  MemOperand(a1, 4, loadstore_chunk, MemOperand::offset_minus_one));
350  __ lwr(t0,
351  MemOperand(a1, 5, loadstore_chunk, MemOperand::offset_minus_one));
352  __ lwr(t1,
353  MemOperand(a1, 6, loadstore_chunk, MemOperand::offset_minus_one));
354  __ lwr(t2,
355  MemOperand(a1, 7, loadstore_chunk, MemOperand::offset_minus_one));
356  __ lwr(t3,
357  MemOperand(a1, 8, loadstore_chunk, MemOperand::offset_minus_one));
358  }
359  __ Pref(pref_hint_load, MemOperand(a1, 4 * pref_chunk));
360  __ Sw(a4, MemOperand(a0));
361  __ Sw(a5, MemOperand(a0, 1, loadstore_chunk));
362  __ Sw(a6, MemOperand(a0, 2, loadstore_chunk));
363  __ Sw(a7, MemOperand(a0, 3, loadstore_chunk));
364  __ Sw(t0, MemOperand(a0, 4, loadstore_chunk));
365  __ Sw(t1, MemOperand(a0, 5, loadstore_chunk));
366  __ Sw(t2, MemOperand(a0, 6, loadstore_chunk));
367  __ Sw(t3, MemOperand(a0, 7, loadstore_chunk));
368  if (kArchEndian == kLittle) {
369  __ lwr(a4, MemOperand(a1, 8, loadstore_chunk));
370  __ lwr(a5, MemOperand(a1, 9, loadstore_chunk));
371  __ lwr(a6, MemOperand(a1, 10, loadstore_chunk));
372  __ lwr(a7, MemOperand(a1, 11, loadstore_chunk));
373  __ lwr(t0, MemOperand(a1, 12, loadstore_chunk));
374  __ lwr(t1, MemOperand(a1, 13, loadstore_chunk));
375  __ lwr(t2, MemOperand(a1, 14, loadstore_chunk));
376  __ lwr(t3, MemOperand(a1, 15, loadstore_chunk));
377  __ lwl(a4,
378  MemOperand(a1, 9, loadstore_chunk, MemOperand::offset_minus_one));
379  __ lwl(a5,
380  MemOperand(a1, 10, loadstore_chunk, MemOperand::offset_minus_one));
381  __ lwl(a6,
382  MemOperand(a1, 11, loadstore_chunk, MemOperand::offset_minus_one));
383  __ lwl(a7,
384  MemOperand(a1, 12, loadstore_chunk, MemOperand::offset_minus_one));
385  __ lwl(t0,
386  MemOperand(a1, 13, loadstore_chunk, MemOperand::offset_minus_one));
387  __ lwl(t1,
388  MemOperand(a1, 14, loadstore_chunk, MemOperand::offset_minus_one));
389  __ lwl(t2,
390  MemOperand(a1, 15, loadstore_chunk, MemOperand::offset_minus_one));
391  __ lwl(t3,
392  MemOperand(a1, 16, loadstore_chunk, MemOperand::offset_minus_one));
393  } else {
394  __ lwl(a4, MemOperand(a1, 8, loadstore_chunk));
395  __ lwl(a5, MemOperand(a1, 9, loadstore_chunk));
396  __ lwl(a6, MemOperand(a1, 10, loadstore_chunk));
397  __ lwl(a7, MemOperand(a1, 11, loadstore_chunk));
398  __ lwl(t0, MemOperand(a1, 12, loadstore_chunk));
399  __ lwl(t1, MemOperand(a1, 13, loadstore_chunk));
400  __ lwl(t2, MemOperand(a1, 14, loadstore_chunk));
401  __ lwl(t3, MemOperand(a1, 15, loadstore_chunk));
402  __ lwr(a4,
403  MemOperand(a1, 9, loadstore_chunk, MemOperand::offset_minus_one));
404  __ lwr(a5,
405  MemOperand(a1, 10, loadstore_chunk, MemOperand::offset_minus_one));
406  __ lwr(a6,
407  MemOperand(a1, 11, loadstore_chunk, MemOperand::offset_minus_one));
408  __ lwr(a7,
409  MemOperand(a1, 12, loadstore_chunk, MemOperand::offset_minus_one));
410  __ lwr(t0,
411  MemOperand(a1, 13, loadstore_chunk, MemOperand::offset_minus_one));
412  __ lwr(t1,
413  MemOperand(a1, 14, loadstore_chunk, MemOperand::offset_minus_one));
414  __ lwr(t2,
415  MemOperand(a1, 15, loadstore_chunk, MemOperand::offset_minus_one));
416  __ lwr(t3,
417  MemOperand(a1, 16, loadstore_chunk, MemOperand::offset_minus_one));
418  }
419  __ Pref(pref_hint_load, MemOperand(a1, 5 * pref_chunk));
420  __ Sw(a4, MemOperand(a0, 8, loadstore_chunk));
421  __ Sw(a5, MemOperand(a0, 9, loadstore_chunk));
422  __ Sw(a6, MemOperand(a0, 10, loadstore_chunk));
423  __ Sw(a7, MemOperand(a0, 11, loadstore_chunk));
424  __ Sw(t0, MemOperand(a0, 12, loadstore_chunk));
425  __ Sw(t1, MemOperand(a0, 13, loadstore_chunk));
426  __ Sw(t2, MemOperand(a0, 14, loadstore_chunk));
427  __ Sw(t3, MemOperand(a0, 15, loadstore_chunk));
428  __ addiu(a0, a0, 16 * loadstore_chunk);
429  __ bne(a0, a3, &ua_loop16w);
430  __ addiu(a1, a1, 16 * loadstore_chunk); // In delay slot.
431  __ mov(a2, t8);
432 
433  // Here we have less than 64 bytes to go. Check for a 32-byte chunk and
434  // copy if there is one. Otherwise jump down to ua_chk1w to handle the
435  // tail end of the copy.
436  __ bind(&ua_chkw);
437  __ Pref(pref_hint_load, MemOperand(a1));
438  __ andi(t8, a2, 0x1F);
439 
440  __ beq(a2, t8, &ua_chk1w);
441  __ nop(); // In delay slot.
442  if (kArchEndian == kLittle) {
443  __ lwr(a4, MemOperand(a1));
444  __ lwr(a5, MemOperand(a1, 1, loadstore_chunk));
445  __ lwr(a6, MemOperand(a1, 2, loadstore_chunk));
446  __ lwr(a7, MemOperand(a1, 3, loadstore_chunk));
447  __ lwr(t0, MemOperand(a1, 4, loadstore_chunk));
448  __ lwr(t1, MemOperand(a1, 5, loadstore_chunk));
449  __ lwr(t2, MemOperand(a1, 6, loadstore_chunk));
450  __ lwr(t3, MemOperand(a1, 7, loadstore_chunk));
451  __ lwl(a4,
452  MemOperand(a1, 1, loadstore_chunk, MemOperand::offset_minus_one));
453  __ lwl(a5,
454  MemOperand(a1, 2, loadstore_chunk, MemOperand::offset_minus_one));
455  __ lwl(a6,
456  MemOperand(a1, 3, loadstore_chunk, MemOperand::offset_minus_one));
457  __ lwl(a7,
458  MemOperand(a1, 4, loadstore_chunk, MemOperand::offset_minus_one));
459  __ lwl(t0,
460  MemOperand(a1, 5, loadstore_chunk, MemOperand::offset_minus_one));
461  __ lwl(t1,
462  MemOperand(a1, 6, loadstore_chunk, MemOperand::offset_minus_one));
463  __ lwl(t2,
464  MemOperand(a1, 7, loadstore_chunk, MemOperand::offset_minus_one));
465  __ lwl(t3,
466  MemOperand(a1, 8, loadstore_chunk, MemOperand::offset_minus_one));
467  } else {
468  __ lwl(a4, MemOperand(a1));
469  __ lwl(a5, MemOperand(a1, 1, loadstore_chunk));
470  __ lwl(a6, MemOperand(a1, 2, loadstore_chunk));
471  __ lwl(a7, MemOperand(a1, 3, loadstore_chunk));
472  __ lwl(t0, MemOperand(a1, 4, loadstore_chunk));
473  __ lwl(t1, MemOperand(a1, 5, loadstore_chunk));
474  __ lwl(t2, MemOperand(a1, 6, loadstore_chunk));
475  __ lwl(t3, MemOperand(a1, 7, loadstore_chunk));
476  __ lwr(a4,
477  MemOperand(a1, 1, loadstore_chunk, MemOperand::offset_minus_one));
478  __ lwr(a5,
479  MemOperand(a1, 2, loadstore_chunk, MemOperand::offset_minus_one));
480  __ lwr(a6,
481  MemOperand(a1, 3, loadstore_chunk, MemOperand::offset_minus_one));
482  __ lwr(a7,
483  MemOperand(a1, 4, loadstore_chunk, MemOperand::offset_minus_one));
484  __ lwr(t0,
485  MemOperand(a1, 5, loadstore_chunk, MemOperand::offset_minus_one));
486  __ lwr(t1,
487  MemOperand(a1, 6, loadstore_chunk, MemOperand::offset_minus_one));
488  __ lwr(t2,
489  MemOperand(a1, 7, loadstore_chunk, MemOperand::offset_minus_one));
490  __ lwr(t3,
491  MemOperand(a1, 8, loadstore_chunk, MemOperand::offset_minus_one));
492  }
493  __ addiu(a1, a1, 8 * loadstore_chunk);
494  __ Sw(a4, MemOperand(a0));
495  __ Sw(a5, MemOperand(a0, 1, loadstore_chunk));
496  __ Sw(a6, MemOperand(a0, 2, loadstore_chunk));
497  __ Sw(a7, MemOperand(a0, 3, loadstore_chunk));
498  __ Sw(t0, MemOperand(a0, 4, loadstore_chunk));
499  __ Sw(t1, MemOperand(a0, 5, loadstore_chunk));
500  __ Sw(t2, MemOperand(a0, 6, loadstore_chunk));
501  __ Sw(t3, MemOperand(a0, 7, loadstore_chunk));
502  __ addiu(a0, a0, 8 * loadstore_chunk);
503 
504  // Less than 32 bytes to copy. Set up for a loop to
505  // copy one word at a time.
506  __ bind(&ua_chk1w);
507  __ andi(a2, t8, loadstore_chunk - 1);
508  __ beq(a2, t8, &ua_smallCopy);
509  __ subu(a3, t8, a2); // In delay slot.
510  __ addu(a3, a0, a3);
511 
512  __ bind(&ua_wordCopy_loop);
513  if (kArchEndian == kLittle) {
514  __ lwr(v1, MemOperand(a1));
515  __ lwl(v1,
516  MemOperand(a1, 1, loadstore_chunk, MemOperand::offset_minus_one));
517  } else {
518  __ lwl(v1, MemOperand(a1));
519  __ lwr(v1,
520  MemOperand(a1, 1, loadstore_chunk, MemOperand::offset_minus_one));
521  }
522  __ addiu(a0, a0, loadstore_chunk);
523  __ addiu(a1, a1, loadstore_chunk);
524  __ bne(a0, a3, &ua_wordCopy_loop);
525  __ Sw(v1, MemOperand(a0, -1, loadstore_chunk)); // In delay slot.
526 
527  // Copy the remaining tail bytes one at a time.
528  __ bind(&ua_smallCopy);
529  __ beq(a2, zero_reg, &leave);
530  __ addu(a3, a0, a2); // In delay slot.
531 
532  __ bind(&ua_smallCopy_loop);
533  __ Lb(v1, MemOperand(a1));
534  __ addiu(a0, a0, 1);
535  __ addiu(a1, a1, 1);
536  __ bne(a0, a3, &ua_smallCopy_loop);
537  __ Sb(v1, MemOperand(a0, -1)); // In delay slot.
538 
539  __ jr(ra);
540  __ nop();
541  }
542  CodeDesc desc;
543  masm.GetCode(nullptr, &desc);
544  DCHECK(!RelocInfo::RequiresRelocationAfterCodegen(desc));
545 
546  Assembler::FlushICache(buffer, allocated);
547  CHECK(SetPermissions(page_allocator, buffer, allocated,
548  PageAllocator::kReadExecute));
549  return FUNCTION_CAST<MemCopyUint8Function>(buffer);
550 #endif
551 }
552 #endif
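
The function above returns a pointer to the generated stub, which V8 uses as its fast byte-wise copy on MIPS64 hardware builds; on simulator builds the C++ fallback passed in as stub is returned unchanged. The sketch below is an editorial illustration, not part of the listed file: it assumes the MemCopyUint8Function typedef in src/codegen.h is roughly void (*)(uint8_t* dest, const uint8_t* src, size_t count), and the helper names (FallbackMemCopyUint8, DemoCopy) are invented for this example.

// Editorial sketch inside namespace v8::internal; assumes the
// MemCopyUint8Function signature noted above.
#include <cstdint>
#include <cstring>

static void FallbackMemCopyUint8(uint8_t* dest, const uint8_t* src,
                                 size_t count) {
  std::memcpy(dest, src, count);  // plain libc copy used when codegen is skipped
}

static void DemoCopy() {
  // Either the generated MIPS64 stub or the fallback (simulator build,
  // or page allocation failure).
  MemCopyUint8Function copy = CreateMemCopyUint8Function(FallbackMemCopyUint8);
  uint8_t src[96];
  uint8_t dest[96];
  for (size_t i = 0; i < sizeof(src); i++) src[i] = static_cast<uint8_t>(i);
  copy(dest, src, sizeof(src));  // copies 96 bytes via whichever path the stub picks
}

A real caller would install the returned pointer once and reuse it; the driver above only illustrates the calling convention.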
553 
554 UnaryMathFunction CreateSqrtFunction() {
555 #if defined(USE_SIMULATOR)
556  return nullptr;
557 #else
558  v8::PageAllocator* page_allocator = GetPlatformPageAllocator();
559  size_t allocated = 0;
560  byte* buffer = AllocatePage(page_allocator,
561  page_allocator->GetRandomMmapAddr(), &allocated);
562  if (buffer == nullptr) return nullptr;
563 
564  MacroAssembler masm(AssemblerOptions{}, buffer, static_cast<int>(allocated));
565 
566  __ MovFromFloatParameter(f12);
567  __ sqrt_d(f0, f12);
568  __ MovToFloatResult(f0);
569  __ Ret();
570 
571  CodeDesc desc;
572  masm.GetCode(nullptr, &desc);
573  DCHECK(!RelocInfo::RequiresRelocationAfterCodegen(desc));
574 
575  Assembler::FlushICache(buffer, allocated);
576  CHECK(SetPermissions(page_allocator, buffer, allocated,
577  PageAllocator::kReadExecute));
578  return FUNCTION_CAST<UnaryMathFunction>(buffer);
579 #endif
580 }
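
CreateSqrtFunction follows the same pattern on a much smaller scale: it emits a stub that receives its argument in f12 per the FP calling convention, computes sqrt_d into f0, and returns. Below is a hedged usage sketch, not part of the listed file; it assumes the UnaryMathFunction typedef in src/codegen.h is roughly double (*)(double).

// Editorial sketch inside namespace v8::internal; assumes the
// UnaryMathFunction signature noted above.
#include <cmath>

static double DemoSqrt(double x) {
  UnaryMathFunction fast_sqrt = CreateSqrtFunction();
  if (fast_sqrt == nullptr) {
    // Simulator builds return nullptr; fall back to the C library.
    return std::sqrt(x);
  }
  return fast_sqrt(x);  // runs the generated sqrt_d stub
}

A real caller would create the stub once and cache the pointer; recreating it on every call, as this sketch does for brevity, leaks the executable page allocated above.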
581 
582 #undef __
583 
584 } // namespace internal
585 } // namespace v8
586 
587 #endif // V8_TARGET_ARCH_MIPS64