1 /*---------------------------------------------------------------------------*
2 Project: TwlSDK - MI
3 File: mi_memory.c
4
5 Copyright 2003-2008 Nintendo. All rights reserved.
6
7 These coded instructions, statements, and computer programs contain
8 proprietary information of Nintendo of America Inc. and/or Nintendo
9 Company Ltd., and are protected by Federal copyright law. They may
10 not be disclosed to third parties or copied or duplicated in any form,
11 in whole or in part, without the prior written consent of Nintendo.
12
13 $Date:: 2008-09-17#$
14 $Rev: 8556 $
15 $Author: okubata_ryoma $
16 *---------------------------------------------------------------------------*/
17
18 #include <nitro/types.h>
19 #include <nitro/platform.h>
20 #include <nitro/mi/memory.h>
21 #include <nitro/math/math.h>
22
23
24 //****Bug fix****
25 // Because of a CW (CodeWarrior) bug, the inline assembler does not accept halfword access instructions such as ldrh and strh. To work around this, the instruction words are written directly using dcd.
26 //
27 // When the bug is fixed, the 'define' below will be removed.
28
29 // Fixed with CodeWarrior 0.4 or later
30 //#define CW_BUG_FOR_LDRH_AND_STRH
31
32
33 #define HALFW_CONDAL 0xe0000000 // Condition field: AL (always execute)
34 #define HALFW_CONDNE 0x10000000 // Condition field: NE (execute when not equal)
35 #define HALFW_CONDEQ 0x00000000 // Condition field: EQ (execute when equal)
36
37 #define HALFW_OFF_PL 0x00800000 // Offset is added to the base register
38 #define HALFW_OFF_MI 0x00000000 // Offset is subtracted from the base register
39 #define HALFW_LOAD 0x00100000 // Load (ldrh)
40 #define HALFW_STORE 0x00000000 // Store (strh)
41 #define HALFW_RN(n) ((n)<<16) // Base register Rn, placed in bits 19:16
42 #define HALFW_RD(n) ((n)<<12) // Transfer register Rd, placed in bits 15:12
43
44 #define HALFW_DEF1 0x004000B0 // Fixed bits for the post-indexed forms: [Rn], #offset
45 #define HALFW_DEF2 0x014000B0 // Fixed bits for the offset forms: [Rn, #offset] (HALFW_DEF1 plus bit 24)
46
47 #define HALFW_IMM(n) ( ((n)&0xf) | (((n)&0xf0)<<4) ) // 8-bit immediate: low nibble -> bits 3:0, high nibble -> bits 11:8
48
49
// Emits one hand-encoded halfword load/store instruction as a literal word.
//   cond: one of HALFW_COND*        d: transfer register number (Rd)
//   n: base register number (Rn)    offset: immediate offset, 0-255
//   sign: HALFW_OFF_PL/HALFW_OFF_MI ldst: HALFW_LOAD/HALFW_STORE
//   def: HALFW_DEF1 (post-indexed) or HALFW_DEF2 (offset addressing)
50 #define HALFW_DCD( cond, d, n, offset, sign, ldst, def ) \
51 dcd (def)|(cond)|(sign)|(ldst)|HALFW_RN(n)|HALFW_RD(d)|HALFW_IMM(offset)
52
53 //---- ldrh Rd, [Rn], #+offset     (d = Rd, n = Rn)
54 #define LDRH_AD1( cond, d, n, offset ) \
55 HALFW_DCD( cond, d, n, offset, HALFW_OFF_PL, HALFW_LOAD, HALFW_DEF1 )
56
57 //---- ldrh Rd, [Rn, #+offset]
58 #define LDRH_AD2( cond, d, n, offset ) \
59 HALFW_DCD( cond, d, n, offset, HALFW_OFF_PL, HALFW_LOAD, HALFW_DEF2 )
60
61 //---- ldrh Rd, [Rn], #-offset
62 #define LDRH_AD3( cond, d, n, offset ) \
63 HALFW_DCD( cond, d, n, offset, HALFW_OFF_MI, HALFW_LOAD, HALFW_DEF1 )
64
65 //---- ldrh Rd, [Rn, #-offset]
66 #define LDRH_AD4( cond, d, n, offset ) \
67 HALFW_DCD( cond, d, n, offset, HALFW_OFF_MI, HALFW_LOAD, HALFW_DEF2 )
68
69 //---- strh Rd, [Rn], #+offset
70 #define STRH_AD1( cond, d, n, offset ) \
71 HALFW_DCD( cond, d, n, offset, HALFW_OFF_PL, HALFW_STORE, HALFW_DEF1 )
72
73 //---- strh Rd, [Rn, #+offset]
74 #define STRH_AD2( cond, d, n, offset ) \
75 HALFW_DCD( cond, d, n, offset, HALFW_OFF_PL, HALFW_STORE, HALFW_DEF2 )
76
77 //---- strh Rd, [Rn], #-offset
78 #define STRH_AD3( cond, d, n, offset ) \
79 HALFW_DCD( cond, d, n, offset, HALFW_OFF_MI, HALFW_STORE, HALFW_DEF1 )
80
81 //---- strh Rd, [Rn, #-offset]
82 #define STRH_AD4( cond, d, n, offset ) \
83 HALFW_DCD( cond, d, n, offset, HALFW_OFF_MI, HALFW_STORE, HALFW_DEF2 )
84
85
86
87 #include <nitro/code32.h>
88 //=======================================================================
89 // MEMORY OPERATIONS
90 //=======================================================================
91 /*---------------------------------------------------------------------------*
92 Name: MIi_CpuClear16
93
94 Description: Fills memory with specified data.
95 16-bit version.
96
97 Arguments: data: Fill data
98 destp: Destination address
99 size: Size (bytes)
100
101 Returns: None.
102 *---------------------------------------------------------------------------*/
MIi_CpuClear16(register u16 data,register void * destp,register u32 size)103 asm void MIi_CpuClear16( register u16 data, register void* destp, register u32 size )
104 {
105 mov r3, #0 // n = 0
106
107 @00:
108 cmp r3, r2 // n < size ?
109 strlth r0, [r1, r3] // *((vu16 *)(destp + n)) = data
110 addlt r3, r3, #2 // n += 2
111 blt @00
112
113 bx lr
114 }
115
116 /*---------------------------------------------------------------------------*
117 Name: MIi_CpuCopy16
118
119 Description: Copies memory by CPU.
120 16-bit version
121
122 Arguments: srcp: Source address
123 destp: Destination address
124 size: Size (bytes)
125
126 Returns: None.
127 *---------------------------------------------------------------------------*/
MIi_CpuCopy16(register const void * srcp,register void * destp,register u32 size)128 asm void MIi_CpuCopy16( register const void *srcp, register void *destp, register u32 size )
129 {
130 mov r12, #0 // n = 0
131
132 @10:
133 cmp r12, r2 // n < size ?
134
135 #ifndef CW_BUG_FOR_LDRH_AND_STRH
136 ldrlth r3, [r0, r12] // *((vu16 *)(destp + n)) = *((vu16 *)(srcp + n))
137 #else
138 dcd 0xb19030bc
139 #endif
140 #ifndef CW_BUG_FOR_LDRH_AND_STRH
141 strlth r3, [r1, r12]
142 #else
143 dcd 0xb18130bc
144 #endif
145 addlt r12, r12, #2 // n += 2
146 blt @10
147
148 bx lr
149 }
150
151 /*---------------------------------------------------------------------------*
152 Name: MIi_CpuSend16
153
154 Description: Sends u16 data to fixed address.
155 16-bit version.
156
157 Arguments: src: Data stream to send
158 dest: Destination address. Not incremented.
159 size: Size (bytes)
160
161 Returns: None.
162 *---------------------------------------------------------------------------*/
MIi_CpuSend16(register const void * srcp,register volatile void * destp,register u32 size)163 asm void MIi_CpuSend16( register const void *srcp, register volatile void* destp, register u32 size )
164 {
165 mov r12, #0 // n = 0
166
167 @11:
168 cmp r12, r2 // n < size ?
169 #ifndef CW_BUG_FOR_LDRH_AND_STRH
170 ldrlth r3, [r0, r12] // *((vu16 *)(destp + n)) = *((vu16 *)(srcp + n))
171 #else
172 dcd 0xb19030bc
173 #endif
174 strlth r3, [r1, #0]
175 addlt r12, r12, #2 // n += 2
176 blt @11
177
178 bx lr
179 }
180
181 /*---------------------------------------------------------------------------*
182 Name: MIi_CpuRecv16
183
184 Description: Receives u16 data from fixed address.
185 16-bit version.
186
187 Arguments: src: Source address. Not incremented.
188 dest: Data buffer to receive
189 size: Size (bytes)
190
191 Returns: None.
192 *---------------------------------------------------------------------------*/
MIi_CpuRecv16(register volatile const void * srcp,register void * destp,register u32 size)193 asm void MIi_CpuRecv16( register volatile const void *srcp, register void* destp, register u32 size )
194 {
195 mov r12, #0 // n = 0
196
197 @12:
198 cmp r12, r2 // n < size ?
199 ldrlth r3, [r0] // *((vu16 *)(destp + n)) = *((vu16 *)(srcp + n))
200 strlth r3, [r1, r12]
201 addlt r12, r12, #2 // n += 2
202 blt @12
203
204 bx lr
205 }
206
207 /*---------------------------------------------------------------------------*
208 Name: MIi_CpuPipe16
209
210 Description: Pipes data from fixed address to fixed address.
211 16-bit version.
212
213 Arguments: src: Source address. Not incremented.
214 dest: Destination address. Not incremented.
215 size: Size (bytes)
216
217 Returns: None.
218 *---------------------------------------------------------------------------*/
MIi_CpuPipe16(register volatile const void * srcp,register volatile void * destp,register u32 size)219 asm void MIi_CpuPipe16( register volatile const void *srcp, register volatile void* destp, register u32 size )
220 {
221 mov r12, #0 // n = 0
222
223 @13:
224 cmp r12, r2 // n < size ?
225 ldrlth r3, [r0] // *((vu32 *)(destp)) = *((vu32 *)(srcp))
226 strlth r3, [r1]
227 addlt r12, r12, #2 // n += 2
228 blt @13
229
230 bx lr
231 }
232
233 /*---------------------------------------------------------------------------*
234 Name: MIi_CpuMove16
235
236 Description: Moves memory data (16-bit version).
237
238 Arguments: src: Source address, must be in 2-byte alignment
239 dest: Destination address, must be in 2-byte alignment
240 size: Size (bytes), must be in 2-byte alignment
241
242 Returns: None.
243 *---------------------------------------------------------------------------*/
CpuCopy16Reverse(register const void * srcp,register void * destp,register u32 size)244 static asm void CpuCopy16Reverse( register const void *srcp, register void *destp, register u32 size )
245 {
246 mov r12, r1 // r12: destEndp = destp
247 add r0, r0, r2 // r0: srcp += size
248 add r1, r1, r2 // r1: destp += size
249
250 @14:
251 cmp r12, r1 // while (destEndp < destp)
252 ldrlth r2, [r0, #-2]! // *(--(vu32 *)(destp)) = *(--(vu32 *)(srcp))
253 strlth r2, [r1, #-2]!
254 blt @14
255
256 bx lr
257 }
258
void MIi_CpuMove16(const void *src, void *dest, u32 size)
{
    const u32 srcAddr  = (u32)src;
    const u32 destAddr = (u32)dest;

    // A forward copy would overwrite source data it has not read yet only when
    // dest starts strictly inside [src, src + size); copy backward in that case.
    if( ( destAddr > srcAddr ) && ( srcAddr + size > destAddr ) )
    {
        CpuCopy16Reverse(src, dest, size);
        return;
    }

    MIi_CpuCopy16(src, dest, size);
}
271
272 /*---------------------------------------------------------------------------*
273 Name: MIi_CpuFind16
274
275 Description: Finds memory data (16-bit version).
276
277 Arguments: src: Source address, must be in 2 byte alignment
278 data: Target data
279 size: Size (bytes), must be in 2 byte alignment
280
281 Returns: Pointer to found data or NULL.
282 *---------------------------------------------------------------------------*/
void* MIi_CpuFind16(const void *src, u16 data, u32 size)
{
    const u16* const base = src;
    u32 offset = 0;                     // byte offset of the halfword being examined

    while( offset < size )
    {
        const u16* cur = base + ( offset >> 1 );

        if( *cur == data )
        {
            return (void*)cur;          // first match wins
        }
        offset += 2;
    }

    return NULL;                        // no halfword in the range equals data
}
298
299 /*---------------------------------------------------------------------------*
300 Name: MIi_CpuComp16
301
302 Description: Compares memory data (16-bit version).
303
304 Arguments: mem1: Target address 1, must be in 2-byte alignment
305 mem2: Target address 2, must be in 2-byte alignment
306 size: Size (bytes), must be in 2-byte alignment
307
308 Returns: < 0: mem1 smaller than mem2.
309 = 0: mem1 equals mem2.
310 > 0: mem1 larger than mem2.
311 *---------------------------------------------------------------------------*/
int MIi_CpuComp16(const void *mem1, const void *mem2, u32 size)
{
    const u16* lhs = mem1;
    const u16* rhs = mem2;
    const u16* const lhsEnd = (const u16*)( (const u8*)mem1 + size );
    int diff = 0;

    // Walk both buffers in step; stop at the end or at the first differing halfword.
    for( ; ( lhs < lhsEnd ) && ( diff == 0 ); ++lhs, ++rhs )
    {
        diff = (int)*lhs - (int)*rhs;
    }

    // 0 when every halfword matched, otherwise the difference of the first mismatch
    return diff;
}
330
331
332 /*---------------------------------------------------------------------------*
333 Name: MIi_CpuClear32
334
335 Description: Fills memory with specified data.
336 32-bit version.
337
338 Arguments: data: Fill data
339 destp: Destination address
340 size: Size (bytes)
341
342 Returns: None.
343 *---------------------------------------------------------------------------*/
MIi_CpuClear32(register u32 data,register void * destp,register u32 size)344 asm void MIi_CpuClear32( register u32 data, register void *destp, register u32 size )
345 {
346 add r12, r1, r2 // r12: destEndp = destp + size
347
348 @20:
349 cmp r1, r12 // while (destp < destEndp)
350 stmltia r1!, {r0} // *((vu32 *)(destp++)) = data
351 blt @20
352 bx lr
353 }
354
355 /*---------------------------------------------------------------------------*
356 Name: MIi_CpuCopy32
357
358 Description: Copies memory by CPU.
359 32-bit version.
360
361 Arguments: srcp: Source address
362 destp: Destination address
363 size: size (bytes)
364
365 Returns: None.
366 *---------------------------------------------------------------------------*/
MIi_CpuCopy32(register const void * srcp,register void * destp,register u32 size)367 asm void MIi_CpuCopy32( register const void *srcp, register void *destp, register u32 size )
368 {
369 add r12, r1, r2 // r12: destEndp = destp + size
370
371 @30:
372 cmp r1, r12 // while (destp < destEndp)
373 ldmltia r0!, {r2} // *((vu32 *)(destp)++) = *((vu32 *)(srcp)++)
374 stmltia r1!, {r2}
375 blt @30
376
377 bx lr
378 }
379
380 /*---------------------------------------------------------------------------*
381 Name: MIi_CpuSend32
382
383 Description: Sends u32 data to fixed address.
384 32-bit version.
385
386 Arguments: src: Data stream to send
387 dest: Destination address. Not incremented
388 size: Size (bytes)
389
390 Returns: None.
391 *---------------------------------------------------------------------------*/
MIi_CpuSend32(register const void * srcp,register volatile void * destp,register u32 size)392 asm void MIi_CpuSend32( register const void *srcp, register volatile void *destp, register u32 size )
393 {
394 add r12, r0, r2 // r12: srcEndp = srcp + size
395
396 @31:
397 cmp r0, r12 // while (srcp < srcEndp)
398 ldmltia r0!, {r2} // *((vu32 *)(destp)) = *((vu32 *)(srcp)++)
399 strlt r2, [r1]
400 blt @31
401
402 bx lr
403 }
404
405 /*---------------------------------------------------------------------------*
406 Name: MIi_CpuRecv32
407
408 Description: Receives u32 data from fixed address.
409 32-bit version.
410
411 Arguments: src: Source address. Not incremented
412 dest: Data buffer to receive
413 size: Size (bytes)
414
415 Returns: None.
416 *---------------------------------------------------------------------------*/
MIi_CpuRecv32(volatile const void * srcp,register void * destp,register u32 size)417 asm void MIi_CpuRecv32( volatile const void *srcp, register void *destp, register u32 size )
418 {
419 add r12, r1, r2 // r12: destEndp = destp + size
420
421 @32:
422 cmp r1, r12 // while (dest < destEndp)
423 ldrlt r2, [r0] // *((vu32 *)(destp)++) = *((vu32 *)(srcp))
424 stmltia r1!, {r2}
425 blt @32
426
427 bx lr
428 }
429
430 /*---------------------------------------------------------------------------*
431 Name: MIi_CpuPipe32
432
433 Description: Pipes data from fixed address to fixed address.
434 32-bit version.
435
436 Arguments: src: Source address. Not incremented
437 dest: Destination address. Not incremented
438 size: Size (bytes)
439
440 Returns: None.
441 *---------------------------------------------------------------------------*/
MIi_CpuPipe32(volatile const void * srcp,register volatile void * destp,register u32 size)442 asm void MIi_CpuPipe32( volatile const void *srcp, register volatile void *destp, register u32 size )
443 {
444 mov r12, #0 // n = 0
445
446 @33:
447 cmp r12, r2 // n < size ?
448 ldrlt r3, [r0] // *((vu32 *)(destp)) = *((vu32 *)(srcp))
449 strlt r3, [r1]
450 addlt r12, r12, #4 // n += 4
451 blt @33
452
453 bx lr
454 }
455
456 /*---------------------------------------------------------------------------*
457 Name: MIi_CpuMove32
458
459 Description: Moves memory data (32-bit version).
460
461 Arguments: src: Source address, must be in 4-byte alignment
462 dest: Destination address, must be in 4-byte alignment
463 size: Size (bytes), must be in 4-byte alignment
464
465 Returns: None.
466 *---------------------------------------------------------------------------*/
CpuCopy32Reverse(register const void * srcp,register void * destp,register u32 size)467 static asm void CpuCopy32Reverse( register const void *srcp, register void *destp, register u32 size )
468 {
469 mov r12, r1 // r12: destEndp = destp
470 add r0, r0, r2 // r0: srcp += size
471 add r1, r1, r2 // r1: destp += size
472
473 @34:
474 cmp r12, r1 // while (destEndp < destp)
475 ldrlt r2, [r0, #-4]! // *(--(vu32 *)(destp)) = *(--(vu32 *)(srcp))
476 strlt r2, [r1, #-4]!
477 blt @34
478
479 bx lr
480 }
481
void MIi_CpuMove32(const void *src, void *dest, u32 size)
{
    const u32 srcAddr  = (u32)src;
    const u32 destAddr = (u32)dest;

    // Copy backward only when dest starts strictly inside [src, src + size);
    // in every other layout a forward copy is safe.
    if( ( destAddr > srcAddr ) && ( srcAddr + size > destAddr ) )
    {
        CpuCopy32Reverse(src, dest, size);
        return;
    }

    MIi_CpuCopy32(src, dest, size);
}
494
495 /*---------------------------------------------------------------------------*
496 Name: MIi_CpuFind32
497
498 Description: Finds memory data (32-bit version).
499
500 Arguments: src: Source address, must be in 4-byte alignment
501 data: Target data
502 size: Size (bytes), must be in 4-byte alignment
503
504 Returns: Pointer to found data or NULL.
505 *---------------------------------------------------------------------------*/
void* MIi_CpuFind32(const void *src, u32 data, u32 size)
{
    const u32* const base = src;
    u32 offset = 0;                     // byte offset of the word being examined

    while( offset < size )
    {
        const u32* cur = base + ( offset >> 2 );

        if( *cur == data )
        {
            return (void*)cur;          // first match wins
        }
        offset += 4;
    }

    return NULL;                        // no word in the range equals data
}
521
522 /*---------------------------------------------------------------------------*
523 Name: MIi_CpuComp32
524
525 Description: Compares memory data (32-bit version).
526
527 Arguments: mem1: Target address 1, must be in 4-byte alignment
528 mem2: Target address 2, must be in 4-byte alignment
529 size: Size (bytes), must be in 4-byte alignment
530
531 Returns: < 0: mem1 smaller than mem2.
532 = 0: mem1 equals mem2.
533 > 0: mem1 larger than mem2.
534 *---------------------------------------------------------------------------*/
int MIi_CpuComp32(const void *mem1, const void *mem2, u32 size)
{
    const u32* lhs = mem1;
    const u32* rhs = mem2;
    const u32* const lhsEnd = (const u32*)( (const u8*)mem1 + size );

    while( lhs < lhsEnd )
    {
        const u32 a = *lhs++;
        const u32 b = *rhs++;

        // Words are compared as unsigned values; the first mismatch decides.
        if( a < b )
        {
            return -1;
        }
        if( a > b )
        {
            return 1;
        }
    }

    return 0;                           // every word matched
}
554
555
556 /*---------------------------------------------------------------------------*
557 Name: MIi_CpuClearFast
558
559 Description: Fills memory with specified data.
560 High speed by writing 32 bytes at a time using stm.
561
562 Arguments: data: Fill data
563 destp: Destination address
564 size: Size (bytes)
565
566 Returns: None.
567 *---------------------------------------------------------------------------*/
MIi_CpuClearFast(register u32 data,register void * destp,register u32 size)568 asm void MIi_CpuClearFast( register u32 data, register void *destp, register u32 size )
569 {
570 stmfd sp!, {r4-r9}
571
572 add r9, r1, r2 // r9: destEndp = destp + size
573 mov r12, r2, lsr #5 // r12: destBlockEndp = destp + size/32*32
574 add r12, r1, r12, lsl #5
575
576 mov r2, r0
577 mov r3, r2
578 mov r4, r2
579 mov r5, r2
580 mov r6, r2
581 mov r7, r2
582 mov r8, r2
583
584 @40:
585 cmp r1, r12 // while (destp < destBlockEndp)
586 stmltia r1!, {r0, r2-r8} // *((vu32 *)(destp++)) = data
587 blt @40
588 @41:
589 cmp r1, r9 // while (destp < destEndp)
590 stmltia r1!, {r0} // *((vu32 *)(destp++)) = data
591 blt @41
592
593 ldmfd sp!, {r4-r9}
594 bx lr
595 }
596
597 /*---------------------------------------------------------------------------*
598 Name: MIi_CpuCopyFast
599
600 Description: Copies memory by CPU.
601 High speed by loading/writing 32 bytes at a time using ldm/stm.
602
603 Arguments: srcp: Source address
604 destp: Destination address
605 size: Size (bytes)
606
607 Returns: None.
608 *---------------------------------------------------------------------------*/
MIi_CpuCopyFast(register const void * srcp,register void * destp,register u32 size)609 asm void MIi_CpuCopyFast( register const void *srcp, register void *destp, register u32 size )
610 {
611 stmfd sp!, {r4-r10}
612
613 add r10, r1, r2 // r10: destEndp = destp + size
614 mov r12, r2, lsr #5 // r12: destBlockEndp = destp + size/32*32
615 add r12, r1, r12, lsl #5
616
617 @50:
618 cmp r1, r12 // while (destp < destBlockEndp)
619 ldmltia r0!, {r2-r9} // *((vu32 *)(destp)++) = *((vu32 *)(srcp)++)
620 stmltia r1!, {r2-r9}
621 blt @50
622 @51:
623 cmp r1, r10 // while (destp < destEndp)
624 ldmltia r0!, {r2} // *((vu32 *)(destp)++) = *((vu32 *)(srcp)++)
625 stmltia r1!, {r2}
626 blt @51
627
628 ldmfd sp!, {r4-r10}
629 bx lr
630 }
631
632 /*---------------------------------------------------------------------------*
633 Name: MIi_CpuSendFast
634
635 Description: Sends u32 data to fixed address.
636 High speed by loading 32 bytes at a time using ldm.
637
638 Arguments: src: Data stream to send
639 dest: Destination address, not incremented
640 size: Size (bytes)
641
642 Returns: None.
643 *---------------------------------------------------------------------------*/
MIi_CpuSendFast(register const void * srcp,register volatile void * destp,register u32 size)644 asm void MIi_CpuSendFast( register const void *srcp, register volatile void *destp, register u32 size )
645 {
646 stmfd sp!, {r4-r10}
647
648 add r10, r0, r2 // r10: destEndp = destp + size
649 mov r12, r2, lsr #5 // r12: destBlockEndp = destp + size/32*32
650 add r12, r0, r12, lsl #5
651
652 @50:
653 cmp r0, r12 // while (destp < destBlockEndp)
654 ldmltia r0!, {r2-r9} // *((vu32 *)(destp)++) = *((vu32 *)(srcp)++)
655 strlt r2, [r1]
656 strlt r3, [r1]
657 strlt r4, [r1]
658 strlt r5, [r1]
659 strlt r6, [r1]
660 strlt r7, [r1]
661 strlt r8, [r1]
662 strlt r9, [r1]
663 blt @50
664 @51:
665 cmp r0, r10 // while (destp < destEndp)
666 ldmltia r0!, {r2} // *((vu32 *)(destp)++) = *((vu32 *)(srcp)++)
667 strlt r2, [r1]
668 blt @51
669
670 ldmfd sp!, {r4-r10}
671 bx lr
672 }
673
674 /*---------------------------------------------------------------------------*
675 Name: MIi_CpuRecvFast
676
677 Description: Receives u32 data from fixed address.
678 High speed by writing 32 bytes at a time using stm.
679
680 Arguments: src: Source address. not incremented
681 dest: Data buffer to receive
682 size: Size (bytes)
683
684 Returns: None.
685 *---------------------------------------------------------------------------*/
MIi_CpuRecvFast(volatile const void * srcp,register void * destp,register u32 size)686 asm void MIi_CpuRecvFast(volatile const void *srcp, register void *destp, register u32 size)
687 {
688 stmfd sp!, {r4-r10}
689
690 add r10, r1, r2 // r10: destEndp = destp + size
691 mov r12, r2, lsr #5 // r12: destBlockEndp = destp + size/32*32
692 add r12, r1, r12, lsl #5
693
694 @50:
695 cmp r1, r12 // while (destp < destBlockEndp)
696 ldrlt r2, [r0] // *((vu32 *)(destp)++) = *((vu32 *)(srcp))
697 ldrlt r3, [r0] // *((vu32 *)(destp)++) = *((vu32 *)(srcp))
698 ldrlt r4, [r0] // *((vu32 *)(destp)++) = *((vu32 *)(srcp))
699 ldrlt r5, [r0] // *((vu32 *)(destp)++) = *((vu32 *)(srcp))
700 ldrlt r6, [r0] // *((vu32 *)(destp)++) = *((vu32 *)(srcp))
701 ldrlt r7, [r0] // *((vu32 *)(destp)++) = *((vu32 *)(srcp))
702 ldrlt r8, [r0] // *((vu32 *)(destp)++) = *((vu32 *)(srcp))
703 ldrlt r9, [r0] // *((vu32 *)(destp)++) = *((vu32 *)(srcp))
704 stmltia r1!, {r2-r9}
705 blt @50
706 @51:
707 cmp r1, r10 // while (destp < destEndp)
708 ldrlt r2, [r0] // *((vu32 *)(destp)++) = *((vu32 *)(srcp))
709 stmltia r1!, {r2}
710 blt @51
711
712 ldmfd sp!, {r4-r10}
713 bx lr
714 }
715
716 /*---------------------------------------------------------------------------*
717 Name: MIi_CpuMoveFast
718
719 Description: Moves memory data (32-byte version).
720
721 Arguments: src: Source address, must be in 4-byte alignment
722 dest: Destination address, must be in 4-byte alignment
723 size: Size (byte), must be in 4-byte alignment
724
725 Returns: None.
726 *---------------------------------------------------------------------------*/
CpuCopyFastReverse(register const void * srcp,register void * destp,register u32 size)727 static asm void CpuCopyFastReverse( register const void *srcp, register void *destp, register u32 size )
728 {
729 stmfd sp!, {r4-r10}
730
731 mov r10, r1 // r10: destEndp = destp
732 mov r12, r2, lsr #5 // r12: destBlockBeginp = destp + (size/32)*32
733 add r12, r1, r12, lsl #5
734 add r0, r0, r2 // r0: srcp += size
735 add r1, r1, r2 // r1: destp += size
736
737 @52:
738 cmp r12, r1 // while( destBlockBeginP < destp )
739 ldrlt r2, [r0, #-4]! // *(--(vu32 *)(destp)) = *(--(vu32 *)(srcp))
740 strlt r2, [r1, #-4]!
741 blt @52
742 @53:
743 cmp r10, r1 // while (destEndp < destp)
744 ldmltdb r0!, {r2-r9}
745 stmltdb r1!, {r2-r9}
746 blt @53
747
748 ldmfd sp!, {r4-r10}
749 bx lr
750 }
751
void MIi_CpuMoveFast(const void *src, void *dest, u32 size)
{
    const u32 srcAddr  = (u32)src;
    const u32 destAddr = (u32)dest;

    // Copy backward only when dest starts strictly inside [src, src + size);
    // in every other layout a forward copy is safe.
    if( ( destAddr > srcAddr ) && ( srcAddr + size > destAddr ) )
    {
        CpuCopyFastReverse(src, dest, size);
        return;
    }

    MIi_CpuCopyFast(src, dest, size);
}
764
765 //=======================================================================
766 // FOR CONVENIENCE (memory copy)
767 //=======================================================================
768 /*---------------------------------------------------------------------------*
769 Name: MI_Copy16B
770
771 Description: Copies 16-byte data by CPU.
772
773 Arguments: srcp: Source address
774 destp: Destination address
775
776 Returns: None.
777 *---------------------------------------------------------------------------*/
MI_Copy16B(register const void * pSrc,register void * pDest)778 asm void MI_Copy16B(register const void* pSrc, register void* pDest)
779 {
780 ldmia r0!, {r2, r3, r12} // r0-r3, r12 need not saved
781 stmia r1!, {r2, r3, r12}
782 ldmia r0!, {r2}
783 stmia r1!, {r2}
784
785 bx lr
786 }
787
788 /*---------------------------------------------------------------------------*
789 Name: MI_Copy32B
790
791 Description: Copies 32-byte data by CPU.
792
793 Arguments: srcp: Source address
794 destp: Destination address
795
796 Returns: None.
797 *---------------------------------------------------------------------------*/
MI_Copy32B(register const void * pSrc,register void * pDest)798 asm void MI_Copy32B(register const void* pSrc, register void* pDest)
799 {
800 ldmia r0!, {r2, r3, r12} // r0-r3, r12 need not saved
801 stmia r1!, {r2, r3, r12}
802 ldmia r0!, {r2, r3, r12}
803 stmia r1!, {r2, r3, r12}
804 ldmia r0!, {r2, r3}
805 stmia r1!, {r2, r3}
806
807 bx lr
808 }
809
810 /*---------------------------------------------------------------------------*
811 Name: MI_Copy36B
812
813 Description: Copies 36-byte data by CPU.
814
815 Arguments: srcp: Source address
816 destp: Destination address
817
818 Returns: None.
819 *---------------------------------------------------------------------------*/
MI_Copy36B(register const void * pSrc,register void * pDest)820 asm void MI_Copy36B(register const void* pSrc, register void* pDest)
821 {
822 ldmia r0!, {r2, r3, r12} // r0-r3, r12 need not saved
823 stmia r1!, {r2, r3, r12}
824 ldmia r0!, {r2, r3, r12}
825 stmia r1!, {r2, r3, r12}
826 ldmia r0!, {r2, r3, r12}
827 stmia r1!, {r2, r3, r12}
828
829 bx lr
830 }
831
832 /*---------------------------------------------------------------------------*
833 Name: MI_Copy48B
834
835 Description: Copies 48-byte data by CPU.
836
837 Arguments: srcp: Source address
838 destp: Destination address
839
840 Returns: None.
841 *---------------------------------------------------------------------------*/
MI_Copy48B(register const void * pSrc,register void * pDest)842 asm void MI_Copy48B(register const void* pSrc, register void* pDest)
843 {
844 ldmia r0!, {r2, r3, r12} // r0-r3, r12 need not saved
845 stmia r1!, {r2, r3, r12}
846 ldmia r0!, {r2, r3, r12}
847 stmia r1!, {r2, r3, r12}
848 ldmia r0!, {r2, r3, r12}
849 stmia r1!, {r2, r3, r12}
850 ldmia r0!, {r2, r3, r12}
851 stmia r1!, {r2, r3, r12}
852
853 bx lr
854 }
855
856 /*---------------------------------------------------------------------------*
857 Name: MI_Copy64B
858
859 Description: Copies 64-byte data by CPU.
860
861 Arguments: srcp: Source address
862 destp: Destination address
863
864 Returns: None.
865 *---------------------------------------------------------------------------*/
MI_Copy64B(register const void * pSrc,register void * pDest)866 asm void MI_Copy64B(register const void* pSrc, register void* pDest)
867 {
868 ldmia r0!, {r2, r3, r12} // r0-r3, r12 need not saved
869 stmia r1!, {r2, r3, r12}
870 ldmia r0!, {r2, r3, r12}
871 stmia r1!, {r2, r3, r12}
872 ldmia r0!, {r2, r3, r12}
873 stmia r1!, {r2, r3, r12}
874 ldmia r0!, {r2, r3, r12}
875 stmia r1!, {r2, r3, r12}
876 ldmia r0, {r0, r2, r3, r12}
877 stmia r1!, {r0, r2, r3, r12}
878
879 bx lr
880 }
881
882 /*---------------------------------------------------------------------------*
883 Name: MI_Copy128B
884
885 Description: Copies 128-byte data by CPU.
886
887 Arguments: srcp: Source address
888 destp: Destination address
889
890 Returns: None.
891 *---------------------------------------------------------------------------*/
MI_Copy128B(register const void * pSrc,register void * pDest)892 asm void MI_Copy128B(register const void* pSrc, register void* pDest)
893 {
894 stmfd sp!, {r4}
895
896 ldmia r0!, {r2, r3, r4, r12} // r0-r3, r12 need not saved
897 stmia r1!, {r2, r3, r4, r12}
898 ldmia r0!, {r2, r3, r4, r12}
899 stmia r1!, {r2, r3, r4, r12}
900 ldmia r0!, {r2, r3, r4, r12}
901 stmia r1!, {r2, r3, r4, r12}
902 ldmia r0!, {r2, r3, r4, r12}
903 stmia r1!, {r2, r3, r4, r12}
904 ldmia r0!, {r2, r3, r4, r12}
905 stmia r1!, {r2, r3, r4, r12}
906 ldmia r0!, {r2, r3, r4, r12}
907 stmia r1!, {r2, r3, r4, r12}
908 ldmia r0!, {r2, r3, r4, r12}
909 stmia r1!, {r2, r3, r4, r12}
910 ldmia r0!, {r2, r3, r4, r12}
911 stmia r1!, {r2, r3, r4, r12}
912
913 ldmfd sp!, {r4}
914 bx lr
915 }
916
917 //=======================================================================
918 // FOR SDK USE (no alignment requirements)
919 //=======================================================================
920 /*---------------------------------------------------------------------------*
921 Name: MI_CpuFill8
922
923 Description: Fills memory with specified data.
924 Handles alignment automatically.
925
926 Arguments: dstp: destination address
927 data: fill data
928 size: size (bytes)
929
930 Returns: None.
931 *---------------------------------------------------------------------------*/
932 #ifdef SDK_SMALL_BUILD
MI_CpuFill8(register void * dstp,register u8 data,register u32 size)933 asm void MI_CpuFill8( register void *dstp, register u8 data, register u32 size )
934 {
935 mov r12, #0 // n = 0
936 @1:
937 cmp r12, r2 // n < size ?
938 strltb r1, [r0, r12] // *((u8*)( dstp + n ) ) = data
939
940 addlt r12, r12, #1 // n ++
941 blt @1
942
943 bx lr
944 }
945 #else //ifdef SDK_SMALL_BUILD
MI_CpuFill8(register void * dstp,register u8 data,register u32 size)946 asm void MI_CpuFill8( register void *dstp, register u8 data, register u32 size )
947 {
// Byte fill that never issues a byte store: whole halfwords and words are
// written with strh/str, and a lone byte at either end is merged into its
// containing halfword by read-modify-write.
// NOTE(review): presumably this is so the routine also works on memory that
// does not accept byte writes - confirm against the hardware documentation.
// r0 = dstp (advanced as the fill proceeds), r1 = data, r2 = bytes remaining.
948 cmp r2, #0
949 bxeq lr // size == 0: nothing to do
950
951 // Odd start address: merge the first byte into the halfword at dstp - 1
952 tst r0, #1
953 beq @_1
954 #ifndef CW_BUG_FOR_LDRH_AND_STRH
955 ldrh r12, [r0, #-1] // read the halfword that contains the byte at dstp
956 #else
957 LDRH_AD4( HALFW_CONDAL, 12, 0, 1 ) // *** For CW BUG
958 #endif
959 and r12, r12, #0x00FF // keep the low byte (the byte just before dstp)
960 orr r3, r12, r1, lsl #8 // put data into the high byte
961 #ifndef CW_BUG_FOR_LDRH_AND_STRH
962 strh r3, [r0, #-1] // write the merged halfword back
963 #else
964 STRH_AD4( HALFW_CONDAL, 3, 0, 1 ) // *** For CW BUG
965 #endif
966 add r0, r0, #1 // dstp is now halfword aligned
967 subs r2, r2, #1
968 bxeq lr // that was the only byte
969 @_1:
970
971 // Bring dstp up to 32-bit alignment
972 cmp r2, #2
973 bcc @_6 // fewer than 2 bytes left: only the trailing byte remains
974 orr r1, r1, r1, lsl #8 // r1 = data replicated into both bytes of a halfword
975 tst r0, #2
976 beq @_8 // already word aligned
977 #ifndef CW_BUG_FOR_LDRH_AND_STRH
978 strh r1, [r0], #2 // one halfword store; dstp += 2
979 #else
980 STRH_AD1( HALFW_CONDAL, 1, 0, 2 ) // *** For CW BUG
981 #endif
982 subs r2, r2, #2
983 bxeq lr
984 @_8:
985 // 32-bit transfer
986 orr r1, r1, r1, lsl #16 // r1 = data replicated into all four bytes
987 bics r3, r2, #3 // r3 = remaining size rounded down to a multiple of 4
988 beq @_10 // no whole word left
989 sub r2, r2, r3 // r2 = bytes left after the word loop (0-3)
990 add r12, r3, r0 // r12 = address where the word loop stops
991 @_9:
992 str r1, [r0], #4 // store one word; dstp += 4
993 cmp r0, r12
994 bcc @_9 // repeat while dstp < r12 (unsigned compare)
995
996 @_10:
997 // Last 16-bit transfer
998 tst r2, #2
999 #ifndef CW_BUG_FOR_LDRH_AND_STRH
1000 strneh r1, [r0], #2 // only when bit 1 of the remaining size is set
1001 #else
1002 STRH_AD1( HALFW_CONDNE, 1, 0, 2 ) // *** For CW BUG
1003 #endif
1004
1005 @_6:
1006 // Last 8-bit transfer: merge the final byte into the halfword at dstp
1007 tst r2, #1
1008 bxeq lr // no odd byte left
1009 #ifndef CW_BUG_FOR_LDRH_AND_STRH
1010 ldrh r3, [r0]
1011 #else
1012 LDRH_AD2( HALFW_CONDAL, 3, 0, 0 ) // *** For CW BUG
1013 #endif
1014 and r3, r3, #0xFF00 // keep the high byte (the byte just after the fill range)
1015 and r1, r1, #0x00FF // reduce r1 to a single data byte again
1016 orr r1, r1, r3
1017 #ifndef CW_BUG_FOR_LDRH_AND_STRH
1018 strh r1, [r0]
1019 #else
1020 STRH_AD2( HALFW_CONDAL, 1, 0, 0 ) // *** For CW BUG
1021 #endif
1022 bx lr
1023 }
1024
MI_CpuFill(register void * dstp,register u8 data,register u32 size)1025 asm void MI_CpuFill( register void *dstp, register u8 data, register u32 size )
1026 {
1027 cmp r2, #0
1028 bxeq lr
1029
1030 /* If less than 8 bytes, process directly here */
1031 cmp r2, #8
1032 bgt _fill_and_align
1033
1034 _fill1_less_than_equal_8:
1035 rsb r3, r2, #0x8
1036 add pc, pc, r3, lsl #2
1037 nop
1038 strb r1, [r0], #1
1039 strb r1, [r0], #1
1040 strb r1, [r0], #1
1041 strb r1, [r0], #1
1042 strb r1, [r0], #1
1043 strb r1, [r0], #1
1044 strb r1, [r0], #1
1045 strb r1, [r0], #1
1046 bx lr
1047
1048 _fill_and_align:
1049 /* If more than 8 bytes, process awareness of alignment */
1050
1051 /* Fill the register with 4 bytes */
1052 orr r1, r1, r1, lsl #8
1053 orr r1, r1, r1, lsl #16
1054
1055 /* Process the fractions at the leading end of dst first */
1056 tst r0, #1
1057 subne r2, r2, #1
1058 strneb r1, [r0], #1
1059
1060 tst r0, #2
1061 subne r2, r2, #2
1062 strneh r1, [r0], #2
1063
1064 tst r0, #4
1065 subne r2, r2, #4
1066 strne r1, [r0], #4
1067
1068 _fill32:
1069 cmp r2, #32
1070 blt _fill4
1071
1072 _fill32_pre:
1073 stmfd sp!, {r4-r10}
1074 mov r4, r1
1075 mov r5, r1
1076 mov r6, r1
1077 mov r7, r1
1078 mov r8, r1
1079 mov r9, r1
1080 mov r10, r1
1081 subs r2, r2, #32
1082
1083 _fill32_loop:
1084 stmgeia r0!, {r1,r4-r10}
1085 subges r2, r2, #32
1086 bge _fill32_loop
1087 add r2, r2, #32
1088
1089 _fill32_post:
1090 ldmfd sp!, {r4-r10}
1091
1092 _fill4:
1093 cmp r2, #4
1094 blt _fill1_less_than_4
1095 subs r2, r2, #4
1096
1097 _fill4_loop:
1098 strge r1, [r0], #4
1099 subs r2, r2, #4
1100 bge _fill4_loop
1101 add r2, r2, #4
1102
1103 _fill1_less_than_4:
1104 subs r2, r2, #1
1105 strgeb r1, [r0], #1
1106 subges r2, r2, #1
1107 strgeb r1, [r0], #1
1108 subges r2, r2, #1
1109 strgeb r1, [r0], #1
1110
1111 bx lr
1112 }
1113
1114 #endif // ifdef SDK_SMALL_BUILD
1115
1116 /*---------------------------------------------------------------------------*
1117 Name: MI_CpuCopy8
1118
  Description:  Copies memory by CPU.
                Alignment is handled automatically.
1121
1122 Arguments: srcp: Source address
1123 dstp: Destination address
1124 size: Size (bytes)
1125
1126 Returns: None.
1127 *---------------------------------------------------------------------------*/
1128 #ifdef SDK_SMALL_BUILD
MI_CpuCopy8(register const void * srcp,register void * dstp,register u32 size)1129 asm void MI_CpuCopy8( register const void *srcp, register void *dstp, register u32 size )
1130 {
1131 mov r12, #0 // n = 0
1132 @1:
1133 cmp r12, r2 // n < size ?
1134 ldrltb r3, [r0, r12] // *((vu8 *)(destp + p)) = *((vu8 *)(srcp + n))
1135 strltb r3, [r1, r12]
1136
1137 addlt r12, r12, #1 // n ++
1138 blt @1
1139
1140 bx lr
1141 }
1142
1143 #else //ifdef SDK_SMALL_BUILD
MI_CpuCopy8(register const void * srcp,register void * dstp,register u32 size)1144 asm void MI_CpuCopy8( register const void *srcp, register void *dstp, register u32 size )
1145 {
1146 cmp r2, #0
1147 bxeq lr
1148
1149 // 16-bit alignment of dstp
1150 tst r1, #1
1151 beq @_1
1152 #ifndef CW_BUG_FOR_LDRH_AND_STRH
1153 ldrh r12, [r1, #-1]
1154 #else
1155 LDRH_AD4( HALFW_CONDAL, 12, 1, 1 ) // *** For CW BUG
1156 #endif
1157 and r12, r12, #0x00FF
1158 tst r0, #1
1159 #ifndef CW_BUG_FOR_LDRH_AND_STRH
1160 ldrneh r3, [r0, #-1]
1161 #else
1162 LDRH_AD4( HALFW_CONDNE, 3, 0, 1 ) // *** For CW BUG
1163 #endif
1164 movne r3, r3, lsr #8
1165 #ifndef CW_BUG_FOR_LDRH_AND_STRH
1166 ldreqh r3, [r0]
1167 #else
1168 LDRH_AD2( HALFW_CONDEQ, 3, 0, 0 ) // *** For CW BUG
1169 #endif
1170 orr r3, r12, r3, lsl #8
1171 #ifndef CW_BUG_FOR_LDRH_AND_STRH
1172 strh r3, [r1, #-1]
1173 #else
1174 STRH_AD4( HALFW_CONDAL, 3, 1, 1 ) // *** For CW BUG
1175 #endif
1176 add r0, r0, #1
1177 add r1, r1, #1
1178 subs r2, r2, #1
1179 bxeq lr
1180 @_1:
1181
1182 // Check the 16- or 32-bit synchronization of the address fraction
1183 eor r12, r1, r0
1184 tst r12, #1
1185 beq @_2
1186
1187 // Doesn't synchronize at all, so use irregular 16-bit transfer
1188 // tmp = *(u16*)src++ >> 8;
1189 // while((size -= 2) >= 0) {
1190 // tmp |= (*(u16*)src++ << 8);
1191 // *(u16*)dst++ = (u16)tmp;
1192 // tmp >>= 16;
1193 // }
1194 bic r0, r0, #1
1195 #ifndef CW_BUG_FOR_LDRH_AND_STRH
1196 ldrh r12, [r0], #2
1197 #else
1198 LDRH_AD1( HALFW_CONDAL, 12, 0, 2 ) // *** For CW BUG
1199 #endif
1200 mov r3, r12, lsr #8
1201 subs r2, r2, #2
1202 bcc @_3
1203 @_4:
1204 #ifndef CW_BUG_FOR_LDRH_AND_STRH
1205 ldrh r12, [r0], #2
1206 #else
1207 LDRH_AD1( HALFW_CONDAL, 12, 0, 2 ) // *** For CW BUG
1208 #endif
1209 orr r12, r3, r12, lsl #8
1210 #ifndef CW_BUG_FOR_LDRH_AND_STRH
1211 strh r12, [r1], #2
1212 #else
1213 STRH_AD1( HALFW_CONDAL, 12, 1, 2 ) // *** For CW BUG
1214 #endif
1215 mov r3, r12, lsr #16
1216 subs r2, r2, #2
1217 bcs @_4
1218
1219 @_3:
1220 // if(size & 1)
1221 // *dst = (u16)((*dst & 0xFF00) | tmp);
1222 // return;
1223 tst r2, #1
1224 bxeq lr
1225 #ifndef CW_BUG_FOR_LDRH_AND_STRH
1226 ldrh r12, [r1]
1227 #else
1228 LDRH_AD2( HALFW_CONDAL, 12, 1, 0 ) // *** For CW BUG
1229 #endif
1230 and r12, r12, #0xFF00
1231 orr r12, r12, r3
1232 #ifndef CW_BUG_FOR_LDRH_AND_STRH
1233 strh r12, [r1]
1234 #else
1235 STRH_AD2( HALFW_CONDAL, 12, 1, 0 ) // *** For CW BUG
1236 #endif
1237 bx lr
1238
1239 @_2:
1240 tst r12, #2
1241 beq @_5
1242 // 16-bit transfer
1243 bics r3, r2, #1
1244 beq @_6
1245 sub r2, r2, r3
1246 add r12, r3, r1
1247 @_7:
1248 #ifndef CW_BUG_FOR_LDRH_AND_STRH
1249 ldrh r3, [r0], #2
1250 #else
1251 LDRH_AD1( HALFW_CONDAL, 3, 0, 2 ) // *** For CW BUG
1252 #endif
1253 #ifndef CW_BUG_FOR_LDRH_AND_STRH
1254 strh r3, [r1], #2
1255 #else
1256 STRH_AD1( HALFW_CONDAL, 3, 1, 2 ) // *** For CW BUG
1257 #endif
1258 cmp r1, r12
1259 bcc @_7
1260 b @_6
1261
1262 @_5:
1263 // 32-bit alignment
1264 cmp r2, #2
1265 bcc @_6
1266 tst r1, #2
1267 beq @_8
1268 #ifndef CW_BUG_FOR_LDRH_AND_STRH
1269 ldrh r3, [r0], #2
1270 #else
1271 LDRH_AD1( HALFW_CONDAL, 3, 0, 2 ) // *** For CW BUG
1272 #endif
1273 #ifndef CW_BUG_FOR_LDRH_AND_STRH
1274 strh r3, [r1], #2
1275 #else
1276 STRH_AD1( HALFW_CONDAL, 3, 1, 2 ) // *** For CW BUG
1277 #endif
1278 subs r2, r2, #2
1279 bxeq lr
1280 @_8:
1281 // 32-bit transfer
1282 bics r3, r2, #3
1283 beq @_10
1284 sub r2, r2, r3
1285 add r12, r3, r1
1286 @_9:
1287 ldr r3, [r0], #4
1288 str r3, [r1], #4
1289 cmp r1, r12
1290 bcc @_9
1291
1292 @_10:
1293 // Last 16-bit transfer
1294 tst r2, #2
1295 #ifndef CW_BUG_FOR_LDRH_AND_STRH
1296 ldrneh r3, [r0], #2
1297 strneh r3, [r1], #2
1298 #else
1299 LDRH_AD1( HALFW_CONDNE, 3, 0, 2 ) // *** For CW BUG
1300 STRH_AD1( HALFW_CONDNE, 3, 1, 2 ) // *** For CW BUG
1301 #endif
1302
1303 @_6:
1304 // Last 8-bit transfer
1305 tst r2, #1
1306 bxeq lr
1307 #ifndef CW_BUG_FOR_LDRH_AND_STRH
1308 ldrh r2, [r1]
1309 ldrh r0, [r0]
1310 #else
1311 LDRH_AD2( HALFW_CONDAL, 2, 1, 0 ) // *** For CW BUG
1312 LDRH_AD2( HALFW_CONDAL, 0, 0, 0 ) // *** For CW BUG
1313 #endif
1314 and r2, r2, #0xFF00
1315 and r0, r0, #0x00FF
1316 orr r0, r2, r0
1317 #ifndef CW_BUG_FOR_LDRH_AND_STRH
1318 strh r0, [r1]
1319 #else
1320 STRH_AD2( HALFW_CONDAL, 0, 1, 0 ) // *** For CW BUG
1321 #endif
1322 bx lr
1323 }
1324 #endif //ifdef SDK_SMALL_BUILD
1325
1326
1327
1328 /*---------------------------------------------------------------------------*
1329 Name: MI_CpuFind8
1330
1331 Description: Finds memory data (8-bit version).
1332
1333 Arguments: src: Source address, no limitation for alignment
1334 data: Target data
1335 size: Size (byte), no limitation for alignment
1336
1337 Returns: Pointer to found data or NULL.
1338 *---------------------------------------------------------------------------*/
void*   MI_CpuFind8(const void *src, u8 data, u32 size)
{
    // Searches the size bytes starting at src for the first byte equal to
    // data and returns its address, or NULL when there is none (or size is 0).
    // Memory is read only through 16-bit loads.  The byte at the lower
    // address is taken from bits 0-7 of each halfword, i.e. the code assumes
    // little-endian byte order.
    const u8*   p8 = (const u8*)src;

    if( size == 0 )
    {
        return NULL;
    }

    // Odd start address: load the halfword that contains the first byte,
    // test only its upper 8 bits, then continue from the next (even) address.
    if( ((u32)p8 & 0x1) != 0 )
    {
        const u16   v = *(const u16*)(p8 - 1);

        if( (v >> 8) == data )
        {
            return (void*)p8;
        }

        size--;
        p8++;
    }

    // Check in 2-byte units.
    // The end pointer is advanced by the number of whole halfwords (size / 2).
    // Adding a byte count to a u16 pointer would scale it by two and make
    // the scan run past the end of the requested range.
    {
        const u16*  p16    = (const u16*)p8;
        const u16*  p16end = p16 + (size / 2);

        for( ; p16 < p16end; ++p16 )
        {
            const u16   v = *p16;

            if( (v & 0xFF) == data )
            {
                return (void*)( (u8*)p16 + 0 );
            }
            if( (v >> 8) == data )
            {
                return (void*)( (u8*)p16 + 1 );
            }
        }
    }

    // If the remaining size is odd, one byte is left after the halfword scan.
    // p8 is even here, so p8 + size - 1 is a halfword-aligned address and the
    // wanted byte is in bits 0-7 of the halfword loaded from it.
    if( (size & 0x1) != 0 )
    {
        const u16   v = *(const u16*)(p8 + size - 1);

        if( (v & 0xFF) == data )
        {
            return (void*)(p8 + size - 1);
        }
    }

    return NULL;
}
1397
1398 /*---------------------------------------------------------------------------*
1399 Name: MI_CpuComp8
1400
1401 Description: Compares memory data (8-bit version).
1402
1403 Arguments: mem1: Target address 1, no limitation for alignment
1404 mem2: Target address 2, no limitation for alignment
1405 size: Size (byte), no limitation for alignment
1406
1407 Returns: < 0: mem1 smaller than mem2.
1408 = 0: mem1 equals mem2.
1409 > 0: mem1 larger than mem2.
1410 *---------------------------------------------------------------------------*/
int     MI_CpuComp8(const void *mem1, const void *mem2, u32 size)
{
    // Walks both areas one byte at a time and returns the difference
    // (byte of mem1 minus byte of mem2) at the first position where they
    // differ, or 0 when all size bytes are equal.
    const u8*       lhs = (const u8*)mem1;
    const u8*       rhs = (const u8*)mem2;
    const u8* const lhsEnd = lhs + size;

    for( ; lhs < lhsEnd; ++lhs, ++rhs )
    {
        const int   diff = (int)*lhs - (int)*rhs;

        if( diff != 0 )
        {
            return diff;
        }
    }

    return 0;
}
1429
1430 /*---------------------------------------------------------------------------*
1431 Name: MI_CpuCopy
1432
1433 Description: Copies memory by CPU.
1434 Byte access/ldm-stm version.
1435
1436 Arguments: srcp: Source address
1437 destp: Destination address.
1438 size: Size (bytes)
1439
1440 Returns: None.
1441 *---------------------------------------------------------------------------*/
1442 #if PLATFORM_BYTES_ENDIAN == PLATFORM_ENDIAN_LITTLE
1443 /* Little-endian */
1444 #define FORWARD_(n) lsl #((n) * 8)
1445 #define BACKWARD_(n) lsr #(32 - (n) * 8)
1446 #define FORWARD_MASK(n) #((1 << ((n) * 8)) - 1)
1447 #elif PLATFORM_BYTES_ENDIAN == PLATFORM_ENDIAN_BIG
1448 /* Big-endian */
1449 #define FORWARD_(n) lsr #((n) * 8)
1450 #define BACKWARD_(n) lsl #(32 - (n) * 8)
1451 #define FORWARD_MASK(n) #((1 << ((n) * 8)) - 1)
1452 #else
1453 #error
1454 #endif
1455
MI_CpuCopy(register const void * srcp,register void * destp,register u32 size)1456 asm void MI_CpuCopy( register const void *srcp, register void *destp, register u32 size )
1457 {
1458 /* Sequentially transfer from the front */
1459
1460 /* If less than 8 bytes, process directly here */
1461 cmp r2, #8
1462 bgt _forward_blt
1463 rsb r3, r2, #0x8
1464 add pc, pc, r3, lsl #3
1465 nop
1466 ldrb r3, [r0], #1
1467 strb r3, [r1], #1
1468 ldrb r3, [r0], #1
1469 strb r3, [r1], #1
1470 ldrb r3, [r0], #1
1471 strb r3, [r1], #1
1472 ldrb r3, [r0], #1
1473 strb r3, [r1], #1
1474 ldrb r3, [r0], #1
1475 strb r3, [r1], #1
1476 ldrb r3, [r0], #1
1477 strb r3, [r1], #1
1478 ldrb r3, [r0], #1
1479 strb r3, [r1], #1
1480 ldrb r3, [r0], #1
1481 strb r3, [r1], #1
1482 bx lr
1483
1484 _forward_blt:
1485 /* If more than 8 bytes, process awareness of alignment */
1486
1487 /* Process the fractions at the leading end of src first */
1488 tst r0, #1
1489 subne r2, r2, #1
1490 ldrneb r3, [r0], #1
1491 strneb r3, [r1], #1
1492 tst r0, #2
1493 subne r2, r2, #2
1494 ldrneb r3, [r0], #1
1495 strneb r3, [r1], #1
1496 ldrneb r3, [r0], #1
1497 strneb r3, [r1], #1
1498
1499 /* Branch processing according to the phases of src and dst */
1500 and r3, r1, #3
1501 bic r1, r1, #3
1502 cmp r3, #0
1503 beq _forward_blt_0
1504 cmp r3, #1
1505 beq _forward_blt_1
1506 cmp r3, #2
1507 beq _forward_blt_2
1508 b _forward_blt_3
1509
1510 _forward_blt_0:
1511 /* Processing when (dst & 3 == 0) */
1512 stmfd sp!, {r4-r10,lr}
1513 subs r2, r2, #32
1514 _forward_blt_0_32:
1515 ldmgeia r0!, {r4-r10,lr}
1516 stmgeia r1!, {r4-r10,lr}
1517 subges r2, r2, #32
1518 bge _forward_blt_0_32
1519 add r2, r2, #32
1520 ldmfd sp!, {r4-r10,lr}
1521
1522 subs r2, r2, #4
1523 _forward_blt_0_4:
1524 ldrge r3, [r0], #4
1525 strge r3, [r1], #4
1526 subges r2, r2, #4
1527 bge _forward_blt_0_4
1528 add r2, r2, #4
1529 b _forward_blt_end
1530
1531 _forward_blt_1:
1532 /* Processing when (dst & 3 == 1) */
1533 #define SHIFT 1
1534 /* ldm-shift-stm in 32-byte units */
1535 ldr r12, [r1]
1536 mov r12, r12, FORWARD_(4 - SHIFT)
1537 mov r12, r12, BACKWARD_(SHIFT)
1538 stmfd sp!, {r4-r10,lr}
1539 subs r2, r2, #32
1540 _forward_blt_1_32:
1541 ldmgeia r0!, {r4-r10,lr}
1542 movge r3, r4, BACKWARD_(SHIFT)
1543 orrge r4, r12, r4, FORWARD_(SHIFT)
1544 movge r12, r5, BACKWARD_(SHIFT)
1545 orrge r5, r3, r5, FORWARD_(SHIFT)
1546 movge r3, r6, BACKWARD_(SHIFT)
1547 orrge r6, r12, r6, FORWARD_(SHIFT)
1548 movge r12, r7, BACKWARD_(SHIFT)
1549 orrge r7, r3, r7, FORWARD_(SHIFT)
1550 movge r3, r8, BACKWARD_(SHIFT)
1551 orrge r8, r12, r8, FORWARD_(SHIFT)
1552 movge r12, r9, BACKWARD_(SHIFT)
1553 orrge r9, r3, r9, FORWARD_(SHIFT)
1554 movge r3, r10, BACKWARD_(SHIFT)
1555 orrge r10, r12, r10, FORWARD_(SHIFT)
1556 movge r12, lr, BACKWARD_(SHIFT)
1557 orrge lr, r3, lr, FORWARD_(SHIFT)
1558 stmgeia r1!, {r4-r10,lr}
1559 subges r2, r2, #32
1560 bge _forward_blt_1_32
1561 add r2, r2, #32
1562 ldmfd sp!, {r4-r10,lr}
1563 /* ldr-shift-str in 4-byte units */
1564 subs r2, r2, #4
1565 _forward_blt_1_4:
1566 ldrge r3, [r0], #4
1567 orrge r12, r12, r3, FORWARD_(SHIFT)
1568 strge r12, [r1], #4
1569 movge r12, r3, BACKWARD_(SHIFT)
1570 subges r2, r2, #4
1571 bge _forward_blt_1_4
1572 add r2, r2, #4
1573 /* Shared end process */
1574 sub r0, r0, #SHIFT
1575 add r2, r2, #SHIFT
1576 b _forward_blt_end
1577 #undef SHIFT
1578
1579 _forward_blt_2:
1580 /* Processing when (dst & 3 == 2) */
1581 #define SHIFT 2
1582 /* ldm-shift-stm in 32-byte units */
1583 ldr r12, [r1]
1584 mov r12, r12, FORWARD_(4 - SHIFT)
1585 mov r12, r12, BACKWARD_(SHIFT)
1586 stmfd sp!, {r4-r10,lr}
1587 subs r2, r2, #32
1588 _forward_blt_2_32:
1589 ldmgeia r0!, {r4-r10,lr}
1590 movge r3, r4, BACKWARD_(SHIFT)
1591 orrge r4, r12, r4, FORWARD_(SHIFT)
1592 movge r12, r5, BACKWARD_(SHIFT)
1593 orrge r5, r3, r5, FORWARD_(SHIFT)
1594 movge r3, r6, BACKWARD_(SHIFT)
1595 orrge r6, r12, r6, FORWARD_(SHIFT)
1596 movge r12, r7, BACKWARD_(SHIFT)
1597 orrge r7, r3, r7, FORWARD_(SHIFT)
1598 movge r3, r8, BACKWARD_(SHIFT)
1599 orrge r8, r12, r8, FORWARD_(SHIFT)
1600 movge r12, r9, BACKWARD_(SHIFT)
1601 orrge r9, r3, r9, FORWARD_(SHIFT)
1602 movge r3, r10, BACKWARD_(SHIFT)
1603 orrge r10, r12, r10, FORWARD_(SHIFT)
1604 movge r12, lr, BACKWARD_(SHIFT)
1605 orrge lr, r3, lr, FORWARD_(SHIFT)
1606 stmgeia r1!, {r4-r10,lr}
1607 subges r2, r2, #32
1608 bge _forward_blt_2_32
1609 add r2, r2, #32
1610 ldmfd sp!, {r4-r10,lr}
1611 /* ldr-shift-str in 4-byte units */
1612 subs r2, r2, #4
1613 _forward_blt_2_4:
1614 ldrge r3, [r0], #4
1615 orrge r12, r12, r3, FORWARD_(SHIFT)
1616 strge r12, [r1], #4
1617 movge r12, r3, BACKWARD_(SHIFT)
1618 subges r2, r2, #4
1619 bge _forward_blt_2_4
1620 add r2, r2, #4
1621 /* Shared end process */
1622 sub r0, r0, #SHIFT
1623 add r2, r2, #SHIFT
1624 b _forward_blt_end
1625 #undef SHIFT
1626
1627 _forward_blt_3:
1628 /* Processing when (dst & 3 == 3) */
1629 #define SHIFT 3
1630 /* ldm-shift-stm in 32-byte units */
1631 ldr r12, [r1]
1632 mov r12, r12, FORWARD_(4 - SHIFT)
1633 mov r12, r12, BACKWARD_(SHIFT)
1634 stmfd sp!, {r4-r10,lr}
1635 subs r2, r2, #32
1636 _forward_blt_3_32:
1637 ldmgeia r0!, {r4-r10,lr}
1638 movge r3, r4, BACKWARD_(SHIFT)
1639 orrge r4, r12, r4, FORWARD_(SHIFT)
1640 movge r12, r5, BACKWARD_(SHIFT)
1641 orrge r5, r3, r5, FORWARD_(SHIFT)
1642 movge r3, r6, BACKWARD_(SHIFT)
1643 orrge r6, r12, r6, FORWARD_(SHIFT)
1644 movge r12, r7, BACKWARD_(SHIFT)
1645 orrge r7, r3, r7, FORWARD_(SHIFT)
1646 movge r3, r8, BACKWARD_(SHIFT)
1647 orrge r8, r12, r8, FORWARD_(SHIFT)
1648 movge r12, r9, BACKWARD_(SHIFT)
1649 orrge r9, r3, r9, FORWARD_(SHIFT)
1650 movge r3, r10, BACKWARD_(SHIFT)
1651 orrge r10, r12, r10, FORWARD_(SHIFT)
1652 movge r12, lr, BACKWARD_(SHIFT)
1653 orrge lr, r3, lr, FORWARD_(SHIFT)
1654 stmgeia r1!, {r4-r10,lr}
1655 subges r2, r2, #32
1656 bge _forward_blt_3_32
1657 add r2, r2, #32
1658 ldmfd sp!, {r4-r10,lr}
1659 /* ldr-shift-str in 4-byte units */
1660 subs r2, r2, #4
1661 _forward_blt_3_4:
1662 ldrge r3, [r0], #4
1663 orrge r12, r12, r3, FORWARD_(SHIFT)
1664 strge r12, [r1], #4
1665 movge r12, r3, BACKWARD_(SHIFT)
1666 subges r2, r2, #4
1667 bge _forward_blt_3_4
1668 add r2, r2, #4
1669 /* Shared end process */
1670 sub r0, r0, #SHIFT
1671 add r2, r2, #SHIFT
1672 b _forward_blt_end
1673 #undef SHIFT
1674
1675 _forward_blt_end:
1676 /* Transfer the end fraction */
1677 tst r2, #4
1678 ldrneb r3, [r0], #1
1679 strneb r3, [r1], #1
1680 ldrneb r3, [r0], #1
1681 strneb r3, [r1], #1
1682 ldrneb r3, [r0], #1
1683 strneb r3, [r1], #1
1684 ldrneb r3, [r0], #1
1685 strneb r3, [r1], #1
1686 tst r2, #2
1687 ldrneb r3, [r0], #1
1688 strneb r3, [r1], #1
1689 ldrneb r3, [r0], #1
1690 strneb r3, [r1], #1
1691 tst r2, #1
1692 ldrneb r3, [r0], #1
1693 strneb r3, [r1], #1
1694 bx lr
1695 }
1696
1697 /*---------------------------------------------------------------------------*
1698 Name: MI_CpuMove
1699
1700 Description: Moves memory data (mixed version).
1701
1702 Arguments: srcp: Source address
1703 destp: Destination address
1704 size: Size (bytes)
1705
1706 Returns: None.
1707 *---------------------------------------------------------------------------*/
MI_CpuMove(register const void * srcp,register void * destp,register u32 size)1708 asm void MI_CpuMove( register const void *srcp, register void *destp, register u32 size )
1709 {
1710 /* Simply determines whether transfer is really necessary and determines the transfer direction */
1711 cmp r2, #0
1712 subnes r3, r0, r1
1713 bxeq lr
1714 bgt MI_CpuCopy
1715
1716 /*
1717 * NOTE:
1718 * _forward_blt_1, _forward_blt_2, _forward_blt_3 and _backward_blt_1, _backward_blt_2, _backward_blt_3 switch constant SHIFT to 1, 2, 3 and have the same processes.
1719 *
1720 *
1721 * If you know a method to describe several lines of asm code with arm-elf-gcc using a macro, please do it all together.
1722 *
1723 * It is also acceptable to consume one empty register for sharing.
1724 * (However, when the shifting amount is specified with a register, one cycle will be increased, so in exchange for size conservation, processing costs will increase.
1725 *
1726 * If that is true, there is no need to go to the trouble to describe with asm.)
1727 */
1728
1729 _backward:
1730 /* Sequentially transfer from the back end */
1731 add r1, r1, r2
1732 add r0, r0, r2
1733
1734 /* If less than 8 bytes, process directly here */
1735 cmp r2, #8
1736 bgt _backward_blt
1737 rsb r3, r2, #0x8
1738 add pc, pc, r3, lsl #3
1739 nop
1740 ldrb r3, [r0, #-1]!
1741 strb r3, [r1, #-1]!
1742 ldrb r3, [r0, #-1]!
1743 strb r3, [r1, #-1]!
1744 ldrb r3, [r0, #-1]!
1745 strb r3, [r1, #-1]!
1746 ldrb r3, [r0, #-1]!
1747 strb r3, [r1, #-1]!
1748 ldrb r3, [r0, #-1]!
1749 strb r3, [r1, #-1]!
1750 ldrb r3, [r0, #-1]!
1751 strb r3, [r1, #-1]!
1752 ldrb r3, [r0, #-1]!
1753 strb r3, [r1, #-1]!
1754 ldrb r3, [r0, #-1]!
1755 strb r3, [r1, #-1]!
1756 bx lr
1757
1758 _backward_blt:
1759 /* If more than 8 bytes, process awareness of alignment */
1760
1761 /* Process the fractions at the trailing end of src first */
1762 tst r0, #2
1763 subne r2, r2, #2
1764 ldrneb r3, [r0, #-1]!
1765 strneb r3, [r1, #-1]!
1766 ldrneb r3, [r0, #-1]!
1767 strneb r3, [r1, #-1]!
1768 tst r0, #1
1769 subne r2, r2, #1
1770 ldrneb r3, [r0, #-1]!
1771 strneb r3, [r1, #-1]!
1772
1773 /* Branch processing according to the phases of src and dst */
1774 and r3, r1, #3
1775 bic r1, r1, #3
1776 cmp r3, #0
1777 beq _backward_blt_0
1778 cmp r3, #1
1779 beq _backward_blt_1
1780 cmp r3, #2
1781 beq _backward_blt_2
1782 b _backward_blt_3
1783
1784 _backward_blt_0:
1785 /* Processing when (dst & 3 == 0) */
1786 stmfd sp!, {r4-r10,lr}
1787 subs r2, r2, #32
1788 _backward_blt_0_32:
1789 ldmgedb r0!, {r4-r10,lr}
1790 stmgedb r1!, {r4-r10,lr}
1791 subges r2, r2, #32
1792 bge _backward_blt_0_32
1793 add r2, r2, #32
1794 ldmfd sp!, {r4-r10,lr}
1795
1796 subs r2, r2, #4
1797 _backward_blt_0_4:
1798 ldrge r3, [r0, #-4]!
1799 strge r3, [r1, #-4]!
1800 subges r2, r2, #4
1801 bge _backward_blt_0_4
1802 add r2, r2, #4
1803 b _backward_blt_end
1804
1805 _backward_blt_1:
1806 /* Processing when (dst & 3 == 1) */
1807 #define SHIFT 1
1808 /* ldm-shift-stm in 32-byte units */
1809 ldr r12, [r1]
1810 mov r12, r12, BACKWARD_(4 - SHIFT)
1811 mov r12, r12, FORWARD_(SHIFT)
1812 stmfd sp!, {r4-r10,lr}
1813 subs r2, r2, #32
1814 _backward_blt_1_32:
1815 ldmgedb r0!, {r4-r10,lr}
1816 movge r3, lr, FORWARD_(SHIFT)
1817 orrge lr, r12, lr, BACKWARD_(SHIFT)
1818 movge r12, r10, FORWARD_(SHIFT)
1819 orrge r10, r3, r10, BACKWARD_(SHIFT)
1820 movge r3, r9, FORWARD_(SHIFT)
1821 orrge r9, r12, r9, BACKWARD_(SHIFT)
1822 movge r12, r8, FORWARD_(SHIFT)
1823 orrge r8, r3, r8, BACKWARD_(SHIFT)
1824 movge r3, r7, FORWARD_(SHIFT)
1825 orrge r7, r12, r7, BACKWARD_(SHIFT)
1826 movge r12, r6, FORWARD_(SHIFT)
1827 orrge r6, r3, r6, BACKWARD_(SHIFT)
1828 movge r3, r5, FORWARD_(SHIFT)
1829 orrge r5, r12, r5, BACKWARD_(SHIFT)
1830 movge r12, r4, FORWARD_(SHIFT)
1831 orrge r4, r3, r4, BACKWARD_(SHIFT)
1832 stmgeda r1!, {r4-r10,lr}
1833 subges r2, r2, #32
1834 bge _backward_blt_1_32
1835 add r2, r2, #32
1836 ldmfd sp!, {r4-r10,lr}
1837 /* ldr-shift-str in 4-byte units */
1838 subs r2, r2, #4
1839 _backward_blt_1_4:
1840 ldrge r3, [r0, #-4]!
1841 orrge r12, r12, r3, BACKWARD_(SHIFT)
1842 strge r12, [r1], #-4
1843 movge r12, r3, FORWARD_(SHIFT)
1844 subges r2, r2, #4
1845 bge _backward_blt_1_4
1846 add r2, r2, #4
1847 /* Shared end process */
1848 add r1, r1, #4
1849 add r0, r0, #(4 - SHIFT)
1850 add r2, r2, #(4 - SHIFT)
1851 b _backward_blt_end
1852 #undef SHIFT
1853
1854 _backward_blt_2:
1855 /* Processing when (dst & 3 == 2) */
1856 #define SHIFT 2
1857 /* ldm-shift-stm in 32-byte units */
1858 ldr r12, [r1]
1859 mov r12, r12, BACKWARD_(4 - SHIFT)
1860 mov r12, r12, FORWARD_(SHIFT)
1861 stmfd sp!, {r4-r10,lr}
1862 subs r2, r2, #32
1863 _backward_blt_2_32:
1864 ldmgedb r0!, {r4-r10,lr}
1865 movge r3, lr, FORWARD_(SHIFT)
1866 orrge lr, r12, lr, BACKWARD_(SHIFT)
1867 movge r12, r10, FORWARD_(SHIFT)
1868 orrge r10, r3, r10, BACKWARD_(SHIFT)
1869 movge r3, r9, FORWARD_(SHIFT)
1870 orrge r9, r12, r9, BACKWARD_(SHIFT)
1871 movge r12, r8, FORWARD_(SHIFT)
1872 orrge r8, r3, r8, BACKWARD_(SHIFT)
1873 movge r3, r7, FORWARD_(SHIFT)
1874 orrge r7, r12, r7, BACKWARD_(SHIFT)
1875 movge r12, r6, FORWARD_(SHIFT)
1876 orrge r6, r3, r6, BACKWARD_(SHIFT)
1877 movge r3, r5, FORWARD_(SHIFT)
1878 orrge r5, r12, r5, BACKWARD_(SHIFT)
1879 movge r12, r4, FORWARD_(SHIFT)
1880 orrge r4, r3, r4, BACKWARD_(SHIFT)
1881 stmgeda r1!, {r4-r10,lr}
1882 subges r2, r2, #32
1883 bge _backward_blt_2_32
1884 add r2, r2, #32
1885 ldmfd sp!, {r4-r10,lr}
1886 /* ldr-shift-str in 4-byte units */
1887 subs r2, r2, #4
1888 _backward_blt_2_4:
1889 ldrge r3, [r0, #-4]!
1890 orrge r12, r12, r3, BACKWARD_(SHIFT)
1891 strge r12, [r1], #-4
1892 movge r12, r3, FORWARD_(SHIFT)
1893 subges r2, r2, #4
1894 bge _backward_blt_2_4
1895 add r2, r2, #4
1896 /* Shared end process */
1897 add r1, r1, #4
1898 add r0, r0, #(4 - SHIFT)
1899 add r2, r2, #(4 - SHIFT)
1900 b _backward_blt_end
1901 #undef SHIFT
1902
1903 _backward_blt_3:
1904 /* Processing when (dst & 3 == 3) */
1905 #define SHIFT 3
1906 /* ldm-shift-stm in 32-byte units */
1907 ldr r12, [r1]
1908 mov r12, r12, BACKWARD_(4 - SHIFT)
1909 mov r12, r12, FORWARD_(SHIFT)
1910 stmfd sp!, {r4-r10,lr}
1911 subs r2, r2, #32
1912 _backward_blt_3_32:
1913 ldmgedb r0!, {r4-r10,lr}
1914 movge r3, lr, FORWARD_(SHIFT)
1915 orrge lr, r12, lr, BACKWARD_(SHIFT)
1916 movge r12, r10, FORWARD_(SHIFT)
1917 orrge r10, r3, r10, BACKWARD_(SHIFT)
1918 movge r3, r9, FORWARD_(SHIFT)
1919 orrge r9, r12, r9, BACKWARD_(SHIFT)
1920 movge r12, r8, FORWARD_(SHIFT)
1921 orrge r8, r3, r8, BACKWARD_(SHIFT)
1922 movge r3, r7, FORWARD_(SHIFT)
1923 orrge r7, r12, r7, BACKWARD_(SHIFT)
1924 movge r12, r6, FORWARD_(SHIFT)
1925 orrge r6, r3, r6, BACKWARD_(SHIFT)
1926 movge r3, r5, FORWARD_(SHIFT)
1927 orrge r5, r12, r5, BACKWARD_(SHIFT)
1928 movge r12, r4, FORWARD_(SHIFT)
1929 orrge r4, r3, r4, BACKWARD_(SHIFT)
1930 stmgeda r1!, {r4-r10,lr}
1931 subges r2, r2, #32
1932 bge _backward_blt_3_32
1933 add r2, r2, #32
1934 ldmfd sp!, {r4-r10,lr}
1935 /* ldr-shift-str in 4-byte units */
1936 subs r2, r2, #4
1937 _backward_blt_3_4:
1938 ldrge r3, [r0, #-4]!
1939 orrge r12, r12, r3, BACKWARD_(SHIFT)
1940 strge r12, [r1], #-4
1941 movge r12, r3, FORWARD_(SHIFT)
1942 subges r2, r2, #4
1943 bge _backward_blt_3_4
1944 add r2, r2, #4
1945 /* Shared end process */
1946 add r1, r1, #4
1947 add r0, r0, #(4 - SHIFT)
1948 add r2, r2, #(4 - SHIFT)
1949 b _backward_blt_end
1950 #undef SHIFT
1951
1952 _backward_blt_end:
1953 /* Transfer the leading end fraction */
1954 tst r2, #4
1955 ldrneb r3, [r0, #-1]!
1956 strneb r3, [r1, #-1]!
1957 ldrneb r3, [r0, #-1]!
1958 strneb r3, [r1, #-1]!
1959 ldrneb r3, [r0, #-1]!
1960 strneb r3, [r1, #-1]!
1961 ldrneb r3, [r0, #-1]!
1962 strneb r3, [r1, #-1]!
1963 tst r2, #2
1964 ldrneb r3, [r0, #-1]!
1965 strneb r3, [r1, #-1]!
1966 ldrneb r3, [r0, #-1]!
1967 strneb r3, [r1, #-1]!
1968 tst r2, #1
1969 ldrneb r3, [r0, #-1]!
1970 strneb r3, [r1, #-1]!
1971 bx lr
1972 }
1973
1974 #undef FORWARD_
1975 #undef BACKWARD_
1976 #undef FORWARD_MASK
1977
1978 #include <nitro/codereset.h>
1979
1980
1981 #include <nitro/code16.h>
1982 //=======================================================================
1983 // FOR CONVENIENCE (filling zero)
1984 //=======================================================================
1985 /*---------------------------------------------------------------------------*
1986 Name: MI_Zero32B
1987
1988 Description: Fills 32-byte area with 0 by CPU.
1989
1990 Arguments: pDest: Destination address
1991
1992 Returns: None.
1993 *---------------------------------------------------------------------------*/
MI_Zero32B(register void * pDest)1994 asm void MI_Zero32B(register void* pDest)
1995 {
1996 mov r1, #0
1997 mov r2, #0
1998 stmia r0!, {r1, r2}
1999 mov r3, #0
2000 stmia r0!, {r1, r2, r3}
2001 stmia r0!, {r1, r2, r3}
2002
2003 bx lr
2004 }
2005
2006 /*---------------------------------------------------------------------------*
2007 Name: MI_Zero36B
2008
2009 Description: Fills 36-byte area with 0 by CPU.
2010
2011 Arguments: pDest: Destination address
2012
2013 Returns: None.
2014 *---------------------------------------------------------------------------*/
MI_Zero36B(register void * pDest)2015 asm void MI_Zero36B(register void* pDest)
2016 {
2017 mov r1, #0
2018 mov r2, #0
2019 mov r3, #0
2020 stmia r0!, {r1, r2, r3}
2021 stmia r0!, {r1, r2, r3}
2022 stmia r0!, {r1, r2, r3}
2023
2024 bx lr
2025 }
2026
2027 /*---------------------------------------------------------------------------*
2028 Name: MI_Zero48B
2029
2030 Description: Fills 48-byte area with 0 by CPU.
2031
2032 Arguments: pDest: Destination address
2033
2034 Returns: None.
2035 *---------------------------------------------------------------------------*/
MI_Zero48B(register void * pDest)2036 asm void MI_Zero48B(register void* pDest)
2037 {
2038 mov r1, #0
2039 mov r2, #0
2040 mov r3, #0
2041 stmia r0!, {r1, r2, r3}
2042 stmia r0!, {r1, r2, r3}
2043 stmia r0!, {r1, r2, r3}
2044 stmia r0!, {r1, r2, r3}
2045
2046 bx lr
2047 }
2048
2049 /*---------------------------------------------------------------------------*
2050 Name: MI_Zero64B
2051
2052 Description: Fills 64-byte area with 0 by CPU.
2053
2054 Arguments: pDest: Destination address
2055
2056 Returns: None.
2057 *---------------------------------------------------------------------------*/
MI_Zero64B(register void * pDest)2058 asm void MI_Zero64B(register void* pDest)
2059 {
2060 mov r1, #0
2061 mov r2, #0
2062 stmia r0!, {r1, r2}
2063 mov r3, #0
2064 stmia r0!, {r1, r2}
2065 stmia r0!, {r1, r2, r3}
2066 stmia r0!, {r1, r2, r3}
2067 stmia r0!, {r1, r2, r3}
2068 stmia r0!, {r1, r2, r3}
2069
2070 bx lr
2071 }
2072
2073 //---- End limitation of THUMB-Mode
2074 #include <nitro/codereset.h>
2075