1 /*---------------------------------------------------------------------------*
2   Project:  TwlSDK - MI
3   File:     mi_memory.c
4 
5   Copyright 2003-2008 Nintendo. All rights reserved.
6 
7   These coded instructions, statements, and computer programs contain
8   proprietary information of Nintendo of America Inc. and/or Nintendo
9   Company Ltd., and are protected by Federal copyright law. They may
10   not be disclosed to third parties or copied or duplicated in any form,
11   in whole or in part, without the prior written consent of Nintendo.
12 
13   $Date:: 2008-09-17#$
14   $Rev: 8556 $
15   $Author: okubata_ryoma $
16  *---------------------------------------------------------------------------*/
17 
18 #include <nitro/types.h>
19 #include <nitro/platform.h>
20 #include <nitro/mi/memory.h>
21 #include <nitro/math/math.h>
22 
23 
24 //****Bug fix****
25 //  Because halfword access instructions such as ldrh and strh are not passed through by the inline assembler due to a CW bug, instruction values are written directly using dcd in order to avoid this bug.
26 //
27 //  When the bug is fixed, the 'define' below will be removed.
28 
29 //  Fixed with CodeWarrior 0.4 or later
30 //#define CW_BUG_FOR_LDRH_AND_STRH
31 
32 
// Building blocks for hand-encoding ARM halfword transfer instructions
// (ldrh / strh with an immediate offset) as raw 32-bit words.
#define HALFW_CONDAL  0xe0000000       // Condition field (bits 31:28): AL (always)
#define HALFW_CONDNE  0x10000000       // Condition field (bits 31:28): NE
#define HALFW_CONDEQ  0x00000000       // Condition field (bits 31:28): EQ

#define HALFW_OFF_PL  0x00800000       // U bit (bit 23) set: offset is added to the base
#define HALFW_OFF_MI  0x00000000       // U bit clear: offset is subtracted from the base
#define HALFW_LOAD    0x00100000       // L bit (bit 20) set: load (ldrh)
#define HALFW_STORE   0x00000000       // L bit clear: store (strh)
#define HALFW_RN(n)   ((n)<<16)        // Rn field (bits 19:16): base address register
#define HALFW_RD(n)   ((n)<<12)        // Rd field (bits 15:12): register that is loaded or stored

#define HALFW_DEF1    0x004000B0       // Fixed bits: immediate-offset halfword transfer, post-indexed (P bit 24 clear)
#define HALFW_DEF2    0x014000B0       // Fixed bits: immediate-offset halfword transfer, offset addressing without writeback (P bit 24 set)

#define HALFW_IMM(n)   ( ((n)&0xf)  |  (((n)&0xf0)<<4) )        // 8-bit immediate: low nibble -> bits 3:0, high nibble -> bits 11:8


// Emits one encoded halfword transfer as a data word.
//   cond   : one of HALFW_CONDxx
//   d      : number of the transferred register (placed in the Rd field)
//   n      : number of the base register (placed in the Rn field)
//   offset : immediate offset, only the low 8 bits are used
//   sign   : HALFW_OFF_PL or HALFW_OFF_MI
//   ldst   : HALFW_LOAD or HALFW_STORE
//   def    : HALFW_DEF1 (post-indexed) or HALFW_DEF2 (offset addressing)
#define HALFW_DCD( cond, d, n, offset, sign, ldst, def ) \
   dcd (def)|(cond)|(sign)|(ldst)|HALFW_RN(n)|HALFW_RD(d)|HALFW_IMM(offset)

//---- ldrh Rd, [Rn], #+offset   (post-indexed)
#define LDRH_AD1( cond, d, n, offset ) \
   HALFW_DCD( cond, d, n, offset, HALFW_OFF_PL, HALFW_LOAD, HALFW_DEF1 )

//---- ldrh Rd, [Rn, #+offset]
#define LDRH_AD2( cond, d, n, offset ) \
   HALFW_DCD( cond, d, n, offset, HALFW_OFF_PL, HALFW_LOAD, HALFW_DEF2 )

//---- ldrh Rd, [Rn], #-offset   (post-indexed)
#define LDRH_AD3( cond, d, n, offset ) \
   HALFW_DCD( cond, d, n, offset, HALFW_OFF_MI, HALFW_LOAD, HALFW_DEF1 )

//---- ldrh Rd, [Rn, #-offset]
#define LDRH_AD4( cond, d, n, offset ) \
   HALFW_DCD( cond, d, n, offset, HALFW_OFF_MI, HALFW_LOAD, HALFW_DEF2 )

//---- strh Rd, [Rn], #+offset   (post-indexed)
#define STRH_AD1( cond, d, n, offset ) \
   HALFW_DCD( cond, d, n, offset, HALFW_OFF_PL, HALFW_STORE, HALFW_DEF1 )

//---- strh Rd, [Rn, #+offset]
#define STRH_AD2( cond, d, n, offset ) \
   HALFW_DCD( cond, d, n, offset, HALFW_OFF_PL, HALFW_STORE, HALFW_DEF2 )

//---- strh Rd, [Rn], #-offset   (post-indexed)
#define STRH_AD3( cond, d, n, offset ) \
   HALFW_DCD( cond, d, n, offset, HALFW_OFF_MI, HALFW_STORE, HALFW_DEF1 )

//---- strh Rd, [Rn, #-offset]
#define STRH_AD4( cond, d, n, offset ) \
   HALFW_DCD( cond, d, n, offset, HALFW_OFF_MI, HALFW_STORE, HALFW_DEF2 )
84 
85 
86 
87 #include <nitro/code32.h>
88 //=======================================================================
89 //           MEMORY OPERATIONS
90 //=======================================================================
91 /*---------------------------------------------------------------------------*
92   Name:         MIi_CpuClear16
93 
94   Description:  Fills memory with specified data.
95                 16-bit version.
96 
97   Arguments:    data: Fill data
98                 destp: Destination address
99                 size: Size (bytes)
100 
101   Returns:      None.
102  *---------------------------------------------------------------------------*/
asm void MIi_CpuClear16( register u16 data, register void* destp, register u32 size )
{
        // Fills size bytes at destp with the halfword data, two bytes per pass.
        // Register usage:
        //   r0 = data   (strh stores its low halfword)
        //   r1 = destp  (base address, never modified)
        //   r2 = size in bytes
        //   r3 = n, byte offset of the next halfword to store
        // cmp sets the flags once per pass; the conditional str/add/b that
        // follow do not alter them, so all three act on the same comparison.
        // The loop steps by 2, so an odd size still stores a whole final halfword.
        mov     r3, #0                  // n = 0

@00:
        cmp     r3, r2                  // n < size ?  ("lt" is a signed comparison)
        strlth  r0, [r1, r3]            // *((vu16 *)(destp + n)) = data
        addlt   r3, r3, #2              // n += 2
        blt     @00                     // repeat while the comparison held

        bx      lr                      // return to caller
}
115 
116 /*---------------------------------------------------------------------------*
117   Name:         MIi_CpuCopy16
118 
119   Description:  Copies memory by CPU.
120                 16-bit version
121 
122   Arguments:    srcp: Source address
123                 destp: Destination address
124                 size: Size (bytes)
125 
126   Returns:      None.
127  *---------------------------------------------------------------------------*/
asm void MIi_CpuCopy16( register const void *srcp, register void *destp, register u32 size )
{
        // Copies size bytes from srcp to destp one halfword at a time,
        // walking upwards from offset 0.
        // Register usage:
        //   r0 = srcp, r1 = destp (bases, never modified), r2 = size in bytes
        //   r12 = n, byte offset of the current halfword
        //   r3  = halfword in transit
        mov     r12, #0                 // n = 0

@10:
        cmp     r12, r2                 // n < size ?  (signed; flags reused by every "lt" below)

#ifndef CW_BUG_FOR_LDRH_AND_STRH
        ldrlth  r3, [r0, r12]           // r3 = *((vu16 *)(srcp + n))
#else
        dcd     0xb19030bc              // hand-encoded "ldrlth r3, [r0, r12]"
#endif
#ifndef CW_BUG_FOR_LDRH_AND_STRH
        strlth  r3, [r1, r12]           // *((vu16 *)(destp + n)) = r3
#else
        dcd     0xb18130bc              // hand-encoded "strlth r3, [r1, r12]"
#endif
        addlt   r12, r12, #2            // n += 2
        blt     @10                     // repeat while n < size held

        bx      lr                      // return to caller
}
150 
151 /*---------------------------------------------------------------------------*
152   Name:         MIi_CpuSend16
153 
154   Description:  Sends u16 data to fixed address.
155                 16-bit version.
156 
157   Arguments:    src: Data stream to send
158                 dest: Destination address. Not incremented.
159                 size: Size (bytes)
160 
161   Returns:      None.
162  *---------------------------------------------------------------------------*/
asm void MIi_CpuSend16( register const void *srcp, register volatile void* destp, register u32 size )
{
        // Writes every halfword of the size-byte buffer at srcp, in ascending
        // order, to the single fixed address destp.
        // Register usage:
        //   r0 = srcp (base, never modified), r1 = destp (fixed), r2 = size in bytes
        //   r12 = n, byte offset into the source; r3 = halfword in transit
        mov     r12, #0                 // n = 0

@11:
        cmp     r12, r2                 // n < size ?  (signed; flags reused by every "lt" below)
#ifndef CW_BUG_FOR_LDRH_AND_STRH
        ldrlth  r3, [r0, r12]           // r3 = *((vu16 *)(srcp + n))
#else
        dcd     0xb19030bc              // hand-encoded "ldrlth r3, [r0, r12]"
#endif
        strlth  r3, [r1, #0]            // *((vu16 *)destp) = r3   (destination does not advance)
        addlt   r12, r12, #2            // n += 2
        blt     @11                     // repeat while n < size held

        bx      lr                      // return to caller
}
180 
181 /*---------------------------------------------------------------------------*
182   Name:         MIi_CpuRecv16
183 
184   Description:  Receives u16 data from fixed address.
185                 16-bit version.
186 
187   Arguments:    src: Source address. Not incremented.
188                 dest: Data buffer to receive
189                 size: Size (bytes)
190 
191   Returns:      None.
192  *---------------------------------------------------------------------------*/
asm void MIi_CpuRecv16( register volatile const void *srcp, register void* destp, register u32 size )
{
        // Reads the fixed address srcp once per halfword and stores the
        // values at ascending offsets in the size-byte buffer at destp.
        // Register usage:
        //   r0 = srcp (fixed), r1 = destp (base, never modified), r2 = size in bytes
        //   r12 = n, byte offset into the destination; r3 = halfword in transit
        mov     r12, #0                 // n = 0

@12:
        cmp     r12, r2                 // n < size ?  (signed; flags reused by every "lt" below)
        ldrlth  r3, [r0]                // r3 = *((vu16 *)srcp)   (source does not advance)
        strlth  r3, [r1, r12]           // *((vu16 *)(destp + n)) = r3
        addlt   r12, r12, #2            // n += 2
        blt     @12                     // repeat while n < size held

        bx      lr                      // return to caller
}
206 
207 /*---------------------------------------------------------------------------*
208   Name:         MIi_CpuPipe16
209 
210   Description:  Pipes data from fixed address to fixed address.
211                 16-bit version.
212 
213   Arguments:    src: Source address. Not incremented.
214                 dest: Destination address. Not incremented.
215                 size: Size (bytes)
216 
217   Returns:      None.
218  *---------------------------------------------------------------------------*/
asm void MIi_CpuPipe16( register volatile const void *srcp, register volatile void* destp, register u32 size )
{
        // Transfers size/2 halfwords from the fixed address srcp to the fixed
        // address destp; neither address advances.
        // Register usage:
        //   r0 = srcp (fixed), r1 = destp (fixed), r2 = size in bytes
        //   r12 = n, count of bytes transferred so far; r3 = halfword in transit
        mov     r12, #0                 // n = 0

@13:
        cmp     r12, r2                 // n < size ?  (signed; flags reused by every "lt" below)
        ldrlth  r3, [r0]                // r3 = *((vu16 *)srcp)
        strlth  r3, [r1]                // *((vu16 *)destp) = r3
        addlt   r12, r12, #2            // n += 2
        blt     @13                     // repeat while n < size held

        bx      lr                      // return to caller
}
232 
233 /*---------------------------------------------------------------------------*
234   Name:         MIi_CpuMove16
235 
236   Description:  Moves memory data (16-bit version).
237 
238   Arguments:    src:  Source address, must be in 2-byte alignment
239                 dest:  Destination address, must be in 2-byte alignment
240                 size:  Size (bytes), must be in 2-byte alignment
241 
242   Returns:      None.
243  *---------------------------------------------------------------------------*/
static asm void CpuCopy16Reverse( register const void *srcp, register void *destp, register u32 size )
{
        // Copies size bytes from srcp to destp one halfword at a time,
        // starting at the highest address and working downwards.
        // Called by MIi_CpuMove16 when dest lies inside the source range.
        // Register usage:
        //   r0  = source cursor, starts one past the end of the source
        //   r1  = destination cursor, starts one past the end of the destination
        //   r12 = original destp, the address at which the loop stops
        //   r2  = size on entry, then reused for the halfword in transit
        mov     r12, r1                 // r12: destBeginp = destp
        add     r0, r0, r2              // r0:  srcp  += size
        add     r1, r1, r2              // r1:  destp += size

@14:
        cmp     r12, r1                 // while (destBeginp < destp)   (signed compare)
        ldrlth  r2, [r0, #-2]!          // r2 = *(--(vu16 *)srcp)    (pre-decrement with writeback)
        strlth  r2, [r1, #-2]!          // *(--(vu16 *)destp) = r2   (pre-decrement with writeback)
        blt     @14

        bx      lr                      // return to caller
}
258 
void MIi_CpuMove16(const void *src, void *dest, u32 size)
{
    const u32 srcAddr  = (u32)src;
    const u32 destAddr = (u32)dest;

    // A backward copy is needed only when dest starts strictly inside the
    // source range (src, src + size): a forward copy would then overwrite
    // source halfwords before they have been read.
    if( ( srcAddr < destAddr ) && ( destAddr < srcAddr + size ) )
    {
        CpuCopy16Reverse(src, dest, size);
        return;
    }

    // dest is at or below src, or the ranges do not overlap: copy forward.
    MIi_CpuCopy16(src, dest, size);
}
271 
272 /*---------------------------------------------------------------------------*
273   Name:         MIi_CpuFind16
274 
275   Description:  Finds memory data (16-bit version).
276 
277   Arguments:    src:  Source address, must be in 2 byte alignment
278                 data:  Target data
279                 size:  Size (bytes), must be in 2 byte alignment
280 
281   Returns:      Pointer to found data or NULL.
282  *---------------------------------------------------------------------------*/
void* MIi_CpuFind16(const void *src, u16 data, u32 size)
{
    const u8* base = (const u8*)src;
    u32 offset = 0;

    // Scan halfword by halfword; offset is the byte position inside the buffer.
    while( offset < size )
    {
        const u16* cur = (const u16*)( base + offset );

        if( *cur == data )
        {
            // First match wins; the const qualifier is dropped for the caller.
            return (void*)cur;
        }

        offset += 2;
    }

    // No halfword in the range equals data.
    return NULL;
}
298 
299 /*---------------------------------------------------------------------------*
300   Name:         MIi_CpuComp16
301 
302   Description:  Compares memory data (16-bit version).
303 
304   Arguments:    mem1:  Target address 1, must be in 2-byte alignment
305                 mem2:  Target address 2, must be in 2-byte alignment
306                 size:  Size (bytes), must be in 2-byte alignment
307 
308   Returns:      < 0: mem1 smaller than mem2.
309                 = 0: mem1 equals mem2.
310                 > 0: mem1 larger than mem2.
311  *---------------------------------------------------------------------------*/
int MIi_CpuComp16(const void *mem1, const void *mem2, u32 size)
{
    const u16* lhs = (const u16*)mem1;
    const u16* rhs = (const u16*)mem2;
    const u16* const lhsEnd = (const u16*)( (const u8*)mem1 + size );

    // Walk both buffers in lockstep and stop at the first differing halfword.
    for( ; lhs < lhsEnd; ++lhs, ++rhs )
    {
        // Both operands are widened to int first, so the difference keeps its sign.
        const int diff = (int)*lhs - (int)*rhs;

        if( diff != 0 )
        {
            return diff;
        }
    }

    // Every compared halfword was equal.
    return 0;
}
330 
331 
332 /*---------------------------------------------------------------------------*
333   Name:         MIi_CpuClear32
334 
335   Description:  Fills memory with specified data.
336                 32-bit version.
337 
338   Arguments:    data: Fill data
339                 destp: Destination address
340                 size: Size (bytes)
341 
342   Returns:      None.
343  *---------------------------------------------------------------------------*/
asm void MIi_CpuClear32( register u32 data, register void *destp, register u32 size )
{
        // Fills size bytes at destp with the word data, four bytes per pass.
        // Register usage:
        //   r0  = data
        //   r1  = destp, used as the write cursor (advanced by the stm)
        //   r2  = size in bytes
        //   r12 = destEndp, first address past the area to fill
        add     r12, r1, r2             // r12: destEndp = destp + size

@20:
        cmp     r1, r12                 // while (destp < destEndp)   (signed compare of addresses)
        stmltia r1!, {r0}               // *((vu32 *)(destp++)) = data   (writeback adds 4 to r1)
        blt     @20                     // stm and b leave the flags from cmp untouched
        bx      lr                      // return to caller
}
354 
355 /*---------------------------------------------------------------------------*
356   Name:         MIi_CpuCopy32
357 
358   Description:  Copies memory by CPU.
359                 32-bit version.
360 
361   Arguments:    srcp: Source address
362                 destp: Destination address
363                 size: size (bytes)
364 
365   Returns:      None.
366  *---------------------------------------------------------------------------*/
asm void MIi_CpuCopy32( register const void *srcp, register void *destp, register u32 size )
{
        // Copies size bytes from srcp to destp one word at a time, ascending.
        // Register usage:
        //   r0  = srcp, read cursor  (advanced by the ldm)
        //   r1  = destp, write cursor (advanced by the stm)
        //   r2  = size on entry, then reused for the word in transit
        //   r12 = destEndp, first address past the destination area
        add     r12, r1, r2             // r12: destEndp = destp + size

@30:
        cmp     r1, r12                 // while (destp < destEndp)   (signed compare of addresses)
        ldmltia r0!, {r2}               // r2 = *((vu32 *)(srcp)++)
        stmltia r1!, {r2}               // *((vu32 *)(destp)++) = r2
        blt     @30

        bx      lr                      // return to caller
}
379 
380 /*---------------------------------------------------------------------------*
381   Name:         MIi_CpuSend32
382 
383   Description:  Sends u32 data to fixed address.
384                 32-bit version.
385 
386   Arguments:    src: Data stream to send
387                 dest: Destination address. Not incremented
388                 size: Size (bytes)
389 
390   Returns:      None.
391  *---------------------------------------------------------------------------*/
asm void MIi_CpuSend32( register const void *srcp, register volatile void *destp, register u32 size )
{
        // Writes every word of the size-byte buffer at srcp, in ascending
        // order, to the single fixed address destp.
        // Register usage:
        //   r0  = srcp, read cursor (advanced by the ldm)
        //   r1  = destp (fixed, never modified)
        //   r2  = size on entry, then reused for the word in transit
        //   r12 = srcEndp, first address past the source buffer
        add     r12, r0, r2             // r12: srcEndp = srcp + size

@31:
        cmp     r0, r12                 // while (srcp < srcEndp)   (signed compare of addresses)
        ldmltia r0!, {r2}               // r2 = *((vu32 *)(srcp)++)
        strlt   r2, [r1]                // *((vu32 *)(destp)) = r2
        blt     @31

        bx      lr                      // return to caller
}
404 
405 /*---------------------------------------------------------------------------*
406   Name:         MIi_CpuRecv32
407 
408   Description:  Receives u32 data from fixed address.
409                 32-bit version.
410 
411   Arguments:    src: Source address. Not incremented
412                 dest: Data buffer to receive
413                 size: Size (bytes)
414 
415   Returns:      None.
416  *---------------------------------------------------------------------------*/
asm void MIi_CpuRecv32( volatile const void *srcp, register void *destp, register u32 size )
{
        // Reads the fixed address srcp once per word and stores the values
        // at ascending addresses in the size-byte buffer at destp.
        // Register usage:
        //   r0  = srcp (fixed, never modified)
        //   r1  = destp, write cursor (advanced by the stm)
        //   r2  = size on entry, then reused for the word in transit
        //   r12 = destEndp, first address past the destination buffer
        // NOTE(review): srcp is not declared "register" like the other
        // parameters, yet the code reads it from r0 -- confirm this is intended.
        add     r12, r1, r2             // r12: destEndp = destp + size

@32:
        cmp     r1, r12                 // while (destp < destEndp)   (signed compare of addresses)
        ldrlt   r2, [r0]                // r2 = *((vu32 *)(srcp))
        stmltia r1!, {r2}               // *((vu32 *)(destp)++) = r2
        blt     @32

        bx      lr                      // return to caller
}
429 
430 /*---------------------------------------------------------------------------*
431   Name:         MIi_CpuPipe32
432 
433   Description:  Pipes data from fixed address to fixed address.
434                 32-bit version.
435 
436   Arguments:    src: Source address. Not incremented
437                 dest: Destination address. Not incremented
438                 size: Size (bytes)
439 
440   Returns:      None.
441  *---------------------------------------------------------------------------*/
asm void MIi_CpuPipe32( volatile const void *srcp, register volatile void *destp, register u32 size )
{
        // Transfers size/4 words from the fixed address srcp to the fixed
        // address destp; neither address advances.
        // Register usage:
        //   r0 = srcp (fixed), r1 = destp (fixed), r2 = size in bytes
        //   r12 = n, count of bytes transferred so far; r3 = word in transit
        // NOTE(review): srcp is not declared "register" like the other
        // parameters, yet the code reads it from r0 -- confirm this is intended.
        mov     r12, #0                 // n = 0

@33:
        cmp     r12, r2                 // n < size ?  (signed; flags reused by every "lt" below)
        ldrlt   r3, [r0]                // r3 = *((vu32 *)(srcp))
        strlt   r3, [r1]                // *((vu32 *)(destp)) = r3
        addlt   r12, r12, #4            // n += 4
        blt     @33                     // repeat while n < size held

        bx      lr                      // return to caller
}
455 
456 /*---------------------------------------------------------------------------*
457   Name:         MIi_CpuMove32
458 
459   Description:  Moves memory data (32-bit version).
460 
461   Arguments:    src:  Source address, must be in 4-byte alignment
462                 dest:  Destination address, must be in 4-byte alignment
463                 size:  Size (bytes), must be in 4-byte alignment
464 
465   Returns:      None.
466  *---------------------------------------------------------------------------*/
static asm void CpuCopy32Reverse( register const void *srcp, register void *destp, register u32 size )
{
        // Copies size bytes from srcp to destp one word at a time,
        // starting at the highest address and working downwards.
        // Called by MIi_CpuMove32 when dest lies inside the source range.
        // Register usage:
        //   r0  = source cursor, starts one past the end of the source
        //   r1  = destination cursor, starts one past the end of the destination
        //   r12 = original destp, the address at which the loop stops
        //   r2  = size on entry, then reused for the word in transit
        mov     r12, r1                 // r12: destBeginp = destp
        add     r0, r0, r2              // r0:  srcp  += size
        add     r1, r1, r2              // r1:  destp += size

@34:
        cmp     r12, r1                 // while (destBeginp < destp)   (signed compare)
        ldrlt   r2, [r0, #-4]!          // r2 = *(--(vu32 *)(srcp))    (pre-decrement with writeback)
        strlt   r2, [r1, #-4]!          // *(--(vu32 *)(destp)) = r2   (pre-decrement with writeback)
        blt     @34

        bx      lr                      // return to caller
}
481 
void MIi_CpuMove32(const void *src, void *dest, u32 size)
{
    const u32 srcAddr  = (u32)src;
    const u32 destAddr = (u32)dest;

    // A backward copy is needed only when dest starts strictly inside the
    // source range (src, src + size): a forward copy would then overwrite
    // source words before they have been read.
    if( ( srcAddr < destAddr ) && ( destAddr < srcAddr + size ) )
    {
        CpuCopy32Reverse(src, dest, size);
        return;
    }

    // dest is at or below src, or the ranges do not overlap: copy forward.
    MIi_CpuCopy32(src, dest, size);
}
494 
495 /*---------------------------------------------------------------------------*
496   Name:         MIi_CpuFind32
497 
498   Description:  Finds memory data (32-bit version).
499 
500   Arguments:    src:  Source address, must be in 4-byte alignment
501                 data:  Target data
502                 size:  Size (bytes), must be in 4-byte alignment
503 
504   Returns:      Pointer to found data or NULL.
505  *---------------------------------------------------------------------------*/
void* MIi_CpuFind32(const void *src, u32 data, u32 size)
{
    const u8* base = (const u8*)src;
    u32 offset = 0;

    // Scan word by word; offset is the byte position inside the buffer.
    while( offset < size )
    {
        const u32* cur = (const u32*)( base + offset );

        if( *cur == data )
        {
            // First match wins; the const qualifier is dropped for the caller.
            return (void*)cur;
        }

        offset += 4;
    }

    // No word in the range equals data.
    return NULL;
}
521 
522 /*---------------------------------------------------------------------------*
523   Name:         MIi_CpuComp32
524 
525   Description:  Compares memory data (32-bit version).
526 
527   Arguments:    mem1:  Target address 1, must be in 4-byte alignment
528                 mem2:  Target address 2, must be in 4-byte alignment
529                 size:  Size (bytes), must be in 4-byte alignment
530 
531   Returns:      < 0: mem1 smaller than mem2.
532                 = 0: mem1 equals mem2.
533                 > 0: mem1 larger than mem2.
534  *---------------------------------------------------------------------------*/
int MIi_CpuComp32(const void *mem1, const void *mem2, u32 size)
{
    const u32* lhs = (const u32*)mem1;
    const u32* rhs = (const u32*)mem2;
    const u32* const lhsEnd = (const u32*)( (const u8*)mem1 + size );

    // Walk both buffers in lockstep and stop at the first differing word.
    while( lhs < lhsEnd )
    {
        const u32 a = *lhs++;
        const u32 b = *rhs++;

        // Unsigned ordering is reported as -1 / 1 rather than as a difference,
        // since a u32 difference does not fit the sign convention of int.
        if( a < b )
        {
            return -1;
        }
        if( a > b )
        {
            return 1;
        }
    }

    // Every compared word was equal.
    return 0;
}
554 
555 
556 /*---------------------------------------------------------------------------*
557   Name:         MIi_CpuClearFast
558 
559   Description:  Fills memory with specified data.
560                 High speed by writing 32 bytes at a time using stm.
561 
562   Arguments:    data: Fill data
563                 destp: Destination address
564                 size: Size (bytes)
565 
566   Returns:      None.
567  *---------------------------------------------------------------------------*/
asm void MIi_CpuClearFast( register u32 data, register void *destp, register u32 size )
{
        // Fills size bytes at destp with the word data: first in 32-byte
        // bursts (one 8-register stm each), then word by word for the rest.
        // Register usage:
        //   r0        = data
        //   r1        = destp, write cursor (advanced by the stm instructions)
        //   r2-r8     = seven more copies of data for the burst store
        //   r9        = destEndp, first address past the area to fill
        //   r12       = destBlockEndp, end of the part filled in 32-byte bursts
        stmfd   sp!, {r4-r9}            // save r4-r9; restored by the ldmfd before returning

        add     r9, r1, r2              // r9:  destEndp = destp + size
        mov     r12, r2, lsr #5         // r12: destBlockEndp = destp + size/32*32
        add     r12, r1, r12, lsl #5

        mov     r2, r0                  // replicate data into r2-r8 (size in r2 is no longer needed)
        mov     r3, r2
        mov     r4, r2
        mov     r5, r2
        mov     r6, r2
        mov     r7, r2
        mov     r8, r2

@40:
        cmp     r1, r12                 // while (destp < destBlockEndp)   (signed compare)
        stmltia r1!, {r0, r2-r8}        // store 8 words = 32 bytes of data, destp += 32
        blt     @40
@41:
        cmp     r1, r9                  // while (destp < destEndp)   (remaining size%32 bytes)
        stmltia r1!, {r0}               // *((vu32 *)(destp++)) = data
        blt     @41

        ldmfd   sp!, {r4-r9}            // restore the saved registers
        bx      lr                      // return to caller
}
596 
597 /*---------------------------------------------------------------------------*
598   Name:         MIi_CpuCopyFast
599 
600   Description:  Copies memory by CPU.
601                 High speed by loading/writing 32byte at a time using stm/ldm.
602 
603   Arguments:    srcp: Source address
604                 destp: Destination address
605                 size: Size (bytes)
606 
607   Returns:      None.
608  *---------------------------------------------------------------------------*/
asm void MIi_CpuCopyFast( register const void *srcp, register void *destp, register u32 size )
{
        // Copies size bytes from srcp to destp: first in 32-byte bursts
        // (8-register ldm/stm pairs), then word by word for the rest.
        // Register usage:
        //   r0    = srcp, read cursor   (advanced by the ldm instructions)
        //   r1    = destp, write cursor (advanced by the stm instructions)
        //   r2-r9 = data in transit (r2 holds size until r10/r12 are computed)
        //   r10   = destEndp, first address past the destination area
        //   r12   = destBlockEndp, end of the part copied in 32-byte bursts
        stmfd   sp!, {r4-r10}           // save r4-r10; restored by the ldmfd before returning

        add     r10, r1, r2             // r10:  destEndp = destp + size
        mov     r12, r2, lsr #5         // r12: destBlockEndp = destp + size/32*32
        add     r12, r1, r12, lsl #5

@50:
        cmp     r1, r12                 // while (destp < destBlockEndp)   (signed compare)
        ldmltia r0!, {r2-r9}            // load 8 words from srcp, srcp += 32
        stmltia r1!, {r2-r9}            // store them to destp, destp += 32
        blt     @50
@51:
        cmp     r1, r10                 // while (destp < destEndp)   (remaining size%32 bytes)
        ldmltia r0!, {r2}               // *((vu32 *)(destp)++) = *((vu32 *)(srcp)++)
        stmltia r1!, {r2}
        blt     @51

        ldmfd   sp!, {r4-r10}           // restore the saved registers
        bx      lr                      // return to caller
}
631 
632 /*---------------------------------------------------------------------------*
633   Name:         MIi_CpuSendFast
634 
  Description:  Sends u32 data to a fixed address.
                High speed by loading 32 bytes at a time using ldm.
637 
638   Arguments:    src:  Data stream to send
639                 dest:  Destination address, not incremented
640                 size:  Size (bytes)
641 
642   Returns:      None.
643  *---------------------------------------------------------------------------*/
asm void MIi_CpuSendFast( register const void *srcp, register volatile void *destp, register u32 size )
{
        // Writes every word of the size-byte buffer at srcp to the single
        // fixed address destp. The source is read in 32-byte bursts where
        // possible, then word by word for the rest.
        // Register usage:
        //   r0    = srcp, read cursor (advanced by the ldm instructions)
        //   r1    = destp (fixed, never modified)
        //   r2-r9 = data in transit (r2 holds size until r10/r12 are computed)
        //   r10   = srcEndp, first address past the source buffer
        //   r12   = srcBlockEndp, end of the part read in 32-byte bursts
        stmfd   sp!, {r4-r10}           // save r4-r10; restored by the ldmfd before returning

        add     r10, r0, r2             // r10: srcEndp = srcp + size
        mov     r12, r2, lsr #5         // r12: srcBlockEndp = srcp + size/32*32
        add     r12, r0, r12, lsl #5

@50:
        cmp     r0, r12                 // while (srcp < srcBlockEndp)   (signed compare)
        ldmltia r0!, {r2-r9}            // load 8 words; lowest address goes to r2, srcp += 32
        strlt   r2, [r1]                // write them to destp one by one in source order
        strlt   r3, [r1]
        strlt   r4, [r1]
        strlt   r5, [r1]
        strlt   r6, [r1]
        strlt   r7, [r1]
        strlt   r8, [r1]
        strlt   r9, [r1]
        blt     @50
@51:
        cmp     r0, r10                 // while (srcp < srcEndp)   (remaining size%32 bytes)
        ldmltia r0!, {r2}               // r2 = *((vu32 *)(srcp)++)
        strlt   r2, [r1]                // *((vu32 *)(destp)) = r2
        blt     @51

        ldmfd   sp!, {r4-r10}           // restore the saved registers
        bx      lr                      // return to caller
}
673 
674 /*---------------------------------------------------------------------------*
675   Name:         MIi_CpuRecvFast
676 
  Description:  Receives u32 data from a fixed address.
                High speed by writing 32 bytes at a time using stm.
679 
680   Arguments:    src:  Source address. not incremented
681                 dest:  Data buffer to receive
682                 size:  Size (bytes)
683 
684   Returns:      None.
685  *---------------------------------------------------------------------------*/
asm void MIi_CpuRecvFast(volatile const void *srcp, register void *destp, register u32 size)
{
        // Reads the fixed address srcp once per word and stores the values
        // at ascending addresses in the size-byte buffer at destp. Eight
        // reads are gathered per 32-byte burst store, then the rest is
        // handled word by word.
        // Register usage:
        //   r0    = srcp (fixed, never modified)
        //   r1    = destp, write cursor (advanced by the stm instructions)
        //   r2-r9 = data in transit (r2 holds size until r10/r12 are computed)
        //   r10   = destEndp, first address past the destination buffer
        //   r12   = destBlockEndp, end of the part written in 32-byte bursts
        // NOTE(review): srcp is not declared "register" like the other
        // parameters, yet the code reads it from r0 -- confirm this is intended.
        stmfd   sp!, {r4-r10}           // save r4-r10; restored by the ldmfd before returning

        add     r10, r1, r2             // r10:  destEndp = destp + size
        mov     r12, r2, lsr #5         // r12: destBlockEndp = destp + size/32*32
        add     r12, r1, r12, lsl #5

@50:
        cmp     r1, r12                 // while (destp < destBlockEndp)   (signed compare)
        ldrlt   r2, [r0]                // eight successive reads of *((vu32 *)(srcp)),
        ldrlt   r3, [r0]                // the first into r2 and the last into r9
        ldrlt   r4, [r0]
        ldrlt   r5, [r0]
        ldrlt   r6, [r0]
        ldrlt   r7, [r0]
        ldrlt   r8, [r0]
        ldrlt   r9, [r0]
        stmltia r1!, {r2-r9}            // store them in read order (r2 at the lowest address), destp += 32
        blt     @50
@51:
        cmp     r1, r10                 // while (destp < destEndp)   (remaining size%32 bytes)
        ldrlt   r2, [r0]                // r2 = *((vu32 *)(srcp))
        stmltia r1!, {r2}               // *((vu32 *)(destp)++) = r2
        blt     @51

        ldmfd   sp!, {r4-r10}           // restore the saved registers
        bx      lr                      // return to caller
}
715 
716 /*---------------------------------------------------------------------------*
717   Name:         MIi_CpuMoveFast
718 
719   Description:  Moves memory data (32-byte version).
720 
721   Arguments:    src:  Source address, must be in 4-byte alignment
722                 dest:  Destination address, must be in 4-byte alignment
723                 size:  Size (byte), must be in 4-byte alignment
724 
725   Returns:      None.
726  *---------------------------------------------------------------------------*/
static asm void CpuCopyFastReverse( register const void *srcp, register void *destp, register u32 size )
{
        // Copies size bytes from srcp to destp starting at the highest
        // address: first the trailing size%32 bytes word by word, then the
        // remaining part in 32-byte bursts, all in descending address order.
        // Called by MIi_CpuMoveFast when dest lies inside the source range.
        // Register usage:
        //   r0    = source cursor, starts one past the end of the source
        //   r1    = destination cursor, starts one past the end of the destination
        //   r2-r9 = data in transit (r2 holds size until the cursors are set up)
        //   r10   = original destp, the address at which the burst loop stops
        //   r12   = destBlockBeginp, boundary between the burst part and the tail
        stmfd   sp!, {r4-r10}           // save r4-r10; restored by the ldmfd before returning

        mov     r10, r1                 // r10: destBeginp = destp
        mov     r12, r2, lsr #5         // r12: destBlockBeginp = destp + (size/32)*32
        add     r12, r1, r12, lsl #5
        add     r0, r0, r2              // r0:  srcp  += size
        add     r1, r1, r2              // r1:  destp += size

@52:
        cmp     r12, r1                 // while( destBlockBeginp < destp )   (tail, signed compare)
        ldrlt   r2, [r0, #-4]!          // *(--(vu32 *)(destp)) = *(--(vu32 *)(srcp))
        strlt   r2, [r1, #-4]!
        blt     @52
@53:
        cmp     r10, r1                 // while (destBeginp < destp)   (32-byte bursts)
        ldmltdb r0!, {r2-r9}            // load the 8 words below srcp, srcp -= 32
        stmltdb r1!, {r2-r9}            // store them below destp, destp -= 32
        blt     @53

        ldmfd   sp!, {r4-r10}           // restore the saved registers
        bx      lr                      // return to caller
}
751 
void MIi_CpuMoveFast(const void *src, void *dest, u32 size)
{
    const u32 srcAddr  = (u32)src;
    const u32 destAddr = (u32)dest;

    // A backward copy is needed only when dest starts strictly inside the
    // source range (src, src + size): a forward copy would then overwrite
    // source data before it has been read.
    if( ( srcAddr < destAddr ) && ( destAddr < srcAddr + size ) )
    {
        CpuCopyFastReverse(src, dest, size);
        return;
    }

    // dest is at or below src, or the ranges do not overlap: copy forward.
    MIi_CpuCopyFast(src, dest, size);
}
764 
765 //=======================================================================
766 //           FOR CONVENIENCE (memory copy)
767 //=======================================================================
768 /*---------------------------------------------------------------------------*
769   Name:         MI_Copy16B
770 
771   Description:  Copies 16-byte data by CPU.
772 
773   Arguments:    srcp: Source address
774                 destp: Destination address
775 
776   Returns:      None.
777  *---------------------------------------------------------------------------*/
asm void MI_Copy16B(register const void* pSrc, register void* pDest)
{
    // Copies 16 bytes (4 words) from pSrc (r0) to pDest (r1) as 3 + 1 words.
    // Only r2, r3 and r12 carry data, so nothing is pushed on the stack.
    ldmia   r0!, {r2, r3, r12}         // words 0-2; r0-r3, r12 need not be saved
    stmia   r1!, {r2, r3, r12}
    ldmia   r0!, {r2}                  // word 3
    stmia   r1!, {r2}

    bx      lr                         // return to caller
}
787 
788 /*---------------------------------------------------------------------------*
789   Name:         MI_Copy32B
790 
791   Description:  Copies 32-byte data by CPU.
792 
793   Arguments:    srcp: Source address
794                 destp: Destination address
795 
796   Returns:      None.
797  *---------------------------------------------------------------------------*/
asm void MI_Copy32B(register const void* pSrc, register void* pDest)
{
    // Copies 32 bytes (8 words) from pSrc (r0) to pDest (r1) as 3 + 3 + 2 words.
    // Only r2, r3 and r12 carry data, so nothing is pushed on the stack.
    ldmia   r0!, {r2, r3, r12}         // words 0-2; r0-r3, r12 need not be saved
    stmia   r1!, {r2, r3, r12}
    ldmia   r0!, {r2, r3, r12}         // words 3-5
    stmia   r1!, {r2, r3, r12}
    ldmia   r0!, {r2, r3}              // words 6-7
    stmia   r1!, {r2, r3}

    bx      lr                         // return to caller
}
809 
810 /*---------------------------------------------------------------------------*
811   Name:         MI_Copy36B
812 
813   Description:  Copies 36-byte data by CPU.
814 
815   Arguments:    srcp: Source address
816                 destp: Destination address
817 
818   Returns:      None.
819  *---------------------------------------------------------------------------*/
asm void MI_Copy36B(register const void* pSrc, register void* pDest)
{
    // Copies 36 bytes (9 words) from pSrc (r0) to pDest (r1) as 3 x 3 words.
    // Only r2, r3 and r12 carry data, so nothing is pushed on the stack.
    ldmia   r0!, {r2, r3, r12}         // words 0-2; r0-r3, r12 need not be saved
    stmia   r1!, {r2, r3, r12}
    ldmia   r0!, {r2, r3, r12}         // words 3-5
    stmia   r1!, {r2, r3, r12}
    ldmia   r0!, {r2, r3, r12}         // words 6-8
    stmia   r1!, {r2, r3, r12}

    bx      lr                         // return to caller
}
831 
832 /*---------------------------------------------------------------------------*
833   Name:         MI_Copy48B
834 
835   Description:  Copies 48-byte data by CPU.
836 
837   Arguments:    srcp: Source address
838                 destp: Destination address
839 
840   Returns:      None.
841  *---------------------------------------------------------------------------*/
MI_Copy48B(register const void * pSrc,register void * pDest)842 asm void MI_Copy48B(register const void* pSrc, register void* pDest)
843 {
844     ldmia   r0!, {r2, r3, r12}         // r0-r3, r12 need not saved
845     stmia   r1!, {r2, r3, r12}
846     ldmia   r0!, {r2, r3, r12}
847     stmia   r1!, {r2, r3, r12}
848     ldmia   r0!, {r2, r3, r12}
849     stmia   r1!, {r2, r3, r12}
850     ldmia   r0!, {r2, r3, r12}
851     stmia   r1!, {r2, r3, r12}
852 
853     bx      lr
854 }
855 
856 /*---------------------------------------------------------------------------*
857   Name:         MI_Copy64B
858 
859   Description:  Copies 64-byte data by CPU.
860 
861   Arguments:    srcp: Source address
862                 destp: Destination address
863 
864   Returns:      None.
865  *---------------------------------------------------------------------------*/
MI_Copy64B(register const void * pSrc,register void * pDest)866 asm void MI_Copy64B(register const void* pSrc, register void* pDest)
867 {
868     ldmia   r0!, {r2, r3, r12}         // r0-r3, r12 need not saved
869     stmia   r1!, {r2, r3, r12}
870     ldmia   r0!, {r2, r3, r12}
871     stmia   r1!, {r2, r3, r12}
872     ldmia   r0!, {r2, r3, r12}
873     stmia   r1!, {r2, r3, r12}
874     ldmia   r0!, {r2, r3, r12}
875     stmia   r1!, {r2, r3, r12}
876     ldmia   r0,  {r0, r2, r3, r12}
877     stmia   r1!, {r0, r2, r3, r12}
878 
879     bx      lr
880 }
881 
882 /*---------------------------------------------------------------------------*
883   Name:         MI_Copy128B
884 
885   Description:  Copies 128-byte data by CPU.
886 
887   Arguments:    srcp: Source address
888                 destp: Destination address
889 
890   Returns:      None.
891  *---------------------------------------------------------------------------*/
MI_Copy128B(register const void * pSrc,register void * pDest)892 asm void MI_Copy128B(register const void* pSrc, register void* pDest)
893 {
894     stmfd   sp!, {r4}
895 
896     ldmia   r0!, {r2, r3, r4, r12}         // r0-r3, r12 need not saved
897     stmia   r1!, {r2, r3, r4, r12}
898     ldmia   r0!, {r2, r3, r4, r12}
899     stmia   r1!, {r2, r3, r4, r12}
900     ldmia   r0!, {r2, r3, r4, r12}
901     stmia   r1!, {r2, r3, r4, r12}
902     ldmia   r0!, {r2, r3, r4, r12}
903     stmia   r1!, {r2, r3, r4, r12}
904     ldmia   r0!, {r2, r3, r4, r12}
905     stmia   r1!, {r2, r3, r4, r12}
906     ldmia   r0!, {r2, r3, r4, r12}
907     stmia   r1!, {r2, r3, r4, r12}
908     ldmia   r0!, {r2, r3, r4, r12}
909     stmia   r1!, {r2, r3, r4, r12}
910     ldmia   r0!, {r2, r3, r4, r12}
911     stmia   r1!, {r2, r3, r4, r12}
912 
913     ldmfd   sp!, {r4}
914     bx      lr
915 }
916 
//=======================================================================
//           FOR SDK USE (no alignment requirements)
//=======================================================================
/*---------------------------------------------------------------------------*
  Name:         MI_CpuFill8

  Description:  Fills memory with the specified data.
                Alignment is handled automatically.

  Arguments:    dstp: Destination address
                data: Fill data
                size: Size (bytes)

  Returns:      None.
 *---------------------------------------------------------------------------*/
932 #ifdef SDK_SMALL_BUILD
MI_CpuFill8(register void * dstp,register u8 data,register u32 size)933 asm void MI_CpuFill8( register void *dstp, register u8 data, register u32 size )
934 {
935         mov     r12, #0                 // n = 0
936 @1:
937         cmp     r12, r2                 // n < size ?
938         strltb  r1, [r0, r12]           // *((u8*)( dstp + n ) ) = data
939 
940         addlt   r12, r12, #1            // n ++
941         blt     @1
942 
943         bx      lr
944 }
945 #else  //ifdef SDK_SMALL_BUILD
MI_CpuFill8(register void * dstp,register u8 data,register u32 size)946 asm void MI_CpuFill8( register void *dstp, register u8 data, register u32 size )
947 {
948     cmp     r2, #0
949     bxeq    lr
950 
951     // 16-bit alignment of dstp
952     tst     r0, #1
953     beq     @_1
954 #ifndef CW_BUG_FOR_LDRH_AND_STRH
955     ldrh    r12, [r0, #-1]
956 #else
957     LDRH_AD4( HALFW_CONDAL, 12, 0, 1 ) // *** For CW BUG
958 #endif
959     and     r12, r12, #0x00FF
960     orr     r3, r12, r1, lsl #8
961 #ifndef CW_BUG_FOR_LDRH_AND_STRH
962     strh    r3, [r0, #-1]
963 #else
964     STRH_AD4( HALFW_CONDAL, 3, 0, 1 ) // *** For CW BUG
965 #endif
966     add     r0, r0, #1
967     subs    r2, r2, #1
968     bxeq    lr
969 @_1:
970 
971     // 32-bit alignment
972     cmp     r2, #2
973     bcc     @_6
974     orr     r1, r1, r1, lsl #8
975     tst     r0, #2
976     beq     @_8
977 #ifndef CW_BUG_FOR_LDRH_AND_STRH
978     strh    r1, [r0], #2
979 #else
980     STRH_AD1( HALFW_CONDAL, 1, 0, 2 ) // *** For CW BUG
981 #endif
982     subs    r2, r2, #2
983     bxeq    lr
984 @_8:
985     // 32-bit transfer
986     orr     r1, r1, r1, lsl #16
987     bics    r3, r2, #3
988     beq     @_10
989     sub     r2, r2, r3
990     add     r12, r3, r0
991 @_9:
992     str     r1, [r0], #4
993     cmp     r0, r12
994     bcc     @_9
995 
996 @_10:
997     //  Last 16-bit transfer
998     tst     r2, #2
999 #ifndef CW_BUG_FOR_LDRH_AND_STRH
1000     strneh  r1, [r0], #2
1001 #else
1002     STRH_AD1( HALFW_CONDNE, 1, 0, 2 ) // *** For CW BUG
1003 #endif
1004 
1005 @_6:
1006     //  Last 8-bit transfer
1007     tst     r2, #1
1008     bxeq    lr
1009 #ifndef CW_BUG_FOR_LDRH_AND_STRH
1010     ldrh    r3, [r0]
1011 #else
1012     LDRH_AD2( HALFW_CONDAL, 3, 0, 0 ) // *** For CW BUG
1013 #endif
1014     and     r3, r3, #0xFF00
1015     and     r1, r1, #0x00FF
1016     orr     r1, r1, r3
1017 #ifndef CW_BUG_FOR_LDRH_AND_STRH
1018     strh    r1, [r0]
1019 #else
1020     STRH_AD2( HALFW_CONDAL, 1, 0, 0 ) // *** For CW BUG
1021 #endif
1022     bx      lr
1023 }
1024 
MI_CpuFill(register void * dstp,register u8 data,register u32 size)1025 asm void MI_CpuFill( register void *dstp, register u8 data, register u32 size )
1026 {
1027     cmp     r2, #0
1028     bxeq    lr
1029 
1030     /* If less than 8 bytes, process directly here */
1031     cmp     r2, #8
1032     bgt	    _fill_and_align
1033 
1034 _fill1_less_than_equal_8:
1035     rsb     r3, r2, #0x8
1036     add     pc, pc, r3, lsl #2
1037     nop
1038     strb    r1, [r0], #1
1039     strb    r1, [r0], #1
1040     strb    r1, [r0], #1
1041     strb    r1, [r0], #1
1042     strb    r1, [r0], #1
1043     strb    r1, [r0], #1
1044     strb    r1, [r0], #1
1045     strb    r1, [r0], #1
1046     bx      lr
1047 
1048 _fill_and_align:
1049     /* If more than 8 bytes, process awareness of alignment */
1050 
1051     /* Fill the register with 4 bytes */
1052     orr     r1, r1, r1, lsl #8
1053     orr     r1, r1, r1, lsl #16
1054 
1055     /* Process the fractions at the leading end of dst first */
1056     tst     r0, #1
1057     subne   r2, r2, #1
1058     strneb  r1, [r0], #1
1059 
1060     tst     r0, #2
1061     subne   r2, r2, #2
1062     strneh  r1, [r0], #2
1063 
1064     tst     r0, #4
1065     subne   r2, r2, #4
1066     strne   r1, [r0], #4
1067 
1068 _fill32:
1069     cmp     r2, #32
1070     blt	    _fill4
1071 
1072 _fill32_pre:
1073     stmfd   sp!, {r4-r10}
1074     mov     r4, r1
1075     mov     r5, r1
1076     mov     r6, r1
1077     mov     r7, r1
1078     mov     r8, r1
1079     mov     r9, r1
1080     mov     r10, r1
1081     subs    r2, r2, #32
1082 
1083 _fill32_loop:
1084     stmgeia r0!, {r1,r4-r10}
1085     subges  r2, r2, #32
1086     bge     _fill32_loop
1087     add     r2, r2, #32
1088 
1089 _fill32_post:
1090     ldmfd   sp!, {r4-r10}
1091 
1092 _fill4:
1093     cmp     r2, #4
1094     blt     _fill1_less_than_4
1095     subs    r2, r2, #4
1096 
1097 _fill4_loop:
1098     strge   r1, [r0], #4
1099     subs    r2, r2, #4
1100     bge     _fill4_loop
1101     add     r2, r2, #4
1102 
1103 _fill1_less_than_4:
1104     subs    r2, r2, #1
1105     strgeb  r1, [r0], #1
1106     subges  r2, r2, #1
1107     strgeb  r1, [r0], #1
1108     subges  r2, r2, #1
1109     strgeb  r1, [r0], #1
1110 
1111     bx      lr
1112 }
1113 
1114 #endif // ifdef SDK_SMALL_BUILD
1115 
/*---------------------------------------------------------------------------*
  Name:         MI_CpuCopy8

  Description:  Copies memory using the CPU.
                Alignment is handled automatically.

  Arguments:    srcp: Source address
                dstp: Destination address
                size: Size (bytes)

  Returns:      None.
 *---------------------------------------------------------------------------*/
1128 #ifdef SDK_SMALL_BUILD
MI_CpuCopy8(register const void * srcp,register void * dstp,register u32 size)1129 asm void MI_CpuCopy8( register const void *srcp, register void *dstp, register u32 size )
1130 {
1131         mov     r12, #0                 // n = 0
1132 @1:
1133         cmp     r12, r2                 // n < size ?
1134         ldrltb  r3, [r0, r12]           // *((vu8 *)(destp + p)) = *((vu8 *)(srcp + n))
1135         strltb  r3, [r1, r12]
1136 
1137         addlt   r12, r12, #1            // n ++
1138         blt     @1
1139 
1140         bx      lr
1141 }
1142 
1143 #else  //ifdef SDK_SMALL_BUILD
MI_CpuCopy8(register const void * srcp,register void * dstp,register u32 size)1144 asm void MI_CpuCopy8( register const void *srcp, register void *dstp, register u32 size )
1145 {
1146     cmp     r2, #0
1147     bxeq    lr
1148 
1149     // 16-bit alignment of dstp
1150     tst     r1, #1
1151     beq     @_1
1152 #ifndef CW_BUG_FOR_LDRH_AND_STRH
1153     ldrh    r12, [r1, #-1]
1154 #else
1155     LDRH_AD4( HALFW_CONDAL, 12, 1, 1 ) // *** For CW BUG
1156 #endif
1157     and     r12, r12, #0x00FF
1158     tst     r0, #1
1159 #ifndef CW_BUG_FOR_LDRH_AND_STRH
1160     ldrneh  r3, [r0, #-1]
1161 #else
1162     LDRH_AD4( HALFW_CONDNE, 3, 0, 1 ) // *** For CW BUG
1163 #endif
1164     movne   r3, r3, lsr #8
1165 #ifndef CW_BUG_FOR_LDRH_AND_STRH
1166     ldreqh  r3, [r0]
1167 #else
1168     LDRH_AD2( HALFW_CONDEQ, 3, 0, 0 ) // *** For CW BUG
1169 #endif
1170     orr     r3, r12, r3, lsl #8
1171 #ifndef CW_BUG_FOR_LDRH_AND_STRH
1172     strh    r3, [r1, #-1]
1173 #else
1174     STRH_AD4( HALFW_CONDAL, 3, 1, 1 ) // *** For CW BUG
1175 #endif
1176     add     r0, r0, #1
1177     add     r1, r1, #1
1178     subs    r2, r2, #1
1179     bxeq    lr
1180 @_1:
1181 
1182     // Check the 16- or 32-bit synchronization of the address fraction
1183     eor     r12, r1, r0
1184     tst     r12, #1
1185     beq     @_2
1186 
1187     // Doesn't synchronize at all, so use irregular 16-bit transfer
1188     //  tmp = *(u16*)src++ >> 8;
1189     //  while((size -= 2) >= 0) {
1190     //      tmp |= (*(u16*)src++ << 8);
1191     //      *(u16*)dst++ = (u16)tmp;
1192     //      tmp >>= 16;
1193     //  }
1194     bic     r0, r0, #1
1195 #ifndef CW_BUG_FOR_LDRH_AND_STRH
1196     ldrh    r12, [r0], #2
1197 #else
1198         LDRH_AD1( HALFW_CONDAL, 12, 0, 2 ) // *** For CW BUG
1199 #endif
1200     mov     r3, r12, lsr #8
1201     subs    r2, r2, #2
1202     bcc     @_3
1203 @_4:
1204 #ifndef CW_BUG_FOR_LDRH_AND_STRH
1205     ldrh    r12, [r0], #2
1206 #else
1207     LDRH_AD1( HALFW_CONDAL, 12, 0, 2 ) // *** For CW BUG
1208 #endif
1209     orr     r12, r3, r12, lsl #8
1210 #ifndef CW_BUG_FOR_LDRH_AND_STRH
1211     strh    r12, [r1], #2
1212 #else
1213     STRH_AD1( HALFW_CONDAL, 12, 1, 2 ) // *** For CW BUG
1214 #endif
1215     mov     r3, r12, lsr #16
1216     subs    r2, r2, #2
1217     bcs     @_4
1218 
1219 @_3:
1220     //  if(size & 1)
1221     //      *dst = (u16)((*dst & 0xFF00) | tmp);
1222     //  return;
1223     tst     r2, #1
1224     bxeq    lr
1225 #ifndef CW_BUG_FOR_LDRH_AND_STRH
1226     ldrh    r12, [r1]
1227 #else
1228     LDRH_AD2( HALFW_CONDAL, 12, 1, 0 ) // *** For CW BUG
1229 #endif
1230     and     r12, r12, #0xFF00
1231     orr     r12, r12, r3
1232 #ifndef CW_BUG_FOR_LDRH_AND_STRH
1233     strh    r12, [r1]
1234 #else
1235     STRH_AD2( HALFW_CONDAL, 12, 1, 0 ) // *** For CW BUG
1236 #endif
1237     bx      lr
1238 
1239 @_2:
1240     tst     r12, #2
1241     beq     @_5
1242     // 16-bit transfer
1243     bics    r3, r2, #1
1244     beq     @_6
1245     sub     r2, r2, r3
1246     add     r12, r3, r1
1247 @_7:
1248 #ifndef CW_BUG_FOR_LDRH_AND_STRH
1249     ldrh    r3, [r0], #2
1250 #else
1251     LDRH_AD1( HALFW_CONDAL, 3, 0, 2 ) // *** For CW BUG
1252 #endif
1253 #ifndef CW_BUG_FOR_LDRH_AND_STRH
1254     strh    r3, [r1], #2
1255 #else
1256     STRH_AD1( HALFW_CONDAL, 3, 1, 2 ) // *** For CW BUG
1257 #endif
1258     cmp     r1, r12
1259     bcc     @_7
1260     b       @_6
1261 
1262 @_5:
1263     // 32-bit alignment
1264     cmp     r2, #2
1265     bcc     @_6
1266     tst     r1, #2
1267     beq     @_8
1268 #ifndef CW_BUG_FOR_LDRH_AND_STRH
1269     ldrh    r3, [r0], #2
1270 #else
1271     LDRH_AD1( HALFW_CONDAL, 3, 0, 2 ) // *** For CW BUG
1272 #endif
1273 #ifndef CW_BUG_FOR_LDRH_AND_STRH
1274     strh    r3, [r1], #2
1275 #else
1276         STRH_AD1( HALFW_CONDAL, 3, 1, 2 ) // *** For CW BUG
1277 #endif
1278     subs    r2, r2, #2
1279     bxeq    lr
1280 @_8:
1281     // 32-bit transfer
1282     bics    r3, r2, #3
1283     beq     @_10
1284     sub     r2, r2, r3
1285     add     r12, r3, r1
1286 @_9:
1287     ldr     r3, [r0], #4
1288     str     r3, [r1], #4
1289     cmp     r1, r12
1290     bcc     @_9
1291 
1292 @_10:
1293     //  Last 16-bit transfer
1294     tst     r2, #2
1295 #ifndef CW_BUG_FOR_LDRH_AND_STRH
1296     ldrneh  r3, [r0], #2
1297     strneh  r3, [r1], #2
1298 #else
1299     LDRH_AD1( HALFW_CONDNE, 3, 0, 2 ) // *** For CW BUG
1300     STRH_AD1( HALFW_CONDNE, 3, 1, 2 ) // *** For CW BUG
1301 #endif
1302 
1303 @_6:
1304     //  Last 8-bit transfer
1305     tst     r2, #1
1306     bxeq    lr
1307 #ifndef CW_BUG_FOR_LDRH_AND_STRH
1308     ldrh    r2, [r1]
1309     ldrh    r0, [r0]
1310 #else
1311     LDRH_AD2( HALFW_CONDAL, 2, 1, 0 ) // *** For CW BUG
1312     LDRH_AD2( HALFW_CONDAL, 0, 0, 0 ) // *** For CW BUG
1313 #endif
1314     and     r2, r2, #0xFF00
1315     and     r0, r0, #0x00FF
1316     orr     r0, r2, r0
1317 #ifndef CW_BUG_FOR_LDRH_AND_STRH
1318     strh    r0, [r1]
1319 #else
1320     STRH_AD2( HALFW_CONDAL, 0, 1, 0 ) // *** For CW BUG
1321 #endif
1322     bx      lr
1323 }
1324 #endif //ifdef SDK_SMALL_BUILD
1325 
1326 
1327 
1328 /*---------------------------------------------------------------------------*
1329   Name:         MI_CpuFind8
1330 
1331   Description:  Finds memory data (8-bit version).
1332 
1333   Arguments:    src:  Source address, no limitation for alignment
1334                 data:  Target data
1335                 size:  Size (byte), no limitation for alignment
1336 
1337   Returns:      Pointer to found data or NULL.
1338  *---------------------------------------------------------------------------*/
MI_CpuFind8(const void * src,u8 data,u32 size)1339 void*   MI_CpuFind8(const void *src, u8 data, u32 size)
1340 {
1341     const u8* p8 = (const u8*)src;
1342 
1343     if( size == 0 )
1344     {
1345         return NULL;
1346     }
1347 
1348     // If the address is not 2-byte aligned
1349     // Check only 1 byte and align in 2 bytes
1350     if( ((u32)p8 & 0x1) != 0 )
1351     {
1352         const u16 v = *(u16*)(p8 - 1);
1353 
1354         if( (v >> 8) == data )
1355         {
1356             return (void*)p8;
1357         }
1358 
1359         size--;
1360         p8++;
1361     }
1362 
1363     // Check in 2-byte units
1364     {
1365         const u16* p16 = (const u16*)p8;
1366         const u16* p16end = p16 + MATH_ROUNDDOWN(size, 2);
1367 
1368         for( ;  p16 < p16end; ++p16 )
1369         {
1370             const u16 v = *p16;
1371 
1372             if( (v & 0xFF) == data )
1373             {
1374                 return (void*)( (u8*)p16 + 0 );
1375             }
1376             if( (v >> 8) == data )
1377             {
1378                 return (void*)( (u8*)p16 + 1 );
1379             }
1380         }
1381     }
1382 
1383     // At this point the size is an odd number
1384     // Check remaining 1 byte
1385     if( (size & 0x1) != 0 )
1386     {
1387         const u16 v = *(u16*)(p8 + size - 1);
1388 
1389         if( (v & 0xFF) == data )
1390         {
1391             return (void*)(p8 + size - 1);
1392         }
1393     }
1394 
1395     return NULL;
1396 }
1397 
1398 /*---------------------------------------------------------------------------*
1399   Name:         MI_CpuComp8
1400 
1401   Description:  Compares memory data (8-bit version).
1402 
1403   Arguments:    mem1:  Target address 1, no limitation for alignment
1404                 mem2:  Target address 2, no limitation for alignment
1405                 size:  Size (byte), no limitation for alignment
1406 
1407   Returns:      < 0: mem1 smaller than mem2.
1408                 = 0: mem1 equals mem2.
1409                 > 0: mem1 larger than mem2.
1410  *---------------------------------------------------------------------------*/
MI_CpuComp8(const void * mem1,const void * mem2,u32 size)1411 int     MI_CpuComp8(const void *mem1, const void *mem2, u32 size)
1412 {
1413     const u8* p1 = mem1;
1414     const u8* p2 = mem2;
1415     const u8* p1end = (const u8*)( (const u8*)p1 + size );
1416 
1417     while( p1 < p1end )
1418     {
1419         const int d = (int)*p1++ - (int)*p2++;
1420 
1421         if( d != 0 )
1422         {
1423             return d;
1424         }
1425     }
1426 
1427     return 0;
1428 }
1429 
1430 /*---------------------------------------------------------------------------*
1431   Name:         MI_CpuCopy
1432 
1433   Description:  Copies memory by CPU.
1434                 Byte access/ldm-stm version.
1435 
1436   Arguments:    srcp: Source address
1437                 destp: Destination address.
1438                 size: Size (bytes)
1439 
1440   Returns:      None.
1441  *---------------------------------------------------------------------------*/
1442 #if PLATFORM_BYTES_ENDIAN == PLATFORM_ENDIAN_LITTLE
1443 /* Little-endian */
1444 #define FORWARD_(n)         lsl #((n) * 8)
1445 #define BACKWARD_(n)        lsr #(32 - (n) * 8)
1446 #define FORWARD_MASK(n)     #((1 << ((n) * 8)) - 1)
1447 #elif PLATFORM_BYTES_ENDIAN == PLATFORM_ENDIAN_BIG
1448 /* Big-endian */
1449 #define FORWARD_(n)         lsr #((n) * 8)
1450 #define BACKWARD_(n)        lsl #(32 - (n) * 8)
1451 #define FORWARD_MASK(n)     #((1 << ((n) * 8)) - 1)
1452 #else
1453 #error
1454 #endif
1455 
MI_CpuCopy(register const void * srcp,register void * destp,register u32 size)1456 asm void MI_CpuCopy( register const void *srcp, register void *destp, register u32 size )
1457 {
1458     /* Sequentially transfer from the front */
1459 
1460     /* If less than 8 bytes, process directly here */
1461     cmp     r2, #8
1462     bgt	    _forward_blt
1463     rsb     r3, r2, #0x8
1464     add     pc, pc, r3, lsl #3
1465     nop
1466     ldrb    r3, [r0], #1
1467     strb    r3, [r1], #1
1468     ldrb    r3, [r0], #1
1469     strb    r3, [r1], #1
1470     ldrb    r3, [r0], #1
1471     strb    r3, [r1], #1
1472     ldrb    r3, [r0], #1
1473     strb    r3, [r1], #1
1474     ldrb    r3, [r0], #1
1475     strb    r3, [r1], #1
1476     ldrb    r3, [r0], #1
1477     strb    r3, [r1], #1
1478     ldrb    r3, [r0], #1
1479     strb    r3, [r1], #1
1480     ldrb    r3, [r0], #1
1481     strb    r3, [r1], #1
1482     bx      lr
1483 
1484 _forward_blt:
1485     /* If more than 8 bytes, process awareness of alignment */
1486 
1487     /* Process the fractions at the leading end of src first */
1488     tst     r0, #1
1489     subne   r2, r2, #1
1490     ldrneb  r3, [r0], #1
1491     strneb  r3, [r1], #1
1492     tst     r0, #2
1493     subne   r2, r2, #2
1494     ldrneb  r3, [r0], #1
1495     strneb  r3, [r1], #1
1496     ldrneb  r3, [r0], #1
1497     strneb  r3, [r1], #1
1498 
1499     /* Branch processing according to the phases of src and dst */
1500     and     r3, r1, #3
1501     bic     r1, r1, #3
1502     cmp     r3, #0
1503     beq     _forward_blt_0
1504     cmp     r3, #1
1505     beq     _forward_blt_1
1506     cmp     r3, #2
1507     beq     _forward_blt_2
1508     b       _forward_blt_3
1509 
1510 _forward_blt_0:
1511     /* Processing when (dst & 3 == 0) */
1512     stmfd   sp!, {r4-r10,lr}
1513     subs    r2, r2, #32
1514 _forward_blt_0_32:
1515     ldmgeia r0!, {r4-r10,lr}
1516     stmgeia r1!, {r4-r10,lr}
1517     subges  r2, r2, #32
1518     bge     _forward_blt_0_32
1519     add     r2, r2, #32
1520     ldmfd   sp!, {r4-r10,lr}
1521 
1522     subs    r2, r2, #4
1523 _forward_blt_0_4:
1524     ldrge   r3, [r0], #4
1525     strge   r3, [r1], #4
1526     subges  r2, r2, #4
1527     bge     _forward_blt_0_4
1528     add     r2, r2, #4
1529     b       _forward_blt_end
1530 
1531 _forward_blt_1:
1532     /* Processing when (dst & 3 == 1) */
1533 #define SHIFT  1
1534     /* ldm-shift-stm in 32-byte units */
1535     ldr     r12, [r1]
1536     mov     r12, r12, FORWARD_(4 - SHIFT)
1537     mov     r12, r12, BACKWARD_(SHIFT)
1538     stmfd   sp!, {r4-r10,lr}
1539     subs    r2, r2, #32
1540 _forward_blt_1_32:
1541     ldmgeia r0!, {r4-r10,lr}
1542     movge   r3, r4, BACKWARD_(SHIFT)
1543     orrge   r4, r12, r4, FORWARD_(SHIFT)
1544     movge   r12, r5, BACKWARD_(SHIFT)
1545     orrge   r5, r3, r5, FORWARD_(SHIFT)
1546     movge   r3, r6, BACKWARD_(SHIFT)
1547     orrge   r6, r12, r6, FORWARD_(SHIFT)
1548     movge   r12, r7, BACKWARD_(SHIFT)
1549     orrge   r7, r3, r7, FORWARD_(SHIFT)
1550     movge   r3, r8, BACKWARD_(SHIFT)
1551     orrge   r8, r12, r8, FORWARD_(SHIFT)
1552     movge   r12, r9, BACKWARD_(SHIFT)
1553     orrge   r9, r3, r9, FORWARD_(SHIFT)
1554     movge   r3, r10, BACKWARD_(SHIFT)
1555     orrge   r10, r12, r10, FORWARD_(SHIFT)
1556     movge   r12, lr, BACKWARD_(SHIFT)
1557     orrge   lr, r3, lr, FORWARD_(SHIFT)
1558     stmgeia r1!, {r4-r10,lr}
1559     subges  r2, r2, #32
1560     bge     _forward_blt_1_32
1561     add     r2, r2, #32
1562     ldmfd   sp!, {r4-r10,lr}
1563     /* ldr-shift-str in 4-byte units */
1564     subs    r2, r2, #4
1565 _forward_blt_1_4:
1566     ldrge   r3, [r0], #4
1567     orrge   r12, r12, r3, FORWARD_(SHIFT)
1568     strge   r12, [r1], #4
1569     movge   r12, r3, BACKWARD_(SHIFT)
1570     subges  r2, r2, #4
1571     bge     _forward_blt_1_4
1572     add     r2, r2, #4
1573     /* Shared end process */
1574     sub     r0, r0, #SHIFT
1575     add     r2, r2, #SHIFT
1576     b       _forward_blt_end
1577 #undef SHIFT
1578 
1579 _forward_blt_2:
1580     /* Processing when (dst & 3 == 2) */
1581 #define SHIFT  2
1582     /* ldm-shift-stm in 32-byte units */
1583     ldr     r12, [r1]
1584     mov     r12, r12, FORWARD_(4 - SHIFT)
1585     mov     r12, r12, BACKWARD_(SHIFT)
1586     stmfd   sp!, {r4-r10,lr}
1587     subs    r2, r2, #32
1588 _forward_blt_2_32:
1589     ldmgeia r0!, {r4-r10,lr}
1590     movge   r3, r4, BACKWARD_(SHIFT)
1591     orrge   r4, r12, r4, FORWARD_(SHIFT)
1592     movge   r12, r5, BACKWARD_(SHIFT)
1593     orrge   r5, r3, r5, FORWARD_(SHIFT)
1594     movge   r3, r6, BACKWARD_(SHIFT)
1595     orrge   r6, r12, r6, FORWARD_(SHIFT)
1596     movge   r12, r7, BACKWARD_(SHIFT)
1597     orrge   r7, r3, r7, FORWARD_(SHIFT)
1598     movge   r3, r8, BACKWARD_(SHIFT)
1599     orrge   r8, r12, r8, FORWARD_(SHIFT)
1600     movge   r12, r9, BACKWARD_(SHIFT)
1601     orrge   r9, r3, r9, FORWARD_(SHIFT)
1602     movge   r3, r10, BACKWARD_(SHIFT)
1603     orrge   r10, r12, r10, FORWARD_(SHIFT)
1604     movge   r12, lr, BACKWARD_(SHIFT)
1605     orrge   lr, r3, lr, FORWARD_(SHIFT)
1606     stmgeia r1!, {r4-r10,lr}
1607     subges  r2, r2, #32
1608     bge     _forward_blt_2_32
1609     add     r2, r2, #32
1610     ldmfd   sp!, {r4-r10,lr}
1611     /* ldr-shift-str in 4-byte units */
1612     subs    r2, r2, #4
1613 _forward_blt_2_4:
1614     ldrge   r3, [r0], #4
1615     orrge   r12, r12, r3, FORWARD_(SHIFT)
1616     strge   r12, [r1], #4
1617     movge   r12, r3, BACKWARD_(SHIFT)
1618     subges  r2, r2, #4
1619     bge     _forward_blt_2_4
1620     add     r2, r2, #4
1621     /* Shared end process */
1622     sub     r0, r0, #SHIFT
1623     add     r2, r2, #SHIFT
1624     b       _forward_blt_end
1625 #undef SHIFT
1626 
1627 _forward_blt_3:
1628     /* Processing when (dst & 3 == 3) */
1629 #define SHIFT  3
1630     /* ldm-shift-stm in 32-byte units */
1631     ldr     r12, [r1]
1632     mov     r12, r12, FORWARD_(4 - SHIFT)
1633     mov     r12, r12, BACKWARD_(SHIFT)
1634     stmfd   sp!, {r4-r10,lr}
1635     subs    r2, r2, #32
1636 _forward_blt_3_32:
1637     ldmgeia r0!, {r4-r10,lr}
1638     movge   r3, r4, BACKWARD_(SHIFT)
1639     orrge   r4, r12, r4, FORWARD_(SHIFT)
1640     movge   r12, r5, BACKWARD_(SHIFT)
1641     orrge   r5, r3, r5, FORWARD_(SHIFT)
1642     movge   r3, r6, BACKWARD_(SHIFT)
1643     orrge   r6, r12, r6, FORWARD_(SHIFT)
1644     movge   r12, r7, BACKWARD_(SHIFT)
1645     orrge   r7, r3, r7, FORWARD_(SHIFT)
1646     movge   r3, r8, BACKWARD_(SHIFT)
1647     orrge   r8, r12, r8, FORWARD_(SHIFT)
1648     movge   r12, r9, BACKWARD_(SHIFT)
1649     orrge   r9, r3, r9, FORWARD_(SHIFT)
1650     movge   r3, r10, BACKWARD_(SHIFT)
1651     orrge   r10, r12, r10, FORWARD_(SHIFT)
1652     movge   r12, lr, BACKWARD_(SHIFT)
1653     orrge   lr, r3, lr, FORWARD_(SHIFT)
1654     stmgeia r1!, {r4-r10,lr}
1655     subges  r2, r2, #32
1656     bge     _forward_blt_3_32
1657     add     r2, r2, #32
1658     ldmfd   sp!, {r4-r10,lr}
1659     /* ldr-shift-str in 4-byte units */
1660     subs    r2, r2, #4
1661 _forward_blt_3_4:
1662     ldrge   r3, [r0], #4
1663     orrge   r12, r12, r3, FORWARD_(SHIFT)
1664     strge   r12, [r1], #4
1665     movge   r12, r3, BACKWARD_(SHIFT)
1666     subges  r2, r2, #4
1667     bge     _forward_blt_3_4
1668     add     r2, r2, #4
1669     /* Shared end process */
1670     sub     r0, r0, #SHIFT
1671     add     r2, r2, #SHIFT
1672     b       _forward_blt_end
1673 #undef SHIFT
1674 
1675 _forward_blt_end:
1676     /* Transfer the end fraction */
1677     tst     r2, #4
1678     ldrneb  r3, [r0], #1
1679     strneb  r3, [r1], #1
1680     ldrneb  r3, [r0], #1
1681     strneb  r3, [r1], #1
1682     ldrneb  r3, [r0], #1
1683     strneb  r3, [r1], #1
1684     ldrneb  r3, [r0], #1
1685     strneb  r3, [r1], #1
1686     tst     r2, #2
1687     ldrneb  r3, [r0], #1
1688     strneb  r3, [r1], #1
1689     ldrneb  r3, [r0], #1
1690     strneb  r3, [r1], #1
1691     tst     r2, #1
1692     ldrneb  r3, [r0], #1
1693     strneb  r3, [r1], #1
1694     bx      lr
1695 }
1696 
1697 /*---------------------------------------------------------------------------*
1698   Name:         MI_CpuMove
1699 
1700   Description:  Moves memory data (mixed version).
1701 
1702   Arguments:    srcp: Source address
1703                 destp: Destination address
1704                 size: Size (bytes)
1705 
1706   Returns:      None.
1707  *---------------------------------------------------------------------------*/
MI_CpuMove(register const void * srcp,register void * destp,register u32 size)1708 asm void MI_CpuMove( register const void *srcp, register void *destp, register u32 size )
1709 {
1710     /* Simply determines whether transfer is really necessary and determines the transfer direction */
1711     cmp     r2, #0
1712     subnes  r3, r0, r1
1713     bxeq    lr
1714     bgt     MI_CpuCopy
1715 
1716     /*
1717      * NOTE:
1718      * _forward_blt_1, _forward_blt_2, _forward_blt_3 and _backward_blt_1, _backward_blt_2, _backward_blt_3 switch constant SHIFT to 1, 2, 3 and have the same processes.
1719      *
1720      *
1721      * If you know a method to describe several lines of asm code with arm-elf-gcc using a macro, please do it all together.
1722      *
1723      * It is also acceptable to consume one empty register for sharing.
1724      * (However, when the shifting amount is specified with a register, one cycle will be increased, so in exchange for size conservation, processing costs will increase.
1725      *
1726      *  If that is true, there is no need to go to the trouble to describe with asm.)
1727      */
1728 
1729 _backward:
1730     /* Sequentially transfer from the back end */
1731     add     r1, r1, r2
1732     add     r0, r0, r2
1733 
1734     /* If less than 8 bytes, process directly here */
1735     cmp     r2, #8
1736     bgt	    _backward_blt
1737     rsb     r3, r2, #0x8
1738     add     pc, pc, r3, lsl #3
1739     nop
1740     ldrb    r3, [r0, #-1]!
1741     strb    r3, [r1, #-1]!
1742     ldrb    r3, [r0, #-1]!
1743     strb    r3, [r1, #-1]!
1744     ldrb    r3, [r0, #-1]!
1745     strb    r3, [r1, #-1]!
1746     ldrb    r3, [r0, #-1]!
1747     strb    r3, [r1, #-1]!
1748     ldrb    r3, [r0, #-1]!
1749     strb    r3, [r1, #-1]!
1750     ldrb    r3, [r0, #-1]!
1751     strb    r3, [r1, #-1]!
1752     ldrb    r3, [r0, #-1]!
1753     strb    r3, [r1, #-1]!
1754     ldrb    r3, [r0, #-1]!
1755     strb    r3, [r1, #-1]!
1756     bx      lr
1757 
1758 _backward_blt:
1759     /* If more than 8 bytes, process awareness of alignment */
1760 
1761     /* Process the fractions at the trailing end of src first */
1762     tst     r0, #2
1763     subne   r2, r2, #2
1764     ldrneb  r3, [r0, #-1]!
1765     strneb  r3, [r1, #-1]!
1766     ldrneb  r3, [r0, #-1]!
1767     strneb  r3, [r1, #-1]!
1768     tst     r0, #1
1769     subne   r2, r2, #1
1770     ldrneb  r3, [r0, #-1]!
1771     strneb  r3, [r1, #-1]!
1772 
1773     /* Branch processing according to the phases of src and dst */
1774     and     r3, r1, #3
1775     bic     r1, r1, #3
1776     cmp     r3, #0
1777     beq     _backward_blt_0
1778     cmp     r3, #1
1779     beq     _backward_blt_1
1780     cmp     r3, #2
1781     beq     _backward_blt_2
1782     b       _backward_blt_3
1783 
1784 _backward_blt_0:
1785     /* Processing when (dst & 3 == 0) */
1786     stmfd   sp!, {r4-r10,lr}
1787     subs    r2, r2, #32
1788 _backward_blt_0_32:
1789     ldmgedb r0!, {r4-r10,lr}
1790     stmgedb r1!, {r4-r10,lr}
1791     subges  r2, r2, #32
1792     bge     _backward_blt_0_32
1793     add     r2, r2, #32
1794     ldmfd   sp!, {r4-r10,lr}
1795 
1796     subs    r2, r2, #4
1797 _backward_blt_0_4:
1798     ldrge   r3, [r0, #-4]!
1799     strge   r3, [r1, #-4]!
1800     subges  r2, r2, #4
1801     bge     _backward_blt_0_4
1802     add     r2, r2, #4
1803     b       _backward_blt_end
1804 
1805 _backward_blt_1:
1806     /* Processing when (dst & 3 == 1) */
1807 #define SHIFT  1
1808     /* ldm-shift-stm in 32-byte units */
1809     ldr     r12, [r1]
1810     mov     r12, r12, BACKWARD_(4 - SHIFT)
1811     mov     r12, r12, FORWARD_(SHIFT)
1812     stmfd   sp!, {r4-r10,lr}
1813     subs    r2, r2, #32
1814 _backward_blt_1_32:
1815     ldmgedb r0!, {r4-r10,lr}
1816     movge   r3, lr, FORWARD_(SHIFT)
1817     orrge   lr, r12, lr, BACKWARD_(SHIFT)
1818     movge   r12, r10, FORWARD_(SHIFT)
1819     orrge   r10, r3, r10, BACKWARD_(SHIFT)
1820     movge   r3, r9, FORWARD_(SHIFT)
1821     orrge   r9, r12, r9, BACKWARD_(SHIFT)
1822     movge   r12, r8, FORWARD_(SHIFT)
1823     orrge   r8, r3, r8, BACKWARD_(SHIFT)
1824     movge   r3, r7, FORWARD_(SHIFT)
1825     orrge   r7, r12, r7, BACKWARD_(SHIFT)
1826     movge   r12, r6, FORWARD_(SHIFT)
1827     orrge   r6, r3, r6, BACKWARD_(SHIFT)
1828     movge   r3, r5, FORWARD_(SHIFT)
1829     orrge   r5, r12, r5, BACKWARD_(SHIFT)
1830     movge   r12, r4, FORWARD_(SHIFT)
1831     orrge   r4, r3, r4, BACKWARD_(SHIFT)
1832     stmgeda r1!, {r4-r10,lr}
1833     subges  r2, r2, #32
1834     bge     _backward_blt_1_32
1835     add     r2, r2, #32
1836     ldmfd   sp!, {r4-r10,lr}
1837     /* ldr-shift-str in 4-byte units */
1838     subs    r2, r2, #4
1839 _backward_blt_1_4:
1840     ldrge   r3, [r0, #-4]!
1841     orrge   r12, r12, r3, BACKWARD_(SHIFT)
1842     strge   r12, [r1], #-4
1843     movge   r12, r3, FORWARD_(SHIFT)
1844     subges  r2, r2, #4
1845     bge     _backward_blt_1_4
1846     add     r2, r2, #4
1847     /* Shared end process */
1848     add     r1, r1, #4
1849     add     r0, r0, #(4 - SHIFT)
1850     add     r2, r2, #(4 - SHIFT)
1851     b       _backward_blt_end
1852 #undef SHIFT
1853 
1854 _backward_blt_2:
1855     /* Processing when (dst & 3 == 2) */
1856 #define SHIFT  2
1857     /* ldm-shift-stm in 32-byte units */
1858     ldr     r12, [r1]
1859     mov     r12, r12, BACKWARD_(4 - SHIFT)
1860     mov     r12, r12, FORWARD_(SHIFT)
1861     stmfd   sp!, {r4-r10,lr}
1862     subs    r2, r2, #32
1863 _backward_blt_2_32:
1864     ldmgedb r0!, {r4-r10,lr}
1865     movge   r3, lr, FORWARD_(SHIFT)
1866     orrge   lr, r12, lr, BACKWARD_(SHIFT)
1867     movge   r12, r10, FORWARD_(SHIFT)
1868     orrge   r10, r3, r10, BACKWARD_(SHIFT)
1869     movge   r3, r9, FORWARD_(SHIFT)
1870     orrge   r9, r12, r9, BACKWARD_(SHIFT)
1871     movge   r12, r8, FORWARD_(SHIFT)
1872     orrge   r8, r3, r8, BACKWARD_(SHIFT)
1873     movge   r3, r7, FORWARD_(SHIFT)
1874     orrge   r7, r12, r7, BACKWARD_(SHIFT)
1875     movge   r12, r6, FORWARD_(SHIFT)
1876     orrge   r6, r3, r6, BACKWARD_(SHIFT)
1877     movge   r3, r5, FORWARD_(SHIFT)
1878     orrge   r5, r12, r5, BACKWARD_(SHIFT)
1879     movge   r12, r4, FORWARD_(SHIFT)
1880     orrge   r4, r3, r4, BACKWARD_(SHIFT)
1881     stmgeda r1!, {r4-r10,lr}
1882     subges  r2, r2, #32
1883     bge     _backward_blt_2_32
1884     add     r2, r2, #32
1885     ldmfd   sp!, {r4-r10,lr}
1886     /* ldr-shift-str in 4-byte units */
1887     subs    r2, r2, #4
1888 _backward_blt_2_4:
1889     ldrge   r3, [r0, #-4]!
1890     orrge   r12, r12, r3, BACKWARD_(SHIFT)
1891     strge   r12, [r1], #-4
1892     movge   r12, r3, FORWARD_(SHIFT)
1893     subges  r2, r2, #4
1894     bge     _backward_blt_2_4
1895     add     r2, r2, #4
1896     /* Shared end process */
1897     add     r1, r1, #4
1898     add     r0, r0, #(4 - SHIFT)
1899     add     r2, r2, #(4 - SHIFT)
1900     b       _backward_blt_end
1901 #undef SHIFT
1902 
1903 _backward_blt_3:
1904     /* Processing when (dst & 3 == 3) */
1905 #define SHIFT  3
1906     /* ldm-shift-stm in 32-byte units */
1907     ldr     r12, [r1]
1908     mov     r12, r12, BACKWARD_(4 - SHIFT)
1909     mov     r12, r12, FORWARD_(SHIFT)
1910     stmfd   sp!, {r4-r10,lr}
1911     subs    r2, r2, #32
1912 _backward_blt_3_32:
1913     ldmgedb r0!, {r4-r10,lr}
1914     movge   r3, lr, FORWARD_(SHIFT)
1915     orrge   lr, r12, lr, BACKWARD_(SHIFT)
1916     movge   r12, r10, FORWARD_(SHIFT)
1917     orrge   r10, r3, r10, BACKWARD_(SHIFT)
1918     movge   r3, r9, FORWARD_(SHIFT)
1919     orrge   r9, r12, r9, BACKWARD_(SHIFT)
1920     movge   r12, r8, FORWARD_(SHIFT)
1921     orrge   r8, r3, r8, BACKWARD_(SHIFT)
1922     movge   r3, r7, FORWARD_(SHIFT)
1923     orrge   r7, r12, r7, BACKWARD_(SHIFT)
1924     movge   r12, r6, FORWARD_(SHIFT)
1925     orrge   r6, r3, r6, BACKWARD_(SHIFT)
1926     movge   r3, r5, FORWARD_(SHIFT)
1927     orrge   r5, r12, r5, BACKWARD_(SHIFT)
1928     movge   r12, r4, FORWARD_(SHIFT)
1929     orrge   r4, r3, r4, BACKWARD_(SHIFT)
1930     stmgeda r1!, {r4-r10,lr}
1931     subges  r2, r2, #32
1932     bge     _backward_blt_3_32
1933     add     r2, r2, #32
1934     ldmfd   sp!, {r4-r10,lr}
1935     /* ldr-shift-str in 4-byte units */
1936     subs    r2, r2, #4
1937 _backward_blt_3_4:
1938     ldrge   r3, [r0, #-4]!
1939     orrge   r12, r12, r3, BACKWARD_(SHIFT)
1940     strge   r12, [r1], #-4
1941     movge   r12, r3, FORWARD_(SHIFT)
1942     subges  r2, r2, #4
1943     bge     _backward_blt_3_4
1944     add     r2, r2, #4
1945     /* Shared end process */
1946     add     r1, r1, #4
1947     add     r0, r0, #(4 - SHIFT)
1948     add     r2, r2, #(4 - SHIFT)
1949     b       _backward_blt_end
1950 #undef SHIFT
1951 
1952 _backward_blt_end:
1953     /* Transfer the leading end fraction */
1954     tst     r2, #4
1955     ldrneb  r3, [r0, #-1]!
1956     strneb  r3, [r1, #-1]!
1957     ldrneb  r3, [r0, #-1]!
1958     strneb  r3, [r1, #-1]!
1959     ldrneb  r3, [r0, #-1]!
1960     strneb  r3, [r1, #-1]!
1961     ldrneb  r3, [r0, #-1]!
1962     strneb  r3, [r1, #-1]!
1963     tst     r2, #2
1964     ldrneb  r3, [r0, #-1]!
1965     strneb  r3, [r1, #-1]!
1966     ldrneb  r3, [r0, #-1]!
1967     strneb  r3, [r1, #-1]!
1968     tst     r2, #1
1969     ldrneb  r3, [r0, #-1]!
1970     strneb  r3, [r1, #-1]!
1971     bx      lr
1972 }
1973 
1974 #undef FORWARD_
1975 #undef BACKWARD_
1976 #undef FORWARD_MASK
1977 
1978 #include <nitro/codereset.h>
1979 
1980 
1981 #include <nitro/code16.h>
1982 //=======================================================================
1983 //           FOR CONVENIENCE (filling zero)
1984 //=======================================================================
1985 /*---------------------------------------------------------------------------*
1986   Name:         MI_Zero32B
1987 
1988   Description:  Fills 32-byte area with 0 by CPU.
1989 
1990   Arguments:    pDest: Destination address
1991 
1992   Returns:      None.
1993  *---------------------------------------------------------------------------*/
MI_Zero32B(register void * pDest)1994 asm void MI_Zero32B(register void* pDest)
1995 {
1996     mov     r1,  #0
1997     mov     r2,  #0
1998     stmia   r0!, {r1, r2}
1999     mov     r3,  #0
2000     stmia   r0!, {r1, r2, r3}
2001     stmia   r0!, {r1, r2, r3}
2002 
2003     bx      lr
2004 }
2005 
2006 /*---------------------------------------------------------------------------*
2007   Name:         MI_Zero36B
2008 
2009   Description:  Fills 36-byte area with 0 by CPU.
2010 
2011   Arguments:    pDest: Destination address
2012 
2013   Returns:      None.
2014  *---------------------------------------------------------------------------*/
MI_Zero36B(register void * pDest)2015 asm void MI_Zero36B(register void* pDest)
2016 {
2017     mov     r1,  #0
2018     mov     r2,  #0
2019     mov     r3,  #0
2020     stmia   r0!, {r1, r2, r3}
2021     stmia   r0!, {r1, r2, r3}
2022     stmia   r0!, {r1, r2, r3}
2023 
2024     bx      lr
2025 }
2026 
2027 /*---------------------------------------------------------------------------*
2028   Name:         MI_Zero48B
2029 
2030   Description:  Fills 48-byte area with 0 by CPU.
2031 
2032   Arguments:    pDest: Destination address
2033 
2034   Returns:      None.
2035  *---------------------------------------------------------------------------*/
MI_Zero48B(register void * pDest)2036 asm void MI_Zero48B(register void* pDest)
2037 {
2038     mov     r1,  #0
2039     mov     r2,  #0
2040     mov     r3,  #0
2041     stmia   r0!, {r1, r2, r3}
2042     stmia   r0!, {r1, r2, r3}
2043     stmia   r0!, {r1, r2, r3}
2044     stmia   r0!, {r1, r2, r3}
2045 
2046     bx      lr
2047 }
2048 
2049 /*---------------------------------------------------------------------------*
2050   Name:         MI_Zero64B
2051 
2052   Description:  Fills 64-byte area with 0 by CPU.
2053 
2054   Arguments:    pDest: Destination address
2055 
2056   Returns:      None.
2057  *---------------------------------------------------------------------------*/
MI_Zero64B(register void * pDest)2058 asm void MI_Zero64B(register void* pDest)
2059 {
2060     mov     r1,  #0
2061     mov     r2,  #0
2062     stmia   r0!, {r1, r2}
2063     mov     r3,  #0
2064     stmia   r0!, {r1, r2}
2065     stmia   r0!, {r1, r2, r3}
2066     stmia   r0!, {r1, r2, r3}
2067     stmia   r0!, {r1, r2, r3}
2068     stmia   r0!, {r1, r2, r3}
2069 
2070     bx      lr
2071 }
2072 
2073 //---- End limitation of THUMB-Mode
2074 #include <nitro/codereset.h>
2075