1 /*---------------------------------------------------------------------------*
2   Project:  TwlSDK - MATH -
3   File:     math.c
4 
5   Copyright 2003-2008 Nintendo. All rights reserved.
6 
7   These coded instructions, statements, and computer programs contain
8   proprietary information of Nintendo of America Inc. and/or Nintendo
9   Company Ltd., and are protected by Federal copyright law. They may
10   not be disclosed to third parties or copied or duplicated in any form,
11   in whole or in part, without the prior written consent of Nintendo.
12 
13   $Date:: 2008-09-17#$
14   $Rev: 8556 $
15   $Author:$
16  *---------------------------------------------------------------------------*/
17 
18 #include <nitro/math.h>
19 
20 /*---------------------------------------------------------------------------*
21   Name:         MATH_CountLeadingZerosFunc
22 
23   Description:  Determines how many high-order bits in a 32-bit binary expression have a value of 0.
24                 (For ARM9 Thumb or ARM7 which do not have CLZ command)
25 
26   Arguments:    x
27 
28   Returns:      The number of contiguous 0 bits from the top
29  *---------------------------------------------------------------------------*/
30 #if defined(SDK_ARM9) && (defined(SDK_CW) || defined(SDK_RX) || defined(__MWERKS__))
31 
32 #pragma thumb off
MATH_CountLeadingZerosFunc(u32 x)33 u32 MATH_CountLeadingZerosFunc(u32 x)
34 {
35     // Even if the call source on ARM9 was Thumb, switching to ARM mode and calling a CLZ command is better for speed and size
36     //
37     //
38     asm
39     {
40     clz     x, x}
41     return  x;
42 }
43 #pragma thumb reset
44 
45 #else // !ARM9 || !(CW || __MWERKS__)
46 
MATH_CountLeadingZerosFunc(u32 x)47 u32 MATH_CountLeadingZerosFunc(u32 x)
48 {
49     u32     y;
50     u32     n = 32;
51 
52     // Use binary search to find the location where 0's end.
53     y = x >> 16;
54     if (y != 0)
55     {
56         n -= 16;
57         x = y;
58     }
59     y = x >> 8;
60     if (y != 0)
61     {
62         n -= 8;
63         x = y;
64     }
65     y = x >> 4;
66     if (y != 0)
67     {
68         n -= 4;
69         x = y;
70     }
71     y = x >> 2;
72     if (y != 0)
73     {
74         n -= 2;
75         x = y;
76     }
77     y = x >> 1;
78     if (y != 0)
79     {
80         n -= 2;
81     }                                  // x == 0b10 or 0b11 -> n -= 2
82     else
83     {
84         n -= x;
85     }                                  // x == 0b00 or 0b01 -> n -= x
86 
87     return n;
88 }
89 
90 #endif // ARM9 && (CW || __MWERKS__)
91 
92 /*---------------------------------------------------------------------------*
93   Name:         MATH_CountPopulation
94 
95   Description:  Determines the number of '1' bits in a binary 32-bit expression.
96 
97   Arguments:    x
98 
99   Returns:      The number of '1' bits in the binary expression
100  *---------------------------------------------------------------------------*/
MATH_CountPopulation(u32 x)101 u8 MATH_CountPopulation(u32 x)
102 {
103     // Note that in the ARM code below, shift and arithmetic operations can be done simultaneously.
104     // With the Release Build and over, it will be 13 commands without stalls + bx lr.
105 
106     // Rather than counting 32 bits directly, first store the number of 1's for every 2 bits in the same location as those 2 bits.
107     //
108     // In other words, every 2 bits are converted such that 00 -> 00, 01 -> 01, 10 -> 01, and 11 -> 10.
109     // When x -> x', for a 2-bit value we have x' = x - (x >> 1).
110     x -= ((x >> 1) & 0x55555555);
111 
112     // When counting in 4-bit units, add the number of 1's stored in the upper and lower 2 bits, and then store this as the number of 1's in that original 4-bit location.
113     //
114     x = (x & 0x33333333) + ((x >> 2) & 0x33333333);
115 
116     // Do the same for 8-bit units.
117     // However, the maximum result for each digit is 8, and this fits in 4 bits, it is not necessary to mask ahead of time.
118     //
119     x += (x >> 4);
120 
121     // Mask unnecessary parts in preparation for the next operations.
122     x &= 0x0f0f0f0f;
123 
124     // Get the sum of the upper 8 bits and lower 8 bits in 16-bit units.
125     x += (x >> 8);
126 
127     // Do the same for 32-bit units.
128     x += (x >> 16);
129 
130     // The lower 8-bit value is the result.
131     return (u8)x;
132 }
133 
134 #if 0
135 // Reference implementation in assembler.
136 // This is faster when the number of unset bits ('0') is greater than the number of set bits ('1').
137 /*---------------------------------------------------------------------------*
138   Name:         MATH_CountPopulation_Asm
139 
140   Description:  Counts bits that are set to 1 when a value is expressed as 32 bits.
141 
142   Arguments:    value   -   Value to examine.
143 
144   Returns:      u32     -   Returns the number of bits. An integer between 0 and 32.
145  *---------------------------------------------------------------------------*/
146 #include <nitro/code32.h>
147 asm u32 MATH_CountPopulation_Asm(u32 value)
148 {
149     mov     r1 ,    #0
150     mov     r2 ,    #1
151 
152 @loop:
153     clz     r3 ,    r0
154     rsbs    r3 ,    r3 ,    #31
155     bcc     @end
156     add     r1 ,    r1 ,    #1
157     mvn     r3 ,    r2 ,    LSL r3
158     and     r0 ,    r0 ,    r3
159     b       @loop
160 
161 @end:
162     mov     r0 ,    r1
163     bx      lr
164 }
165 #include <nitro/codereset.h>
166 #endif
167