/*---------------------------------------------------------------------------* Project: TwlSDK - MATH - File: math.c Copyright 2003-2008 Nintendo. All rights reserved. These coded instructions, statements, and computer programs contain proprietary information of Nintendo of America Inc. and/or Nintendo Company Ltd., and are protected by Federal copyright law. They may not be disclosed to third parties or copied or duplicated in any form, in whole or in part, without the prior written consent of Nintendo. $Date:: 2008-09-17#$ $Rev: 8556 $ $Author:$ *---------------------------------------------------------------------------*/ #include /*---------------------------------------------------------------------------* Name: MATH_CountLeadingZerosFunc Description: Determines how many high-order bits in a 32-bit binary expression have a value of 0. (For ARM9 Thumb or ARM7 which do not have CLZ command) Arguments: x Returns: The number of contiguous 0 bits from the top *---------------------------------------------------------------------------*/ #if defined(SDK_ARM9) && (defined(SDK_CW) || defined(SDK_RX) || defined(__MWERKS__)) #pragma thumb off u32 MATH_CountLeadingZerosFunc(u32 x) { // Even if the call source on ARM9 was Thumb, switching to ARM mode and calling a CLZ command is better for speed and size // // asm { clz x, x} return x; } #pragma thumb reset #else // !ARM9 || !(CW || __MWERKS__) u32 MATH_CountLeadingZerosFunc(u32 x) { u32 y; u32 n = 32; // Use binary search to find the location where 0's end. y = x >> 16; if (y != 0) { n -= 16; x = y; } y = x >> 8; if (y != 0) { n -= 8; x = y; } y = x >> 4; if (y != 0) { n -= 4; x = y; } y = x >> 2; if (y != 0) { n -= 2; x = y; } y = x >> 1; if (y != 0) { n -= 2; } // x == 0b10 or 0b11 -> n -= 2 else { n -= x; } // x == 0b00 or 0b01 -> n -= x return n; } #endif // ARM9 && (CW || __MWERKS__) /*---------------------------------------------------------------------------* Name: MATH_CountPopulation Description: Determines the number of '1' bits in a binary 32-bit expression. Arguments: x Returns: The number of '1' bits in the binary expression *---------------------------------------------------------------------------*/ u8 MATH_CountPopulation(u32 x) { // Note that in the ARM code below, shift and arithmetic operations can be done simultaneously. // With the Release Build and over, it will be 13 commands without stalls + bx lr. // Rather than counting 32 bits directly, first store the number of 1's for every 2 bits in the same location as those 2 bits. // // In other words, every 2 bits are converted such that 00 -> 00, 01 -> 01, 10 -> 01, and 11 -> 10. // When x -> x', for a 2-bit value we have x' = x - (x >> 1). x -= ((x >> 1) & 0x55555555); // When counting in 4-bit units, add the number of 1's stored in the upper and lower 2 bits, and then store this as the number of 1's in that original 4-bit location. // x = (x & 0x33333333) + ((x >> 2) & 0x33333333); // Do the same for 8-bit units. // However, the maximum result for each digit is 8, and this fits in 4 bits, it is not necessary to mask ahead of time. // x += (x >> 4); // Mask unnecessary parts in preparation for the next operations. x &= 0x0f0f0f0f; // Get the sum of the upper 8 bits and lower 8 bits in 16-bit units. x += (x >> 8); // Do the same for 32-bit units. x += (x >> 16); // The lower 8-bit value is the result. return (u8)x; } #if 0 // Reference implementation in assembler. // This is faster when the number of unset bits ('0') is greater than the number of set bits ('1'). /*---------------------------------------------------------------------------* Name: MATH_CountPopulation_Asm Description: Counts bits that are set to 1 when a value is expressed as 32 bits. Arguments: value - Value to examine. Returns: u32 - Returns the number of bits. An integer between 0 and 32. *---------------------------------------------------------------------------*/ #include asm u32 MATH_CountPopulation_Asm(u32 value) { mov r1 , #0 mov r2 , #1 @loop: clz r3 , r0 rsbs r3 , r3 , #31 bcc @end add r1 , r1 , #1 mvn r3 , r2 , LSL r3 and r0 , r0 , r3 b @loop @end: mov r0 , r1 bx lr } #include #endif