/*---------------------------------------------------------------------------* Project: Horizon File: util_Float24.h Copyright (C)2009-2012 Nintendo Co., Ltd. All rights reserved. These coded instructions, statements, and computer programs contain proprietary information of Nintendo of America Inc. and/or Nintendo Company Ltd., and are protected by Federal copyright law. They may not be disclosed to third parties or copied or duplicated in any form, in whole or in part, without the prior written consent of Nintendo. $Rev: 46347 $ *---------------------------------------------------------------------------*/ #ifndef NN_UTIL_UTIL_FLOAT24_H_ #define NN_UTIL_UTIL_FLOAT24_H_ #include #ifdef __cplusplus namespace nn { namespace util { //-------------------------------------------------------------------------- // // // The 32-bit, floating point bit format is as follows. // | sign | exponent | fraction | // sign : Sign 1 bit. // exponent : Exponent 8 bit. bias 127. // fraction : mantissa 23 bit. //--------------------------------------------------------------------------- class Float32 { public: //-------------------------------------------------------------------------- // // // // // //-------------------------------------------------------------------------- static u32 Float32ToBits32(f32 value) { return *reinterpret_cast(&value); } //-------------------------------------------------------------------------- // // // // // //-------------------------------------------------------------------------- static f32 Bits32ToFloat32(u32 value) { return *reinterpret_cast(&value); } }; //-------------------------------------------------------------------------- // // // The 24-bit, floating point bit format is as follows. // | sign | exponent | fraction | // sign : Sign 1 bit. // exponent : Exponent 7 bit. bias 63. // fraction : mantissa 16 bit. //--------------------------------------------------------------------------- class Float24 { public: //-------------------------------------------------------------------------- // //-------------------------------------------------------------------------- Float24() : m_Float32( 0.0f ) {} //-------------------------------------------------------------------------- // // // //-------------------------------------------------------------------------- /* implicit */ Float24( u32 bits24 ) { m_Float32 = Bits24ToFloat32( bits24 ); } //-------------------------------------------------------------------------- // // // //-------------------------------------------------------------------------- /* implicit */ Float24( f32 value ) : m_Float32( value ) {} f32 GetFloat32Value() const { return m_Float32; } u32 GetFloat24Value() const { return Float32ToBits24( m_Float32 ); } Float24& operator =(f32 value) { this->m_Float32 = value; return *this; } Float24& operator =(u32 bits24) { this->m_Float32 = Bits24ToFloat32( bits24 ); return *this; } operator f32() const { return m_Float32; } f32 operator +(f32 right) const { return this->m_Float32 + right; } f32 operator -(f32 right) const { return this->m_Float32 - right; } f32 operator *(f32 right) const { return this->m_Float32 * right; } f32 operator /(f32 right) const { return this->m_Float32 / right; } Float24& operator +=(f32 rhs) { this->m_Float32 += rhs; return *this; } Float24& operator -=(f32 rhs) { this->m_Float32 -= rhs; return *this; } Float24& operator *=(f32 rhs) { this->m_Float32 *= rhs; return *this; } Float24& operator /=(f32 rhs) { this->m_Float32 /= rhs; return *this; } bool operator ==(f32 rhs) const { return (rhs == this->m_Float32); } bool operator !=(f32 rhs) const { return !(*this == rhs); } //-------------------------------------------------------------------------- // // // // // //-------------------------------------------------------------------------- static f32 Bits24ToFloat32(u32 bits24) { u32 sign = bits24 & SIGN24; int exp = (int)((bits24 & EXP_MASK24) >> FRACTION_WIDTH24); u32 fraction = bits24 & FRACTION_MASK24; u32 bits32 = 0; bits32 |= (sign != 0) ? SIGN32 : 0; if ((bits24 & ~SIGN24) == 0) { exp = 0; } else { exp = exp - EXP_BIAS24 + EXP_BIAS32; } fraction = fraction << (FRACTION_WIDTH32 - FRACTION_WIDTH24); // This is a bit extension, so 'exp' will not overflow. bits32 |= fraction & FRACTION_MASK32; bits32 |= ((u32)exp & 0xFF) << FRACTION_WIDTH32; return *reinterpret_cast(&bits32); } //-------------------------------------------------------------------------- // // // // // //-------------------------------------------------------------------------- static u32 Float32ToBits24(f32 value) { u32 bits32 = *reinterpret_cast(&value); u32 sign = bits32 & SIGN32; int exp = (int)((bits32 & EXP_MASK32) >> FRACTION_WIDTH32); u32 fraction = bits32 & FRACTION_MASK32; u32 bits24 = 0; bits24 |= (sign != 0) ? SIGN24 : 0; if ((bits32 & ~SIGN32) == 0) { exp = 0; } else { exp = exp - EXP_BIAS32 + EXP_BIAS24; } fraction = fraction >> (FRACTION_WIDTH32 - FRACTION_WIDTH24); if (exp < 0) { // Use +0 or -0 without change. } else if (exp > 127) { // Infinity process // TODO: Need to check whether expressions of IEEE float infinity are valid on the GPU bits24 = (u32)0x7F << FRACTION_WIDTH24; } else { bits24 |= fraction & FRACTION_MASK24; bits24 |= ((u32)exp & 0x7F) << FRACTION_WIDTH24; } return bits24; } private: f32 m_Float32; enum { SIGN32 = 0x80000000, SIGN24 = 0x00800000, EXP_BIAS32 = 127, EXP_BIAS24 = 63, EXP_MASK32 = 0x7F800000, EXP_MASK24 = 0x007F0000, FRACTION_WIDTH32 = 23, FRACTION_MASK32 = 0x007FFFFF, FRACTION_WIDTH24 = 16, FRACTION_MASK24 = 0x0000FFFF }; }; //-------------------------------------------------------------------------- // // // The 31-bit, floating point bit format is as follows. // | sign | exponent | fraction | // sign : Sign 1 bit. // exponent : Exponent 7 bit. bias 63. // fraction : mantissa 23 bit. //--------------------------------------------------------------------------- class Float31 { public: //-------------------------------------------------------------------------- // //-------------------------------------------------------------------------- Float31() : m_Float32( 0.0f ) {} //-------------------------------------------------------------------------- // // // //-------------------------------------------------------------------------- /* implicit */ Float31( u32 bits31 ) { m_Float32 = Bits31ToFloat32( bits31 ); } //-------------------------------------------------------------------------- // // // //-------------------------------------------------------------------------- /* implicit */ Float31( f32 value ) : m_Float32( value ) {} f32 GetFloat32Value() const { return m_Float32; } u32 GetFloat31Value() const { return Float32ToBits31( m_Float32 ); } Float31& operator =(f32 value) { this->m_Float32 = value; return *this; } Float31& operator =(u32 bits31) { this->m_Float32 = Bits31ToFloat32( bits31 ); return *this; } operator f32() const { return m_Float32; } f32 operator +(f32 right) const { return this->m_Float32 + right; } f32 operator -(f32 right) const { return this->m_Float32 - right; } f32 operator *(f32 right) const { return this->m_Float32 * right; } f32 operator /(f32 right) const { return this->m_Float32 / right; } Float31& operator +=(f32 rhs) { this->m_Float32 += rhs; return *this; } Float31& operator -=(f32 rhs) { this->m_Float32 -= rhs; return *this; } Float31& operator *=(f32 rhs) { this->m_Float32 *= rhs; return *this; } Float31& operator /=(f32 rhs) { this->m_Float32 /= rhs; return *this; } bool operator ==(f32 rhs) const { return (rhs == this->m_Float32); } bool operator !=(f32 rhs) const { return !(*this == rhs); } //-------------------------------------------------------------------------- // // // // // //-------------------------------------------------------------------------- static f32 Bits31ToFloat32(u32 bits31) { u32 sign = bits31 & SIGN31; int exp = (int)((bits31 & EXP_MASK31) >> FRACTION_WIDTH31); u32 fraction = bits31 & FRACTION_MASK31; u32 bits32 = 0; bits32 |= (sign != 0) ? SIGN32 : 0; if ((bits31 & ~SIGN31) == 0) { exp = 0; } else { exp = exp - EXP_BIAS31 + EXP_BIAS32; } // This is a bit extension, so 'exp' will not overflow. bits32 |= fraction & FRACTION_MASK32; bits32 |= ((u32)exp & 0xFF) << FRACTION_WIDTH32; return *reinterpret_cast(&bits32); } //-------------------------------------------------------------------------- // // // // // //-------------------------------------------------------------------------- static u32 Float32ToBits31(f32 value) { u32 bits32 = *reinterpret_cast(&value); u32 sign = bits32 & SIGN32; int exp = (int)((bits32 & EXP_MASK32) >> FRACTION_WIDTH32); u32 fraction = bits32 & FRACTION_MASK32; u32 bits31 = 0; bits31 |= (sign != 0) ? SIGN31 : 0; if ((bits32 & ~SIGN32) == 0) { exp = 0; } else { exp = exp - EXP_BIAS32 + EXP_BIAS31; } if (exp < 0) { // Use +0 or -0 without change. } else if (exp > 127) { // Infinity process // TODO: Need to check whether expressions of IEEE float infinity are valid on the GPU bits31 = (u32)0x7F << FRACTION_WIDTH31; } else { bits31 |= fraction & FRACTION_MASK31; bits31 |= ((u32)exp & 0x7F) << FRACTION_WIDTH31; } return bits31; } private: f32 m_Float32; enum { SIGN32 = 0x80000000, SIGN31 = 0x40000000, EXP_BIAS32 = 127, EXP_BIAS31 = 63, EXP_MASK32 = 0x7F800000, EXP_MASK31 = 0x3F800000, FRACTION_WIDTH32 = 23, FRACTION_MASK32 = 0x007FFFFF, FRACTION_WIDTH31 = 23, FRACTION_MASK31 = 0x007FFFFF }; }; //-------------------------------------------------------------------------- // // // The 20-bit, floating point bit format is as follows. // | sign | exponent | fraction | // sign : Sign 1 bit. // exponent : Exponent 7 bit. bias 63. // fraction : mantissa 12 bit. //--------------------------------------------------------------------------- class Float20 { public: //-------------------------------------------------------------------------- // //-------------------------------------------------------------------------- Float20() : m_Float32( 0.0f ) {} //-------------------------------------------------------------------------- // // // //-------------------------------------------------------------------------- /* implicit */ Float20( u32 bits20 ) { m_Float32 = Bits20ToFloat32( bits20 ); } //-------------------------------------------------------------------------- // // // //-------------------------------------------------------------------------- /* implicit */ Float20( f32 value ) : m_Float32( value ) {} f32 GetFloat32Value() const { return m_Float32; } u32 GetFloat20Value() const { return Float32ToBits20( m_Float32 ); } Float20& operator =(f32 value) { this->m_Float32 = value; return *this; } Float20& operator =(u32 bits20) { this->m_Float32 = Bits20ToFloat32( bits20 ); return *this; } operator f32() const { return m_Float32; } f32 operator +(f32 right) const { return this->m_Float32 + right; } f32 operator -(f32 right) const { return this->m_Float32 - right; } f32 operator *(f32 right) const { return this->m_Float32 * right; } f32 operator /(f32 right) const { return this->m_Float32 / right; } Float20& operator +=(f32 rhs) { this->m_Float32 += rhs; return *this; } Float20& operator -=(f32 rhs) { this->m_Float32 -= rhs; return *this; } Float20& operator *=(f32 rhs) { this->m_Float32 *= rhs; return *this; } Float20& operator /=(f32 rhs) { this->m_Float32 /= rhs; return *this; } bool operator ==(f32 rhs) const { return (rhs == this->m_Float32); } bool operator !=(f32 rhs) const { return !(*this == rhs); } //-------------------------------------------------------------------------- // // // // // //-------------------------------------------------------------------------- static f32 Bits20ToFloat32(u32 bits20) { u32 sign = bits20 & SIGN20; int exp = (int)((bits20 & EXP_MASK20) >> FRACTION_WIDTH20); u32 fraction = bits20 & FRACTION_MASK20; u32 bits32 = 0; bits32 |= (sign != 0) ? SIGN32 : 0; if ((bits20 & ~SIGN20) == 0) { exp = 0; } else { exp = exp - EXP_BIAS20 + EXP_BIAS32; } fraction = fraction << (FRACTION_WIDTH32 - FRACTION_WIDTH20); // This is a bit extension, so 'exp' will not overflow. bits32 |= fraction & FRACTION_MASK32; bits32 |= ((u32)exp & 0xFF) << FRACTION_WIDTH32; return *reinterpret_cast(&bits32); } //-------------------------------------------------------------------------- // // // // // //-------------------------------------------------------------------------- static u32 Float32ToBits20(f32 value) { u32 bits32 = *reinterpret_cast(&value); u32 sign = bits32 & SIGN32; int exp = (int)((bits32 & EXP_MASK32) >> FRACTION_WIDTH32); u32 fraction = bits32 & FRACTION_MASK32; u32 bits20 = 0; bits20 |= (sign != 0) ? SIGN20 : 0; if ((bits32 & ~SIGN32) == 0) { exp = 0; } else { exp = exp - EXP_BIAS32 + EXP_BIAS20; } fraction = fraction >> (FRACTION_WIDTH32 - FRACTION_WIDTH20); if (exp < 0) { // Use +0 or -0 without change. } else if (exp > 127) { // Infinity process // TODO: Need to check whether expressions of IEEE float infinity are valid on the GPU bits20 = (u32)0x7F << FRACTION_WIDTH20; } else { bits20 |= fraction & FRACTION_MASK20; bits20 |= ((u32)exp & 0x7F) << FRACTION_WIDTH20; } return bits20; } private: f32 m_Float32; enum { SIGN32 = 0x80000000, SIGN20 = 0x00080000, EXP_BIAS32 = 127, EXP_BIAS20 = 63, EXP_MASK32 = 0x7F800000, EXP_MASK20 = 0x0007F000, FRACTION_WIDTH32 = 23, FRACTION_MASK32 = 0x007FFFFF, FRACTION_WIDTH20 = 12, FRACTION_MASK20 = 0x00000FFF }; }; //-------------------------------------------------------------------------- // // // The 16-bit, floating point bit format is as follows. // | sign | exponent | fraction | // sign : Sign 1 bit. // exponent : Exponent 5 bit. bias 15. // fraction : mantissa 10 bit. //--------------------------------------------------------------------------- class Float16 { public: //-------------------------------------------------------------------------- // //-------------------------------------------------------------------------- Float16() : m_Float32( 0.0f ) {} //-------------------------------------------------------------------------- // // // //-------------------------------------------------------------------------- /* implicit */ Float16( u32 bits16 ) { m_Float32 = Bits16ToFloat32( bits16 ); } //-------------------------------------------------------------------------- // // // //-------------------------------------------------------------------------- /* implicit */ Float16( f32 value ) : m_Float32( value ) {} f32 GetFloat32Value() const { return m_Float32; } u16 GetFloat16Value() const { return Float32ToBits16( m_Float32 ); } Float16& operator =(f32 value) { this->m_Float32 = value; return *this; } Float16& operator =(u32 bits16) { this->m_Float32 = Bits16ToFloat32( bits16 ); return *this; } operator f32() const { return m_Float32; } operator u16() const { return GetFloat16Value(); } f32 operator +(f32 right) const { return this->m_Float32 + right; } f32 operator -(f32 right) const { return this->m_Float32 - right; } f32 operator *(f32 right) const { return this->m_Float32 * right; } f32 operator /(f32 right) const { return this->m_Float32 / right; } Float16& operator +=(f32 rhs) { this->m_Float32 += rhs; return *this; } Float16& operator -=(f32 rhs) { this->m_Float32 -= rhs; return *this; } Float16& operator *=(f32 rhs) { this->m_Float32 *= rhs; return *this; } Float16& operator /=(f32 rhs) { this->m_Float32 /= rhs; return *this; } bool operator ==(f32 rhs) const { return (rhs == this->m_Float32); } bool operator !=(f32 rhs) const { return !(*this == rhs); } //-------------------------------------------------------------------------- // // // // // //-------------------------------------------------------------------------- static f32 Bits16ToFloat32(u32 bits16) { u32 sign = bits16 & SIGN16; int exp = (int)((bits16 & EXP_MASK16) >> FRACTION_WIDTH16); u32 fraction = bits16 & FRACTION_MASK16; u32 bits32 = 0; bits32 |= (sign != 0) ? SIGN32 : 0; if ((bits16 & ~SIGN16) == 0) { exp = 0; } else { exp = exp - EXP_BIAS16 + EXP_BIAS32; } fraction = fraction << (FRACTION_WIDTH32 - FRACTION_WIDTH16); // This is a bit extension, so 'exp' will not overflow. bits32 |= fraction & FRACTION_MASK32; bits32 |= ((u32)exp & 0xFF) << FRACTION_WIDTH32; return *reinterpret_cast(&bits32); } //-------------------------------------------------------------------------- // // // // // //-------------------------------------------------------------------------- static u16 Float32ToBits16(f32 value) { u32 bits32 = *reinterpret_cast(&value); u32 sign = bits32 & SIGN32; int exp = (int)((bits32 & EXP_MASK32) >> FRACTION_WIDTH32); u32 fraction = bits32 & FRACTION_MASK32; u32 bits16 = 0; bits16 |= (sign != 0) ? SIGN16 : 0; if ((bits32 & ~SIGN32) == 0) { exp = 0; } else { exp = exp - EXP_BIAS32 + EXP_BIAS16; } fraction = fraction >> (FRACTION_WIDTH32 - FRACTION_WIDTH16); if (exp < 0) { // Use +0 or -0 without change. } else if (exp > 31) { // Infinity process // TODO: Need to check whether expressions of IEEE float infinity are valid on the GPU bits16 = (u32)0x1F << FRACTION_WIDTH16; } else { bits16 |= fraction & FRACTION_MASK16; bits16 |= ((u32)exp & 0x1F) << FRACTION_WIDTH16; } return static_cast(bits16); } private: f32 m_Float32; enum { SIGN32 = 0x80000000, SIGN16 = 0x00008000, EXP_BIAS32 = 127, EXP_BIAS16 = 15, EXP_MASK32 = 0x7F800000, EXP_MASK16 = 0x00007C00, FRACTION_WIDTH32 = 23, FRACTION_MASK32 = 0x007FFFFF, FRACTION_WIDTH16 = 10, FRACTION_MASK16 = 0x000003FF }; }; //-------------------------------------------------------------------------- // // // The 13-bit, fixed-point bit format is as follows. // | int | decimal | (2's complement) // int : Integer portion 2 bit. // decimal : Fractional portion 11 bit. //--------------------------------------------------------------------------- class Fixed13 { public: //-------------------------------------------------------------------------- // //-------------------------------------------------------------------------- Fixed13() : m_Float32( 0.0f ) {} //-------------------------------------------------------------------------- // // // //-------------------------------------------------------------------------- explicit Fixed13( u32 fixed13 ) { m_Float32 = Fixed13ToFloat32( fixed13 ); } //-------------------------------------------------------------------------- // // // //-------------------------------------------------------------------------- explicit Fixed13( f32 fvalue ) { m_Float32 = fvalue; } f32 GetFloat32Value() const { return m_Float32; } u16 GetFixed13Value() const { return Float32ToFixed13( m_Float32 ); } //-------------------------------------------------------------------------- // // // // // //-------------------------------------------------------------------------- static f32 Fixed13ToFloat32(u32 fixed13) { f32 float32 = static_cast(fixed13); if (fixed13 & (0x1 << (TOTAL_WIDTH - 1))) { float32 -= (0x1 << TOTAL_WIDTH); } return float32 / (0x1 << DECIMAL_WIDTH); } //-------------------------------------------------------------------------- // // // // // //-------------------------------------------------------------------------- static u16 Float32ToFixed13(f32 value) { f32 fixed = value; fixed += ((0x1 << INT_WIDTH) / 2); fixed *= (0x1 << DECIMAL_WIDTH); if (fixed < 0) { fixed = 0.0f; } else if (fixed >= (0x1 << TOTAL_WIDTH)) { fixed = (0x1 << TOTAL_WIDTH) - 1; } fixed -= 0x1 << (TOTAL_WIDTH - 1); return static_cast(static_cast(fixed) & MASK); } private: f32 m_Float32; enum { INT_WIDTH = 2, DECIMAL_WIDTH = 11, TOTAL_WIDTH = 13, MASK = (0x1 << TOTAL_WIDTH) - 1 }; }; //-------------------------------------------------------------------------- // // // The 11-bit, fixed-point bit format is as follows. // decimal : Fractional portion 11 bit. //--------------------------------------------------------------------------- class Fixed11 { public: //-------------------------------------------------------------------------- // //-------------------------------------------------------------------------- Fixed11() : m_Float32( 0.0f ) {} //-------------------------------------------------------------------------- // // // //-------------------------------------------------------------------------- explicit Fixed11( u32 fixed11 ) { m_Float32 = Fixed11ToFloat32( fixed11 ); } //-------------------------------------------------------------------------- // // // //-------------------------------------------------------------------------- explicit Fixed11( f32 fvalue ) { m_Float32 = fvalue; } f32 GetFloat32Value() const { return m_Float32; } u16 GetFixed11Value() const { return Float32ToFixed11( m_Float32 ); } //-------------------------------------------------------------------------- // // // // // //-------------------------------------------------------------------------- static f32 Fixed11ToFloat32(u32 fixed11) { f32 float32 = static_cast(fixed11); return float32 / MASK; } //-------------------------------------------------------------------------- // // // // // //-------------------------------------------------------------------------- static u16 Float32ToFixed11(f32 value) { u32 fixed; u32 v_ = *(u32*)&value; if (value <= 0 || (v_ & 0x7f800000) == 0x7f800000) { fixed = 0; } else { value *= 1 << (DECIMAL_WIDTH - 0); if (value >= (1 << DECIMAL_WIDTH)) { fixed = (1 << DECIMAL_WIDTH) - 1; } else { fixed = (unsigned)(value); } } return static_cast(static_cast(fixed) & MASK); } private: f32 m_Float32; enum { INT_WIDTH = 2, DECIMAL_WIDTH = 11, TOTAL_WIDTH = 11, MASK = (0x1 << TOTAL_WIDTH) - 1 }; }; } /* namespace util */ } /* namespace nn */ #endif // __cplusplus #endif // NN_UTIL_UTIL_FLOAT24_H_