1 /*---------------------------------------------------------------------------*
2   Project: OS - Fast F32 cast using gekko
3   File:    OSFastCast.h
4 
5   Copyright 1998, 1999 Nintendo. All rights reserved.
6 
7   These coded instructions, statements, and computer programs contain
8   proprietary information of Nintendo of America Inc. and/or Nintendo
9   Company Ltd., and are protected by Federal copyright law.  They may
10   not be disclosed to third parties or copied or duplicated in any form,
11   in whole or in part, without the prior written consent of Nintendo.
12 
13   $Log: OSFastCast.h,v $
14   Revision 1.1.1.1  2005/12/29 06:53:28  hiratsu
15   Initial import.
16 
17   Revision 1.1.1.1  2005/05/12 02:41:07  yasuh-to
18   Ported from dolphin source tree.
19 
20 
21     11    2002/09/02 21:41 Shiki
22     Clean up.
23 
24     10    2002/08/21 22:24 Hashida
25     Made SN compliant by SN-Phil.
26 
27     9     2002/08/09 10:52 Hirose
28     Added OSSetGQR* functions and related definitions.
29 
30     8     2002/07/03 10:29 Hirose
31     Workaround for problems with CW1.3.2 strong optimization.
32 
33     7    2001/06/11 7:52p Tian
34     Integrated SN changes
35 
36     6     2001/04/17 5:29p Tian
37     Changed all inlines to static inline
38 
39     5     2000/07/25 7:43p Tian
40     Updated to avoid GQR1, so we do not collide with Metrowerks' proposed
41     Gekko ABI.
42 
43     4     2000/3/28 12:05p Tian
44     Fixed typo that was accidentally checked in.  It was Paul's fault.
45 
46     3     2000/03/27 4:28p Tian
47     Ifdefed out for win32
48 
49     2     2000/3/27 2:25p Tian
50     Fixed bug in non-gekko code
51 
52     1     2000/03/27 2:00p Tian
53     Initial check-in.
54   $NoKeywords: $
55  *---------------------------------------------------------------------------*/
56 
57 #ifndef __OSFASTCAST_H__
58 #define __OSFASTCAST_H__
59 
60 #ifndef _WIN32
61 
62 
63 #ifdef __cplusplus
64 extern "C" {
65 #endif
66 #ifdef GEKKO
67 
// GQR formats we use
// (type codes; the OSSetGQR* functions in this header OR one of these into
// the low bits of each 16-bit half of a GQR value)
#define OS_GQR_F32 0x0000
#define OS_GQR_U8  0x0004
#define OS_GQR_U16 0x0005
#define OS_GQR_S8  0x0006
#define OS_GQR_S16 0x0007

// GQR scale factors
// (the OSSetGQR* functions in this header shift one of these left by 8 within
// each 16-bit half of a GQR value)
//
// OS_GQR_SCALE_<n> has the value log2(n): 1 for 2, 2 for 4, ... 16 for 65536.
#define OS_GQR_SCALE_NONE   0

#define OS_GQR_SCALE_2      1
#define OS_GQR_SCALE_4      2
#define OS_GQR_SCALE_8      3
#define OS_GQR_SCALE_16     4
#define OS_GQR_SCALE_32     5
#define OS_GQR_SCALE_64     6
#define OS_GQR_SCALE_128    7
#define OS_GQR_SCALE_256    8
#define OS_GQR_SCALE_512    9
#define OS_GQR_SCALE_1024   10
#define OS_GQR_SCALE_2048   11
#define OS_GQR_SCALE_4096   12
#define OS_GQR_SCALE_8192   13
#define OS_GQR_SCALE_16384  14
#define OS_GQR_SCALE_32768  15
#define OS_GQR_SCALE_65536  16
#define OS_GQR_SCALE_MAX    31

// OS_GQR_DIVIDE_<n> has the value 64 - log2(n): 63 for 2, 62 for 4, ...
// 48 for 65536.
#define OS_GQR_DIVIDE_2     63
#define OS_GQR_DIVIDE_4     62
#define OS_GQR_DIVIDE_8     61
#define OS_GQR_DIVIDE_16    60
#define OS_GQR_DIVIDE_32    59
#define OS_GQR_DIVIDE_64    58
#define OS_GQR_DIVIDE_128   57
#define OS_GQR_DIVIDE_256   56
#define OS_GQR_DIVIDE_512   55
#define OS_GQR_DIVIDE_1024  54
#define OS_GQR_DIVIDE_2048  53
#define OS_GQR_DIVIDE_4096  52
#define OS_GQR_DIVIDE_8192  51
#define OS_GQR_DIVIDE_16384 50
#define OS_GQR_DIVIDE_32768 49
#define OS_GQR_DIVIDE_65536 48
#define OS_GQR_DIVIDE_MAX   32



// The GQRs that we use for FastCast.  Note that in the future, the compiler
// will reserve GQRs 0 and 1, so we avoid using GQR1.
// These are GQR register numbers (GQR2..GQR5), not format codes; they are
// used as the last operand of the psq_l / psq_st instructions in this header.
#define OS_FASTCAST_U8  2
#define OS_FASTCAST_U16 3
#define OS_FASTCAST_S8  4
#define OS_FASTCAST_S16 5
122 
123 
// Function to set up GQRs.
//
// __OSMtGQR(gqrId, val) emits one "mtspr" that writes val into the register
// named GQR<gqrId>.
//   gqrId - must be a literal register number: it is token-pasted onto "GQR"
//           in the default variant and passed as an "i" (immediate) operand
//           in the __SN__ variant.
//   val   - the value to write.  The default variant uses it directly as an
//           operand inside the asm block; the callers in this header pass a
//           `register` local.

#ifdef __SN__
#define __OSMtGQR(gqrId, val)       \
    asm volatile ("mtspr   GQR%0,%1 # __OSMtGQR" : : "i" (gqrId), "b" (val) )
#else
#define __OSMtGQR(gqrId, val)       \
    asm                             \
    {                               \
        mtspr   GQR ## gqrId, val   \
    }
#endif
136 
// Programs GQR2.  The 16-bit pattern (scale << 8) | type is written to both
// the upper and the lower half of the register.  Neither argument is masked.
static inline void OSSetGQR2( u32 type, u32 scale )
{
    u32          half   = (scale << 8) | type;
    register u32 packed = (half << 16) | half;

    __OSMtGQR(2, packed);
}
142 
// Programs GQR3.  The 16-bit pattern (scale << 8) | type is written to both
// the upper and the lower half of the register.  Neither argument is masked.
static inline void OSSetGQR3( u32 type, u32 scale )
{
    u32          half   = (scale << 8) | type;
    register u32 packed = (half << 16) | half;

    __OSMtGQR(3, packed);
}
148 
// Programs GQR4.  The 16-bit pattern (scale << 8) | type is written to both
// the upper and the lower half of the register.  Neither argument is masked.
static inline void OSSetGQR4( u32 type, u32 scale )
{
    u32          half   = (scale << 8) | type;
    register u32 packed = (half << 16) | half;

    __OSMtGQR(4, packed);
}
154 
// Programs GQR5.  The 16-bit pattern (scale << 8) | type is written to both
// the upper and the lower half of the register.  Neither argument is masked.
static inline void OSSetGQR5( u32 type, u32 scale )
{
    u32          half   = (scale << 8) | type;
    register u32 packed = (half << 16) | half;

    __OSMtGQR(5, packed);
}
160 
// Programs GQR6.  The 16-bit pattern (scale << 8) | type is written to both
// the upper and the lower half of the register.  Neither argument is masked.
static inline void OSSetGQR6( u32 type, u32 scale )
{
    u32          half   = (scale << 8) | type;
    register u32 packed = (half << 16) | half;

    __OSMtGQR(6, packed);
}
166 
// Programs GQR7.  The 16-bit pattern (scale << 8) | type is written to both
// the upper and the lower half of the register.  Neither argument is masked.
static inline void OSSetGQR7( u32 type, u32 scale )
{
    u32          half   = (scale << 8) | type;
    register u32 packed = (half << 16) | half;

    __OSMtGQR(7, packed);
}
172 
173 
// This initializes the fast casting facility.
// GQRs 2-5 are set to load/store u8, u16, s8, s16 respectively
// (GQR2 = u8, GQR3 = u16, GQR4 = s8, GQR5 = s16), which are the register
// numbers named by OS_FASTCAST_U8 / _U16 / _S8 / _S16.
// Each register receives the same type code in its upper and lower 16 bits
// (li sets the low half, oris ORs the code into the high half); all other
// bits are zero.
static inline void OSInitFastCast ( void )
{
#ifdef __SN__
    // The asm below is a string, so it spells the type codes as literals:
    // 0x0004..0x0007 are the values of OS_GQR_U8, OS_GQR_U16, OS_GQR_S8 and
    // OS_GQR_S16.  r3 is the scratch register and is listed as clobbered.
          asm volatile ("
        li      3, 0x0004
        oris    3, 3, 0x0004
        mtspr   GQR2, 3

        li      3, 0x0005
        oris    3, 3, 0x0005
        mtspr   GQR3, 3

        li      3, 0x0006
        oris    3, 3, 0x0006
        mtspr   GQR4, 3

        li      3, 0x0007
        oris    3, 3, 0x0007
        mtspr   GQR5, 3
    " : : : "r3" );
#else
    // Same sequence using the OS_GQR_* names.
    // NOTE(review): r3 is written directly here; how the compiler accounts
    // for that when this function is inlined is not visible from this file.
    asm
    {
        li      r3, OS_GQR_U8
        oris    r3, r3, OS_GQR_U8
        mtspr   GQR2, r3

        li      r3, OS_GQR_U16
        oris    r3, r3, OS_GQR_U16
        mtspr   GQR3, r3

        li      r3, OS_GQR_S8
        oris    r3, r3, OS_GQR_S8
        mtspr   GQR4, r3

        li      r3, OS_GQR_S16
        oris    r3, r3, OS_GQR_S16
        mtspr   GQR5, r3
    }
#endif
}
217 
218 
219 /*---------------------------------------------------------------------------*
220   int to float
221  *---------------------------------------------------------------------------*/
222 
223 #ifdef __SN__
224 
// Each macro issues one psq_l that reads from the address `in` and writes the
// loaded value to *(out) (an "=f" floating-point output operand).
// The last operand of psq_l is the GQR number; the literals 2, 3, 4 and 5 are
// the values of OS_FASTCAST_U8, OS_FASTCAST_U16, OS_FASTCAST_S8 and
// OS_FASTCAST_S16.
// `out` is expanded inside *( ) and `in` is passed as a base-register ("b")
// operand, so both arguments are pointers.
#define OSu8tof32(in,out)   asm volatile ("psq_l   %0, 0(%1), 1, 2  " : "=f" (*(out)) : "b" (in) )
#define OSu16tof32(in,out)  asm volatile ("psq_l   %0, 0(%1), 1, 3  " : "=f" (*(out)) : "b" (in) )
#define OSs8tof32(in,out)   asm volatile ("psq_l   %0, 0(%1), 1, 4  " : "=f" (*(out)) : "b" (in) )
#define OSs16tof32(in,out)  asm volatile ("psq_l   %0, 0(%1), 1, 5  " : "=f" (*(out)) : "b" (in) )
229 
230 #else
231 
__OSu8tof32(register u8 * in)232 static inline f32 __OSu8tof32(register u8* in)
233 {
234     register f32   r;
235     asm
236     {
237         psq_l      r, 0(in), 1, OS_FASTCAST_U8
238     }
239     return r;
240 }
241 
// Converts the u8 at `in` to f32 via __OSu8tof32 and stores the result
// through `out`.
static inline void OSu8tof32(register u8* in, volatile register f32* out)
{
    f32 converted = __OSu8tof32(in);

    *out = converted;
}
246 
247 
__OSu16tof32(register u16 * in)248 static inline f32 __OSu16tof32(register u16* in)
249 {
250     register f32   r;
251     asm
252     {
253         psq_l      r, 0(in), 1, OS_FASTCAST_U16
254     }
255     return r;
256 }
257 
// Converts the u16 at `in` to f32 via __OSu16tof32 and stores the result
// through `out`.
static inline void OSu16tof32(register u16* in, volatile register f32* out)
{
    f32 converted = __OSu16tof32(in);

    *out = converted;
}
262 
263 
__OSs8tof32(register s8 * in)264 static inline f32 __OSs8tof32(register s8* in)
265 {
266     register f32   r;
267     asm
268     {
269         psq_l      r, 0(in), 1, OS_FASTCAST_S8
270     }
271     return r;
272 }
273 
// Converts the s8 at `in` to f32 via __OSs8tof32 and stores the result
// through `out`.
static inline void OSs8tof32(register s8* in, volatile register f32* out)
{
    f32 converted = __OSs8tof32(in);

    *out = converted;
}
278 
279 
__OSs16tof32(register s16 * in)280 static inline f32 __OSs16tof32(register s16* in)
281 {
282     register f32   r;
283     asm
284     {
285         psq_l      r, 0(in), 1, OS_FASTCAST_S16
286     }
287     return r;
288 }
289 
// Converts the s16 at `in` to f32 via __OSs16tof32 and stores the result
// through `out`.
static inline void OSs16tof32(register s16* in, volatile register f32* out)
{
    f32 converted = __OSs16tof32(in);

    *out = converted;
}
294 
295 
296 #endif
297 /*---------------------------------------------------------------------------*
298   float to int
299 
300   Note that due to a compiler bug, we need to use addresses for the FP
301   value.  Theoretically, we can simply use psq_st to store out values, but
302   the compiler does not recognize the psq_st as touching static values, and may
303   compile out static values.
304  *---------------------------------------------------------------------------*/
305 #ifdef __SN__
306 
// Each macro issues one psq_st that stores the f32 value *(in) to the address
// `out`.  The last operand of psq_st is the GQR number; the literals 2, 3, 4
// and 5 are the values of OS_FASTCAST_U8, OS_FASTCAST_U16, OS_FASTCAST_S8 and
// OS_FASTCAST_S16.
// The "memory" clobber tells the compiler that the asm modifies memory, since
// the store target is only passed as an address operand.
#define OSf32tou8(in,out)   asm volatile ("psq_st   %1, 0(%0), 1, 2 " : : "b" (out) , "f" (*(in)) : "memory")
#define OSf32tou16(in,out)  asm volatile ("psq_st   %1, 0(%0), 1, 3 " : : "b" (out) , "f" (*(in)) : "memory")
#define OSf32tos8(in,out)   asm volatile ("psq_st   %1, 0(%0), 1, 4 " : : "b" (out) , "f" (*(in)) : "memory")
#define OSf32tos16(in,out)  asm volatile ("psq_st   %1, 0(%0), 1, 5 " : : "b" (out) , "f" (*(in)) : "memory")
311 
312 #else
313 
__OSf32tou8(register f32 in)314 static inline u8 __OSf32tou8(register f32 in)
315 {
316     f32           a;
317     register f32* ptr = &a;
318     register u8   r;
319 
320     asm
321     {
322         psq_st  in, 0(ptr), 1, OS_FASTCAST_U8
323         lbz     r, 0(ptr)
324     }
325     return r;
326 }
327 
// Converts the f32 at `in` to u8 via __OSf32tou8 and stores the result
// through `out`.
static inline void OSf32tou8(register f32* in, volatile register u8* out)
{
    u8 converted = __OSf32tou8(*in);

    *out = converted;
}
332 
333 
__OSf32tou16(register f32 in)334 static inline u16 __OSf32tou16(register f32 in)
335 {
336     f32           a;
337     register f32* ptr = &a;
338     register u16  r;
339 
340     asm
341     {
342         psq_st  in, 0(ptr), 1, OS_FASTCAST_U16
343         lhz     r, 0(ptr)
344     }
345     return r;
346 }
347 
// Converts the f32 at `in` to u16 via __OSf32tou16 and stores the result
// through `out`.
static inline void OSf32tou16(register f32* in, volatile register u16* out)
{
    u16 converted = __OSf32tou16(*in);

    *out = converted;
}
352 
353 
__OSf32tos8(register f32 in)354 static inline s8 __OSf32tos8(register f32 in)
355 {
356     f32           a;
357     register f32* ptr = &a;
358     register s8   r;
359 
360     asm
361     {
362         psq_st  in, 0(ptr), 1, OS_FASTCAST_S8
363         lbz     r, 0(ptr)
364         extsb   r, r
365     }
366     return r;
367 }
368 
// Converts the f32 at `in` to s8 via __OSf32tos8 and stores the result
// through `out`.
static inline void OSf32tos8(register f32* in, volatile register s8* out)
{
    s8 converted = __OSf32tos8(*in);

    *out = converted;
}
373 
374 
__OSf32tos16(register f32 in)375 static inline s16 __OSf32tos16(register f32 in)
376 {
377     f32           a;
378     register f32* ptr = &a;
379     register s16  r;
380 
381     asm
382     {
383         psq_st  in, 0(ptr), 1, OS_FASTCAST_S16
384         lha     r, 0(ptr)
385     }
386     return r;
387 }
388 
// Converts the f32 at `in` to s16 via __OSf32tos16 and stores the result
// through `out`.
static inline void OSf32tos16(register f32* in, volatile register s16* out)
{
    s16 converted = __OSf32tos16(*in);

    *out = converted;
}
393 
394 #endif
395 
396 #else
397 /*---------------------------------------------------------------------------*
398   Non-Gekko code
399  *---------------------------------------------------------------------------*/
// Non-Gekko stand-in for OSInitFastCast: there is nothing to program here, so
// the body is a loop that never executes.
static inline void OSInitFastCast ( void )
{
    do
    {
    } while (0);
}
406 
407 
408 /*---------------------------------------------------------------------------*
409   int to float
410  *---------------------------------------------------------------------------*/
411 static inline void OSs16tof32(s16* in, f32* out)
412 {
413     *out = (f32) *in;
414 }
415 
416 static inline void OSs8tof32(s8* in, f32* out)
417 {
418     *out = (f32) *in;
419 }
420 
421 
422 static inline void OSu16tof32(u16* in, f32* out)
423 {
424     *out = (f32) *in;
425 }
426 
427 
428 static inline void OSu8tof32(u8* in, f32* out)
429 {
430     *out = (f32) *in;
431 }
432 
433 /*---------------------------------------------------------------------------*
434   float to int
435  *---------------------------------------------------------------------------*/
436 
437 static inline void OSf32tou8(register f32* in, register u8* out)
438 {
439     *out = (u8)*in;
440 }
441 
442 static inline void OSf32tou16(register f32* in, register u16* out)
443 {
444     *out = (u16)*in;
445 }
446 
447 static inline void OSf32tos8(register f32* in, register s8* out)
448 {
449     *out = (s8)*in;
450 }
451 
452 static inline void OSf32tos16(register f32* in, register s16* out)
453 {
454     *out = (s16)*in;
455 }
456 
457 
458 #endif // GEKKO
459 
460 #ifdef __cplusplus
461 }
462 #endif
463 #endif
464 
465 #endif // _WIN32
466