1 /*---------------------------------------------------------------------------*
2 Project: OS - Fast F32 cast using gekko
3 File: OSFastCast.h
4
5 Copyright 1998, 1999 Nintendo. All rights reserved.
6
7 These coded instructions, statements, and computer programs contain
8 proprietary information of Nintendo of America Inc. and/or Nintendo
9 Company Ltd., and are protected by Federal copyright law. They may
10 not be disclosed to third parties or copied or duplicated in any form,
11 in whole or in part, without the prior written consent of Nintendo.
12
13 $Log: OSFastCast.h,v $
14 Revision 1.1.1.1 2005/12/29 06:53:28 hiratsu
15 Initial import.
16
17 Revision 1.1.1.1 2005/05/12 02:41:07 yasuh-to
18 Ported from dolphin source tree.
19
20
21 11 2002/09/02 21:41 Shiki
22 Clean up.
23
24 10 2002/08/21 22:24 Hashida
25 Made SN compliant by SN-Phil.
26
27 9 2002/08/09 10:52 Hirose
28 Added OSSetGQR* functions and related definitions.
29
30 8 2002/07/03 10:29 Hirose
31 Workaround for problems with CW1.3.2 strong optimization.
32
33 7 2001/06/11 7:52p Tian
34 Integrated SN changes
35
36 6 2001/04/17 5:29p Tian
37 Changed all inlines to static inline
38
39 5 2000/07/25 7:43p Tian
40 Updated to avoid GQR1, so we do not collide with Metrowerks' proposed
41 Gekko ABI.
42
43 4 2000/3/28 12:05p Tian
44 Fixed typo that was accidentally checked in. It was Paul's fault.
45
46 3 2000/03/27 4:28p Tian
47 Ifdefed out for win32
48
49 2 2000/3/27 2:25p Tian
50 Fixed bug in non-gekko code
51
52 1 2000/03/27 2:00p Tian
53 Initial check-in.
54 $NoKeywords: $
55 *---------------------------------------------------------------------------*/
56
57 #ifndef __OSFASTCAST_H__
58 #define __OSFASTCAST_H__
59
60 #ifndef _WIN32
61
62
63 #ifdef __cplusplus
64 extern "C" {
65 #endif
66 #ifdef GEKKO
67
// GQR data-type codes.
// These are the values OSInitFastCast() loads into the low bits of each
// 16-bit half of a GQR, and the values expected as the `type` argument of
// the OSSetGQR* helpers.
#define OS_GQR_F32      0x0000      // f32
#define OS_GQR_U8       0x0004      // u8
#define OS_GQR_U16      0x0005      // u16
#define OS_GQR_S8       0x0006      // s8
#define OS_GQR_S16      0x0007      // s16
74
// GQR scale codes, intended as the `scale` argument of the OSSetGQR* helpers.
//
// OS_GQR_SCALE_<n>  is log2(n)        (1 .. 16, up to OS_GQR_SCALE_MAX = 31).
// OS_GQR_DIVIDE_<n> is 64 - log2(n)   (63 .. 48, down to OS_GQR_DIVIDE_MAX = 32).
// NOTE(review): the DIVIDE values look like negative exponents written in
// 6-bit two's complement -- confirm against the Gekko manual.
#define OS_GQR_SCALE_NONE       0

// Power-of-two scale codes
#define OS_GQR_SCALE_2          1
#define OS_GQR_SCALE_4          2
#define OS_GQR_SCALE_8          3
#define OS_GQR_SCALE_16         4
#define OS_GQR_SCALE_32         5
#define OS_GQR_SCALE_64         6
#define OS_GQR_SCALE_128        7
#define OS_GQR_SCALE_256        8
#define OS_GQR_SCALE_512        9
#define OS_GQR_SCALE_1024       10
#define OS_GQR_SCALE_2048       11
#define OS_GQR_SCALE_4096       12
#define OS_GQR_SCALE_8192       13
#define OS_GQR_SCALE_16384      14
#define OS_GQR_SCALE_32768      15
#define OS_GQR_SCALE_65536      16
#define OS_GQR_SCALE_MAX        31

// Power-of-two divide codes
#define OS_GQR_DIVIDE_2         63
#define OS_GQR_DIVIDE_4         62
#define OS_GQR_DIVIDE_8         61
#define OS_GQR_DIVIDE_16        60
#define OS_GQR_DIVIDE_32        59
#define OS_GQR_DIVIDE_64        58
#define OS_GQR_DIVIDE_128       57
#define OS_GQR_DIVIDE_256       56
#define OS_GQR_DIVIDE_512       55
#define OS_GQR_DIVIDE_1024      54
#define OS_GQR_DIVIDE_2048      53
#define OS_GQR_DIVIDE_4096      52
#define OS_GQR_DIVIDE_8192      51
#define OS_GQR_DIVIDE_16384     50
#define OS_GQR_DIVIDE_32768     49
#define OS_GQR_DIVIDE_65536     48
#define OS_GQR_DIVIDE_MAX       32
113
114
115
// GQR numbers dedicated to FastCast, one per integer format.
// GQR0 and GQR1 are deliberately left alone: the compiler is expected to
// reserve them in the future, so FastCast starts at GQR2.
#define OS_FASTCAST_U8      2
#define OS_FASTCAST_U16     3
#define OS_FASTCAST_S8      4
#define OS_FASTCAST_S16     5
122
123
// __OSMtGQR(gqrId, val): writes `val` to the special-purpose register
// GQR<gqrId> with a single mtspr instruction.
//   gqrId - literal GQR number; it is spliced into the register name, so it
//           must be a plain integer token, not an expression.
//   val   - value to write; the Metrowerks form names it directly as an asm
//           operand, so callers there pass a `register` variable.

#if defined(__SN__)
#define __OSMtGQR(gqrId, val)                           \
    asm volatile ("mtspr GQR%0,%1 # __OSMtGQR"          \
                  :                                     \
                  : "i" (gqrId), "b" (val) )
#else
#define __OSMtGQR(gqrId, val)                           \
    asm                                                 \
    {                                                   \
        mtspr   GQR ## gqrId, val                       \
    }
#endif
136
// Programs GQR2.
// `type` and `scale` are packed as (scale << 8) | type, and that 16-bit
// pattern is written to both the upper and the lower half of the register.
static inline void OSSetGQR2( u32 type, u32 scale )
{
    u32 half = (scale << 8) | type;
    register u32 val = (half << 16) | half;

    __OSMtGQR(2, val);
}
142
// Programs GQR3.
// `type` and `scale` are packed as (scale << 8) | type, and that 16-bit
// pattern is written to both the upper and the lower half of the register.
static inline void OSSetGQR3( u32 type, u32 scale )
{
    u32 half = (scale << 8) | type;
    register u32 val = (half << 16) | half;

    __OSMtGQR(3, val);
}
148
// Programs GQR4.
// `type` and `scale` are packed as (scale << 8) | type, and that 16-bit
// pattern is written to both the upper and the lower half of the register.
static inline void OSSetGQR4( u32 type, u32 scale )
{
    u32 half = (scale << 8) | type;
    register u32 val = (half << 16) | half;

    __OSMtGQR(4, val);
}
154
// Programs GQR5.
// `type` and `scale` are packed as (scale << 8) | type, and that 16-bit
// pattern is written to both the upper and the lower half of the register.
static inline void OSSetGQR5( u32 type, u32 scale )
{
    u32 half = (scale << 8) | type;
    register u32 val = (half << 16) | half;

    __OSMtGQR(5, val);
}
160
// Programs GQR6.
// `type` and `scale` are packed as (scale << 8) | type, and that 16-bit
// pattern is written to both the upper and the lower half of the register.
static inline void OSSetGQR6( u32 type, u32 scale )
{
    u32 half = (scale << 8) | type;
    register u32 val = (half << 16) | half;

    __OSMtGQR(6, val);
}
166
// Programs GQR7.
// `type` and `scale` are packed as (scale << 8) | type, and that 16-bit
// pattern is written to both the upper and the lower half of the register.
static inline void OSSetGQR7( u32 type, u32 scale )
{
    u32 half = (scale << 8) | type;
    register u32 val = (half << 16) | half;

    __OSMtGQR(7, val);
}
172
173
174 // This initializes the fast casting facility.
175 // GQRs 1-4 are set to load/store u8, u16, s8, s16 respectively
OSInitFastCast(void)176 static inline void OSInitFastCast ( void )
177 {
178 #ifdef __SN__
179 asm volatile ("
180 li 3, 0x0004
181 oris 3, 3, 0x0004
182 mtspr GQR2, 3
183
184 li 3, 0x0005
185 oris 3, 3, 0x0005
186 mtspr GQR3, 3
187
188 li 3, 0x0006
189 oris 3, 3, 0x0006
190 mtspr GQR4, 3
191
192 li 3, 0x0007
193 oris 3, 3, 0x0007
194 mtspr GQR5, 3
195 " : : : "r3" );
196 #else
197 asm
198 {
199 li r3, OS_GQR_U8
200 oris r3, r3, OS_GQR_U8
201 mtspr GQR2, r3
202
203 li r3, OS_GQR_U16
204 oris r3, r3, OS_GQR_U16
205 mtspr GQR3, r3
206
207 li r3, OS_GQR_S8
208 oris r3, r3, OS_GQR_S8
209 mtspr GQR4, r3
210
211 li r3, OS_GQR_S16
212 oris r3, r3, OS_GQR_S16
213 mtspr GQR5, r3
214 }
215 #endif
216 }
217
218
219 /*---------------------------------------------------------------------------*
220 int to float
221 *---------------------------------------------------------------------------*/
222
223 #ifdef __SN__
224
// SN build: integer-to-float casts as statement macros.
//   in  - pointer to the integer to convert
//   out - pointer to the f32 that receives the result
// Each macro issues one psq_l through GQR2..GQR5; those numbers must stay in
// step with OS_FASTCAST_U8/U16/S8/S16.
// The "memory" clobber tells the compiler that the instruction reads memory
// at `in`, which the operand list alone does not express (only the pointer
// value is an input). Without it, a pending store to *in may be moved past
// the asm. This matches the float-to-int store macros, which already
// declare the clobber.
#define OSu8tof32(in,out)   asm volatile ("psq_l %0, 0(%1), 1, 2 " : "=f" (*(out)) : "b" (in) : "memory")
#define OSu16tof32(in,out)  asm volatile ("psq_l %0, 0(%1), 1, 3 " : "=f" (*(out)) : "b" (in) : "memory")
#define OSs8tof32(in,out)   asm volatile ("psq_l %0, 0(%1), 1, 4 " : "=f" (*(out)) : "b" (in) : "memory")
#define OSs16tof32(in,out)  asm volatile ("psq_l %0, 0(%1), 1, 5 " : "=f" (*(out)) : "b" (in) : "memory")
229
230 #else
231
__OSu8tof32(register u8 * in)232 static inline f32 __OSu8tof32(register u8* in)
233 {
234 register f32 r;
235 asm
236 {
237 psq_l r, 0(in), 1, OS_FASTCAST_U8
238 }
239 return r;
240 }
241
// Converts the u8 at `in` to f32 and stores the result through `out`.
static inline void OSu8tof32(register u8* in, volatile register f32* out)
{
    f32 converted = __OSu8tof32(in);

    *out = converted;
}
246
247
__OSu16tof32(register u16 * in)248 static inline f32 __OSu16tof32(register u16* in)
249 {
250 register f32 r;
251 asm
252 {
253 psq_l r, 0(in), 1, OS_FASTCAST_U16
254 }
255 return r;
256 }
257
// Converts the u16 at `in` to f32 and stores the result through `out`.
static inline void OSu16tof32(register u16* in, volatile register f32* out)
{
    f32 converted = __OSu16tof32(in);

    *out = converted;
}
262
263
__OSs8tof32(register s8 * in)264 static inline f32 __OSs8tof32(register s8* in)
265 {
266 register f32 r;
267 asm
268 {
269 psq_l r, 0(in), 1, OS_FASTCAST_S8
270 }
271 return r;
272 }
273
// Converts the s8 at `in` to f32 and stores the result through `out`.
static inline void OSs8tof32(register s8* in, volatile register f32* out)
{
    f32 converted = __OSs8tof32(in);

    *out = converted;
}
278
279
__OSs16tof32(register s16 * in)280 static inline f32 __OSs16tof32(register s16* in)
281 {
282 register f32 r;
283 asm
284 {
285 psq_l r, 0(in), 1, OS_FASTCAST_S16
286 }
287 return r;
288 }
289
// Converts the s16 at `in` to f32 and stores the result through `out`.
static inline void OSs16tof32(register s16* in, volatile register f32* out)
{
    f32 converted = __OSs16tof32(in);

    *out = converted;
}
294
295
296 #endif
297 /*---------------------------------------------------------------------------*
298 float to int
299
300 Note that due to a compiler bug, we need to use addresses for the FP
301 value. Theoretically, we can simply use psq_st to store out values, but
302 the compiler does not recognize the psq_st as touching static values, and may
303 compile out static values.
304 *---------------------------------------------------------------------------*/
305 #ifdef __SN__
306
// SN build: float-to-integer casts as statement macros.
//   in  - pointer to the f32 to convert
//   out - pointer to the integer that receives the result
// Each macro issues one psq_st through GQR2..GQR5 (the OS_FASTCAST_* GQRs).
// The "memory" clobber records that the instruction writes through `out`.
#define OSf32tou8(in,out)                                       \
    asm volatile ("psq_st %1, 0(%0), 1, 2 "                     \
                  : : "b" (out) , "f" (*(in)) : "memory")
#define OSf32tou16(in,out)                                      \
    asm volatile ("psq_st %1, 0(%0), 1, 3 "                     \
                  : : "b" (out) , "f" (*(in)) : "memory")
#define OSf32tos8(in,out)                                       \
    asm volatile ("psq_st %1, 0(%0), 1, 4 "                     \
                  : : "b" (out) , "f" (*(in)) : "memory")
#define OSf32tos16(in,out)                                      \
    asm volatile ("psq_st %1, 0(%0), 1, 5 "                     \
                  : : "b" (out) , "f" (*(in)) : "memory")
311
312 #else
313
__OSf32tou8(register f32 in)314 static inline u8 __OSf32tou8(register f32 in)
315 {
316 f32 a;
317 register f32* ptr = &a;
318 register u8 r;
319
320 asm
321 {
322 psq_st in, 0(ptr), 1, OS_FASTCAST_U8
323 lbz r, 0(ptr)
324 }
325 return r;
326 }
327
// Converts the f32 at `in` to u8 and stores the result through `out`.
static inline void OSf32tou8(register f32* in, volatile register u8* out)
{
    u8 converted = __OSf32tou8(*in);

    *out = converted;
}
332
333
__OSf32tou16(register f32 in)334 static inline u16 __OSf32tou16(register f32 in)
335 {
336 f32 a;
337 register f32* ptr = &a;
338 register u16 r;
339
340 asm
341 {
342 psq_st in, 0(ptr), 1, OS_FASTCAST_U16
343 lhz r, 0(ptr)
344 }
345 return r;
346 }
347
// Converts the f32 at `in` to u16 and stores the result through `out`.
static inline void OSf32tou16(register f32* in, volatile register u16* out)
{
    u16 converted = __OSf32tou16(*in);

    *out = converted;
}
352
353
__OSf32tos8(register f32 in)354 static inline s8 __OSf32tos8(register f32 in)
355 {
356 f32 a;
357 register f32* ptr = &a;
358 register s8 r;
359
360 asm
361 {
362 psq_st in, 0(ptr), 1, OS_FASTCAST_S8
363 lbz r, 0(ptr)
364 extsb r, r
365 }
366 return r;
367 }
368
// Converts the f32 at `in` to s8 and stores the result through `out`.
static inline void OSf32tos8(register f32* in, volatile register s8* out)
{
    s8 converted = __OSf32tos8(*in);

    *out = converted;
}
373
374
__OSf32tos16(register f32 in)375 static inline s16 __OSf32tos16(register f32 in)
376 {
377 f32 a;
378 register f32* ptr = &a;
379 register s16 r;
380
381 asm
382 {
383 psq_st in, 0(ptr), 1, OS_FASTCAST_S16
384 lha r, 0(ptr)
385 }
386 return r;
387 }
388
// Converts the f32 at `in` to s16 and stores the result through `out`.
static inline void OSf32tos16(register f32* in, volatile register s16* out)
{
    s16 converted = __OSf32tos16(*in);

    *out = converted;
}
393
394 #endif
395
396 #else
397 /*---------------------------------------------------------------------------*
398 Non-Gekko code
399 *---------------------------------------------------------------------------*/
// Non-Gekko stand-in for OSInitFastCast(): there are no GQRs to program, so
// this does nothing. It exists so callers can invoke it unconditionally.
static inline void OSInitFastCast ( void )
{
    do
    {
        /* intentionally empty */
    } while (0);
}
406
407
408 /*---------------------------------------------------------------------------*
409 int to float
410 *---------------------------------------------------------------------------*/
411 static inline void OSs16tof32(s16* in, f32* out)
412 {
413 *out = (f32) *in;
414 }
415
416 static inline void OSs8tof32(s8* in, f32* out)
417 {
418 *out = (f32) *in;
419 }
420
421
422 static inline void OSu16tof32(u16* in, f32* out)
423 {
424 *out = (f32) *in;
425 }
426
427
428 static inline void OSu8tof32(u8* in, f32* out)
429 {
430 *out = (f32) *in;
431 }
432
433 /*---------------------------------------------------------------------------*
434 float to int
435 *---------------------------------------------------------------------------*/
436
437 static inline void OSf32tou8(register f32* in, register u8* out)
438 {
439 *out = (u8)*in;
440 }
441
442 static inline void OSf32tou16(register f32* in, register u16* out)
443 {
444 *out = (u16)*in;
445 }
446
447 static inline void OSf32tos8(register f32* in, register s8* out)
448 {
449 *out = (s8)*in;
450 }
451
452 static inline void OSf32tos16(register f32* in, register s16* out)
453 {
454 *out = (s16)*in;
455 }
456
457
458 #endif // GEKKO
459
460 #ifdef __cplusplus
461 }
462 #endif
#endif // _WIN32

#endif // __OSFASTCAST_H__
466