1 /*---------------------------------------------------------------------------*
2 Project: TwlSDK - MATH - demos
3 File: main.c
4
5 Copyright 2007-2008 Nintendo. All rights reserved.
6
7 These coded instructions, statements, and computer programs contain
8 proprietary information of Nintendo of America Inc. and/or Nintendo
9 Company Ltd., and are protected by Federal copyright law. They may
10 not be disclosed to third parties or copied or duplicated in any form,
11 in whole or in part, without the prior written consent of Nintendo.
12
13 $Date:: 2008-09-17#$
14 $Rev: 8556 $
15 $Author: okubata_ryoma $
16 *---------------------------------------------------------------------------*/
17 /*---------------------------------------------------------------------------*
18 MATH library: Demo to confirm operation of fast Fourier transform
19 *---------------------------------------------------------------------------*/
20
21 #include <nitro.h>
22 #include <nitro/fx/fx_trig.h>
23
24
25 static void VBlankIntr(void);
26 static void DisplayInit(void);
27 static void FillScreen(u16 col);
28 static BOOL FFTTest(void);
29 static void PrintFX32(fx32 f);
30 static void PrintRealArray(fx32 *data);
31 static void PrintHalfComplexArray(fx32 *data);
32 static void PrintComplexArray(fx32 *data);
33 static BOOL PrintError(fx32 *orig, fx32 *data);
34
35 static void InitBitRevTable(void);
36 static void FFT(fx32 *data, int nShift);
37 static void IFFT(fx32 *data, int nShift);
38
39 /*---------------------------------------------------------------------------*
40 Variable Definitions
41 *---------------------------------------------------------------------------*/
42 #define FFT_NSHIFT 10
43 #define FFT_N (1 << FFT_NSHIFT)
44 #define FFT_VALUE_MAX ((1 << (31 - FFT_NSHIFT))-1)
45 #define FFT_VALUE_MIN (-(1 << (31 - FFT_NSHIFT)))
46 #define FFT_VALUE_RANGE (1U << (32 - FFT_NSHIFT))
47
48 #define N_RANDOM_INPUT 16
49 #define ERROR_THRETHOLD (MATH_MAX(((double)FFT_VALUE_RANGE)/(1 << 23), (double)(1 << (FFT_NSHIFT-12))))
50
51 static fx32 gFFTCos[FFT_N], gFFTSin[FFT_N];
52 static int gBitRevTable[FFT_N];
53
54 /*---------------------------------------------------------------------------*
55 Function definitions
56 *---------------------------------------------------------------------------*/
57
58 /*---------------------------------------------------------------------------*
59 Name: NitroMain
60
61 Description: Initialization and main loop
62
63 Arguments: None
64
65 Returns: None
66 *---------------------------------------------------------------------------*/
NitroMain(void)67 void NitroMain(void)
68 {
69 // Various types of initialization
70 OS_Init();
71 OS_InitTick();
72
73 DisplayInit();
74
75 if (FFTTest())
76 {
77 // Success
78 OS_TPrintf("------ Test Succeeded ------\n");
79 FillScreen(GX_RGB(0, 31, 0));
80 }
81 else
82 {
83 // Failed
84 OS_TPrintf("****** Test Failed ******\n");
85 FillScreen(GX_RGB(31, 0, 0));
86 }
87
88 // Main loop
89 while (TRUE)
90 {
91 // Waiting for the V-Blank
92 OS_WaitVBlankIntr();
93 }
94 }
95
96 /*---------------------------------------------------------------------------*
97 Name: VBlankIntr
98
99 Description: V-Blank interrupt vector
100
101 Arguments: None
102
103 Returns: None
104 *---------------------------------------------------------------------------*/
VBlankIntr(void)105 static void VBlankIntr(void)
106 {
107 // Sets the IRQ check flag
108 OS_SetIrqCheckFlag(OS_IE_V_BLANK);
109 }
110
111 /*---------------------------------------------------------------------------*
112 Name: DisplayInit
113
114 Description: Graphics Initialization
115
116 Arguments: None
117
118 Returns: None
119 *---------------------------------------------------------------------------*/
DisplayInit(void)120 static void DisplayInit(void)
121 {
122
123 GX_Init();
124 FX_Init();
125
126 GX_DispOff();
127 GXS_DispOff();
128
129 GX_SetDispSelect(GX_DISP_SELECT_SUB_MAIN);
130
131 OS_SetIrqFunction(OS_IE_V_BLANK, VBlankIntr);
132 (void)OS_EnableIrqMask(OS_IE_V_BLANK);
133 (void)GX_VBlankIntr(TRUE); // To generate V-Blank interrupt request
134 (void)OS_EnableIrq();
135
136
137 GX_SetBankForLCDC(GX_VRAM_LCDC_ALL);
138 MI_CpuClearFast((void *)HW_LCDC_VRAM, HW_LCDC_VRAM_SIZE);
139
140 MI_CpuFillFast((void *)HW_OAM, 192, HW_OAM_SIZE); // Clear OAM
141 MI_CpuClearFast((void *)HW_PLTT, HW_PLTT_SIZE); // Clear the standard palette
142
143 MI_CpuFillFast((void *)HW_DB_OAM, 192, HW_DB_OAM_SIZE); // Clear OAM
144 MI_CpuClearFast((void *)HW_DB_PLTT, HW_DB_PLTT_SIZE); // Clear the standard palette
145 MI_DmaFill32(3, (void *)HW_LCDC_VRAM_C, 0x7FFF7FFF, 256 * 192 * sizeof(u16));
146
147
148 GX_SetBankForOBJ(GX_VRAM_OBJ_256_AB); // Set VRAM-A,B for OBJ
149
150 GX_SetGraphicsMode(GX_DISPMODE_VRAM_C, // VRAM mode
151 (GXBGMode)0, // Dummy
152 (GXBG0As)0); // Dummy
153
154 GX_SetVisiblePlane(GX_PLANEMASK_OBJ); // Make OBJs visible
155 GX_SetOBJVRamModeBmp(GX_OBJVRAMMODE_BMP_1D_128K); // 2D mapping OBJ
156
157 OS_WaitVBlankIntr(); // Waiting for the end of the V-Blank interrupt
158 GX_DispOn();
159
160 }
161
162
163 /*---------------------------------------------------------------------------*
164 Name: FillScreen
165
166 Description: Fills the screen
167
168 Arguments: col - FillColor
169
170 Returns: None
171 *---------------------------------------------------------------------------*/
FillScreen(u16 col)172 static void FillScreen(u16 col)
173 {
174 MI_CpuFill16((void *)HW_LCDC_VRAM_C, col, 256 * 192 * 2);
175 }
176
177 /*---------------------------------------------------------------------------*
178 Name: FFTTest
179
180 Description: Test routine for fast Fourier transform
181
182 Arguments: None
183
184 Returns: TRUE if test succeeds
185 *---------------------------------------------------------------------------*/
186 #define PrintResultEq( a, b, f ) \
187 { OS_TPrintf( ((a) == (b)) ? "[--OK--] " : "[**NG**] " ); (f) = (f) && ((a) == (b)); }
188
FFTTest(void)189 static BOOL FFTTest(void)
190 {
191 static fx32 data[FFT_N * 2];
192 static fx32 orig[FFT_N * 2];
193 static fx16 sinTable[FFT_N - FFT_N / 4];
194 static fx16 sinTable2[(FFT_N - FFT_N / 4) / 2];
195
196 BOOL flag = TRUE;
197 int i;
198 OSTick start, end;
199
200 MATH_MakeFFTSinTable(sinTable, FFT_NSHIFT);
201 MATH_MakeFFTSinTable(sinTable2, FFT_NSHIFT - 1);
202
203 OS_TPrintf("N = %d\n", FFT_N);
204 OS_TPrintf("\nMATH_FFT: Sin\n");
205 {
206 for (i = 0; i < FFT_N; i++)
207 {
208 orig[i * 2] = data[i * 2] = FX_SinIdx((65536 / FFT_N) * i);
209 orig[i * 2 + 1] = data[i * 2 + 1] = 0;
210 }
211 start = OS_GetTick();
212 MATH_FFT(data, FFT_NSHIFT, sinTable);
213 end = OS_GetTick();
214 // PrintComplexArray(data);
215 MATH_IFFT(data, FFT_NSHIFT, sinTable);
216 OS_Printf("%lld us, ", OS_TicksToMicroSeconds(end - start));
217 flag = PrintError(orig, data) && flag;
218 }
219
220 OS_TPrintf("\nMATH_FFT: Cos\n");
221 {
222 for (i = 0; i < FFT_N; i++)
223 {
224 orig[i * 2] = data[i * 2] = FX_CosIdx((65536 / FFT_N) * i);
225 orig[i * 2 + 1] = data[i * 2 + 1] = 0;
226 }
227 start = OS_GetTick();
228 MATH_FFT(data, FFT_NSHIFT, sinTable);
229 end = OS_GetTick();
230 // PrintComplexArray(data);
231 MATH_IFFT(data, FFT_NSHIFT, sinTable);
232 OS_Printf("%lld us, ", OS_TicksToMicroSeconds(end - start));
233 flag = PrintError(orig, data) && flag;
234 }
235
236 OS_TPrintf("\nMATH_FFT: Cos + Sin\n");
237 {
238 for (i = 0; i < FFT_N; i++)
239 {
240 orig[i * 2] = data[i * 2] =
241 FX_CosIdx((65536 / FFT_N) * i) + FX_SinIdx((65536 / FFT_N) * i);
242 orig[i * 2 + 1] = data[i * 2 + 1] = 0;
243 }
244 start = OS_GetTick();
245 MATH_FFT(data, FFT_NSHIFT, sinTable);
246 end = OS_GetTick();
247 // PrintComplexArray(data);
248 MATH_IFFT(data, FFT_NSHIFT, sinTable);
249 OS_Printf("%lld us, ", OS_TicksToMicroSeconds(end - start));
250 flag = PrintError(orig, data) && flag;
251 }
252
253 OS_TPrintf("\nMATH_FFT: Highest Freqency (Real)\n");
254 {
255 for (i = 0; i < FFT_N; i++)
256 {
257 orig[i * 2] = data[i * 2] = (i & 1) ? FFT_VALUE_MIN : FFT_VALUE_MAX;
258 orig[i * 2 + 1] = data[i * 2 + 1] = 0;
259 }
260 start = OS_GetTick();
261 MATH_FFT(data, FFT_NSHIFT, sinTable);
262 end = OS_GetTick();
263 // PrintComplexArray(data);
264 MATH_IFFT(data, FFT_NSHIFT, sinTable);
265 OS_Printf("%lld us, ", OS_TicksToMicroSeconds(end - start));
266 flag = PrintError(orig, data) && flag;
267 }
268
269 OS_TPrintf("\nMATH_FFT: Highest Freqency (Complex)\n");
270 {
271 for (i = 0; i < FFT_N; i++)
272 {
273 orig[i * 2] = data[i * 2] = (i & 1) ? FFT_VALUE_MIN : FFT_VALUE_MAX;
274 orig[i * 2 + 1] = data[i * 2 + 1] = (i & 1) ? FFT_VALUE_MIN : FFT_VALUE_MAX;
275 }
276 start = OS_GetTick();
277 MATH_FFT(data, FFT_NSHIFT, sinTable);
278 end = OS_GetTick();
279 // PrintComplexArray(data);
280 MATH_IFFT(data, FFT_NSHIFT, sinTable);
281 OS_Printf("%lld us, ", OS_TicksToMicroSeconds(end - start));
282 flag = PrintError(orig, data) && flag;
283 }
284
285 OS_TPrintf("\nMATH_FFT: Constant\n");
286 {
287 for (i = 0; i < FFT_N; i++)
288 {
289 orig[i * 2] = data[i * 2] = FFT_VALUE_MAX;
290 orig[i * 2 + 1] = data[i * 2 + 1] = FFT_VALUE_MAX;
291 }
292 start = OS_GetTick();
293 MATH_FFT(data, FFT_NSHIFT, sinTable);
294 end = OS_GetTick();
295 // PrintComplexArray(data);
296 MATH_IFFT(data, FFT_NSHIFT, sinTable);
297 OS_Printf("%lld us, ", OS_TicksToMicroSeconds(end - start));
298 flag = PrintError(orig, data) && flag;
299 }
300
301 OS_TPrintf("\nMATH_FFT: Random Input\n");
302 {
303 u32 seed;
304 for (seed = 0; seed < N_RANDOM_INPUT; seed++)
305 {
306 MATHRandContext32 rand;
307
308 MATH_InitRand32(&rand, seed);
309 for (i = 0; i < FFT_N; i++)
310 {
311 orig[i * 2] = data[i * 2] =
312 (fx32)(MATH_Rand32(&rand, FFT_VALUE_RANGE) - (FFT_VALUE_RANGE / 2));
313 orig[i * 2 + 1] = data[i * 2 + 1] =
314 (fx32)(MATH_Rand32(&rand, FFT_VALUE_RANGE) - (FFT_VALUE_RANGE / 2));
315 }
316 start = OS_GetTick();
317 MATH_FFT(data, FFT_NSHIFT, sinTable);
318 end = OS_GetTick();
319 // PrintComplexArray(data);
320 MATH_IFFT(data, FFT_NSHIFT, sinTable);
321 OS_Printf("%lld us, ", OS_TicksToMicroSeconds(end - start));
322 flag = PrintError(orig, data) && flag;
323 }
324 }
325
326 OS_TPrintf("\nMATH_FFTReal: Sin\n");
327 {
328 for (i = 0; i < FFT_N; i++)
329 {
330 orig[i] = data[i] = FX_SinIdx((65536 / FFT_N) * i);
331 }
332 for (; i < FFT_N * 2; i++)
333 {
334 orig[i] = data[i] = 0;
335 }
336 start = OS_GetTick();
337 MATH_FFTReal(data, FFT_NSHIFT, sinTable, sinTable2);
338 end = OS_GetTick();
339 // PrintHalfComplexArray(data);
340 MATH_IFFTReal(data, FFT_NSHIFT, sinTable, sinTable2);
341 // PrintRealArray(data);
342 OS_Printf("%lld us, ", OS_TicksToMicroSeconds(end - start));
343 flag = PrintError(orig, data) && flag;
344 }
345
346 OS_TPrintf("\nMATH_FFTReal: Random Input\n");
347 {
348 u32 seed;
349 for (seed = 0; seed < N_RANDOM_INPUT; seed++)
350 {
351 MATHRandContext32 rand;
352
353 MATH_InitRand32(&rand, seed);
354 for (i = 0; i < FFT_N; i++)
355 {
356 orig[i] = data[i] =
357 (fx32)(MATH_Rand32(&rand, FFT_VALUE_RANGE) - (FFT_VALUE_RANGE / 2));
358 }
359 for (; i < FFT_N * 2; i++)
360 {
361 orig[i] = data[i] = 0;
362 }
363 start = OS_GetTick();
364 MATH_FFTReal(data, FFT_NSHIFT, sinTable, sinTable2);
365 end = OS_GetTick();
366 // PrintComplexArray(data);
367 MATH_IFFTReal(data, FFT_NSHIFT, sinTable, sinTable2);
368 OS_Printf("%lld us, ", OS_TicksToMicroSeconds(end - start));
369 flag = PrintError(orig, data) && flag;
370 }
371 }
372
373 #if 0
374 OS_TPrintf("\nLocal FFT Function\n");
375 InitBitRevTable();
376 {
377 for (i = 0; i < FFT_N; i++)
378 {
379 data[i * 2] = FX_SinIdx((65536 / FFT_N) * i);
380 data[i * 2 + 1] = 0;
381 }
382 FFT(data, FFT_NSHIFT);
383 // PrintComplexArray(data);
384 }
385
386 {
387 u32 seed;
388 for (seed = 0; seed < N_RANDOM_INPUT; seed++)
389 {
390 MATHRandContext32 rand;
391
392 MATH_InitRand32(&rand, seed);
393 for (i = 0; i < FFT_N; i++)
394 {
395 orig[i * 2] = data[i * 2] =
396 (fx32)(MATH_Rand32(&rand, FFT_VALUE_RANGE) - (FFT_VALUE_RANGE / 2));
397 orig[i * 2 + 1] = data[i * 2 + 1] =
398 (fx32)(MATH_Rand32(&rand, FFT_VALUE_RANGE) - (FFT_VALUE_RANGE / 2));
399 }
400 FFT(data, FFT_NSHIFT);
401 // PrintComplexArray(data);
402 IFFT(data, FFT_NSHIFT);
403 flag = PrintError(orig, data) && flag;
404 }
405 }
406
407 #endif
408
409
410 return flag;
411 }
412
PrintRealArray(fx32 * data)413 static void PrintRealArray(fx32 *data)
414 {
415 u32 i;
416 for (i = 0; i < FFT_N; i++)
417 {
418 OS_TPrintf("%3x: ", i);
419 PrintFX32(data[i]);
420 OS_TPrintf("\n");
421 }
422 }
423
PrintHalfComplexArray(fx32 * data)424 static void PrintHalfComplexArray(fx32 *data)
425 {
426 u32 i;
427
428 i = 0;
429 OS_TPrintf("%3x: ", i);
430 PrintFX32(data[0]);
431 OS_TPrintf("\n");
432 for (i = 1; i < FFT_N / 2; i++)
433 {
434 OS_TPrintf("%3x: ", i);
435 PrintFX32(data[i * 2]);
436 OS_TPrintf(", ");
437 PrintFX32(data[i * 2 + 1]);
438 OS_TPrintf("\n");
439 }
440 OS_TPrintf("%3x: ", i);
441 PrintFX32(data[1]);
442 OS_TPrintf("\n");
443 }
444
PrintComplexArray(fx32 * data)445 static void PrintComplexArray(fx32 *data)
446 {
447 u32 i;
448 for (i = 0; i < FFT_N; i++)
449 {
450 OS_TPrintf("%3x: ", i);
451 PrintFX32(data[i * 2]);
452 OS_TPrintf(", ");
453 PrintFX32(data[i * 2 + 1]);
454 OS_TPrintf("\n");
455 }
456 }
457
PrintFX32(fx32 f)458 static void PrintFX32(fx32 f)
459 {
460 #pragma unused(f) // Required for FINALROM build
461 OS_Printf("%f", FX_FX32_TO_F32(f));
462 #if 0
463 if (f >= 0)
464 {
465 OS_TPrintf(" %6d.%03d", f >> FX32_SHIFT, (f & FX32_DEC_MASK) * 1000 / 4096);
466 }
467 else
468 {
469 OS_TPrintf("-%6d.%03d", (-f) >> FX32_SHIFT, ((-f) & FX32_DEC_MASK) * 1000 / 4096);
470 }
471 #endif
472 }
473
PrintError(fx32 * orig,fx32 * data)474 static BOOL PrintError(fx32 *orig, fx32 *data)
475 {
476 u32 i;
477 fx32 max_error;
478 double sum_sqd, e;
479
480 max_error = 0;
481 sum_sqd = 0;
482 for (i = 0; i < FFT_N * 2; i++)
483 {
484 fx32 d = MATH_ABS(data[i] - orig[i]);
485 double dd = FX_FX32_TO_F32(d);
486 double od = FX_FX32_TO_F32(orig[i]);
487
488 if (d > max_error)
489 {
490 max_error = d;
491 }
492
493 sum_sqd += dd * dd;
494 }
495 sum_sqd /= FFT_N * 2;
496 e = FX_FX32_TO_F32(max_error);
497 OS_Printf("Max Error: %f, Dist.^2: %.4g\n", e, sum_sqd);
498
499 return (e <= ERROR_THRETHOLD);
500 }
501
InitBitRevTable(void)502 static void InitBitRevTable(void)
503 {
504 int i, j, k;
505
506 i = j = 0;
507 for (;;)
508 {
509 gBitRevTable[i] = j;
510 if (++i >= FFT_N)
511 break;
512 k = FFT_N / 2;
513 while (k <= j)
514 {
515 j -= k;
516 k /= 2;
517 }
518 j += k;
519 }
520 }
521
FFT(fx32 * data,int nShift)522 static void FFT(fx32 *data, int nShift)
523 {
524 int i, j, k, ik, h, d, k2;
525 fx32 t, s, c, dx, dy;
526 u32 n = 1U << nShift;
527 OSTick start, end;
528
529 SDK_ASSERT(n == FFT_N);
530
531 for (i = 0; i < FFT_N; i++)
532 {
533 gFFTCos[i] = data[i * 2];
534 gFFTSin[i] = data[i * 2 + 1];
535 }
536
537 start = OS_GetTick();
538 for (i = 0; i < FFT_N; i++)
539 { /* Bit inversion */
540 j = gBitRevTable[i];
541 if (i < j)
542 {
543 t = gFFTCos[i];
544 gFFTCos[i] = gFFTCos[j];
545 gFFTCos[j] = t;
546 t = gFFTSin[i];
547 gFFTSin[i] = gFFTSin[j];
548 gFFTSin[j] = t;
549 }
550 }
551 for (k = 1; k < FFT_N; k = k2)
552 { /* Conversion */
553 h = 0;
554 k2 = k + k;
555 d = FFT_N / k2;
556 for (j = 0; j < k; j++)
557 {
558 c = FX_CosIdx(h * (65536 / FFT_N));
559 s = FX_SinIdx(h * (65536 / FFT_N));
560 for (i = j; i < FFT_N; i += k2)
561 {
562 ik = i + k;
563 dx = FX_Mul(s, gFFTSin[ik]) + FX_Mul(c, gFFTCos[ik]);
564 dy = FX_Mul(c, gFFTSin[ik]) - FX_Mul(s, gFFTCos[ik]);
565 gFFTCos[ik] = gFFTCos[i] - dx;
566 gFFTCos[i] += dx;
567 gFFTSin[ik] = gFFTSin[i] - dy;
568 gFFTSin[i] += dy;
569 }
570 h += d;
571 }
572 }
573 end = OS_GetTick();
574 OS_Printf("%lld us, ", OS_TicksToMicroSeconds(end - start));
575
576 for (i = 0; i < FFT_N; i++)
577 {
578 data[i * 2] = gFFTCos[i] >> nShift;
579 data[i * 2 + 1] = gFFTSin[i] >> nShift;
580 }
581 }
582
IFFT(fx32 * data,int nShift)583 static void IFFT(fx32 *data, int nShift)
584 {
585 int i, j, k, ik, h, d, k2;
586 fx32 t, s, c, dx, dy;
587 u32 n = 1U << nShift;
588
589 SDK_ASSERT(n == FFT_N);
590
591 for (i = 0; i < FFT_N; i++)
592 {
593 gFFTCos[i] = data[i * 2];
594 gFFTSin[i] = data[i * 2 + 1];
595 }
596
597 for (i = 0; i < FFT_N; i++)
598 { /* Bit inversion */
599 j = gBitRevTable[i];
600 if (i < j)
601 {
602 t = gFFTCos[i];
603 gFFTCos[i] = gFFTCos[j];
604 gFFTCos[j] = t;
605 t = gFFTSin[i];
606 gFFTSin[i] = gFFTSin[j];
607 gFFTSin[j] = t;
608 }
609 }
610 for (k = 1; k < FFT_N; k = k2)
611 { /* Conversion */
612 h = 0;
613 k2 = k + k;
614 d = FFT_N / k2;
615 for (j = 0; j < k; j++)
616 {
617 c = FX_CosIdx(h * (65536 / FFT_N));
618 s = FX_SinIdx(h * (65536 / FFT_N));
619 for (i = j; i < FFT_N; i += k2)
620 {
621 ik = i + k;
622 dx = -FX_Mul(s, gFFTSin[ik]) + FX_Mul(c, gFFTCos[ik]);
623 dy = FX_Mul(c, gFFTSin[ik]) + FX_Mul(s, gFFTCos[ik]);
624 gFFTCos[ik] = gFFTCos[i] - dx;
625 gFFTCos[i] += dx;
626 gFFTSin[ik] = gFFTSin[i] - dy;
627 gFFTSin[i] += dy;
628 }
629 h += d;
630 }
631 }
632
633 for (i = 0; i < FFT_N; i++)
634 {
635 data[i * 2] = gFFTCos[i];
636 data[i * 2 + 1] = gFFTSin[i];
637 }
638 }
639
640 /*---------------------------------------------------------------------------*
641 End of file
642 *---------------------------------------------------------------------------*/
643