1 /*---------------------------------------------------------------------------*
2 Project: TwlSDK - MATH - demos
3 File: main.c
4
5 Copyright 2007-2009 Nintendo. All rights reserved.
6
7 These coded instructions, statements, and computer programs contain
8 proprietary information of Nintendo of America Inc. and/or Nintendo
9 Company Ltd., and are protected by Federal copyright law. They may
10 not be disclosed to third parties or copied or duplicated in any form,
11 in whole or in part, without the prior written consent of Nintendo.
12
13 $Date:: 2009-06-04#$
14 $Rev: 10698 $
15 $Author: okubata_ryoma $
16 *---------------------------------------------------------------------------*/
17 /*---------------------------------------------------------------------------*
18 MATH library: Demo to confirm operation of fast Fourier transform
19 *---------------------------------------------------------------------------*/
20
21 #include <nitro.h>
22 #include <nitro/fx/fx_trig.h>
23
24
25 static void VBlankIntr(void);
26 static void DisplayInit(void);
27 static void FillScreen(u16 col);
28 static BOOL FFTTest(void);
29 static void PrintFX32(fx32 f);
30 static void PrintRealArray(fx32 *data);
31 static void PrintHalfComplexArray(fx32 *data);
32 static void PrintComplexArray(fx32 *data);
33 static BOOL PrintError(fx32 *orig, fx32 *data);
34
35 static void InitBitRevTable(void);
36 static void FFT(fx32 *data, int nShift);
37 static void IFFT(fx32 *data, int nShift);
38
39 /*---------------------------------------------------------------------------*
40 Variable Definitions
41 *---------------------------------------------------------------------------*/
42 #define FFT_NSHIFT 10
43 #define FFT_N (1 << FFT_NSHIFT)
44 #define FFT_VALUE_MAX ((1 << (31 - FFT_NSHIFT))-1)
45 #define FFT_VALUE_MIN (-(1 << (31 - FFT_NSHIFT)))
46 #define FFT_VALUE_RANGE (1U << (32 - FFT_NSHIFT))
47
48 #define N_RANDOM_INPUT 16
49 #define ERROR_THRETHOLD (MATH_MAX(((double)FFT_VALUE_RANGE)/(1 << 23), (double)(1 << (FFT_NSHIFT-12))))
50
51 static fx32 gFFTCos[FFT_N], gFFTSin[FFT_N];
52 static int gBitRevTable[FFT_N];
53
54 /*---------------------------------------------------------------------------*
55 Function Definitions
56 *---------------------------------------------------------------------------*/
57
58 /*---------------------------------------------------------------------------*
59 Name: NitroMain
60
61 Description: Initialization and main loop.
62
63 Arguments: None.
64
65 Returns: None.
66 *---------------------------------------------------------------------------*/
NitroMain(void)67 void NitroMain(void)
68 {
69 // Various types of initialization
70 OS_Init();
71 OS_InitTick();
72
73 DisplayInit();
74
75 if (FFTTest())
76 {
77 // Success
78 OS_TPrintf("------ Test Succeeded ------\n");
79 FillScreen(GX_RGB(0, 31, 0));
80 }
81 else
82 {
83 // Failed
84 OS_TPrintf("****** Test Failed ******\n");
85 FillScreen(GX_RGB(31, 0, 0));
86 }
87
88 // Main loop
89 while (TRUE)
90 {
91 // Waiting for the V-Blank
92 OS_WaitVBlankIntr();
93 }
94 }
95
96 /*---------------------------------------------------------------------------*
97 Name: VBlankIntr
98
99 Description: V-Blank interrupt vector.
100
101 Arguments: None.
102
103 Returns: None.
104 *---------------------------------------------------------------------------*/
VBlankIntr(void)105 static void VBlankIntr(void)
106 {
107 // Sets the IRQ check flag
108 OS_SetIrqCheckFlag(OS_IE_V_BLANK);
109 }
110
111 /*---------------------------------------------------------------------------*
112 Name: DisplayInit
113
114 Description: Graphics initialization.
115
116 Arguments: None.
117
118 Returns: None.
119 *---------------------------------------------------------------------------*/
DisplayInit(void)120 static void DisplayInit(void)
121 {
122
123 GX_Init();
124 FX_Init();
125
126 GX_DispOff();
127 GXS_DispOff();
128
129 GX_SetDispSelect(GX_DISP_SELECT_SUB_MAIN);
130
131 OS_SetIrqFunction(OS_IE_V_BLANK, VBlankIntr);
132 (void)OS_EnableIrqMask(OS_IE_V_BLANK);
133 (void)GX_VBlankIntr(TRUE); // To generate V-Blank interrupt request
134 (void)OS_EnableIrq();
135
136
137 GX_SetBankForLCDC(GX_VRAM_LCDC_ALL);
138 MI_CpuClearFast((void *)HW_LCDC_VRAM, HW_LCDC_VRAM_SIZE);
139
140 MI_CpuFillFast((void *)HW_OAM, 192, HW_OAM_SIZE); // Clear OAM
141 MI_CpuClearFast((void *)HW_PLTT, HW_PLTT_SIZE); // Clear the standard palette
142
143 MI_CpuFillFast((void *)HW_DB_OAM, 192, HW_DB_OAM_SIZE); // Clear OAM
144 MI_CpuClearFast((void *)HW_DB_PLTT, HW_DB_PLTT_SIZE); // Clear the standard palette
145 MI_DmaFill32(3, (void *)HW_LCDC_VRAM_C, 0x7FFF7FFF, 256 * 192 * sizeof(u16));
146
147
148 GX_SetBankForOBJ(GX_VRAM_OBJ_256_AB); // Set VRAM-A, B for OBJ
149
150 GX_SetGraphicsMode(GX_DISPMODE_VRAM_C, // VRAM mode
151 (GXBGMode)0, // Dummy
152 (GXBG0As)0); // Dummy
153
154 GX_SetVisiblePlane(GX_PLANEMASK_OBJ); // Make OBJ visible
155 GX_SetOBJVRamModeBmp(GX_OBJVRAMMODE_BMP_1D_128K); // 2D mapping object
156
157 OS_WaitVBlankIntr(); // Waiting for the end of the V-Blank interrupt
158 GX_DispOn();
159
160 }
161
162
163 /*---------------------------------------------------------------------------*
164 Name: FillScreen
165
166 Description: Fills the screen.
167
168 Arguments: col: FillColor
169
170 Returns: None.
171 *---------------------------------------------------------------------------*/
FillScreen(u16 col)172 static void FillScreen(u16 col)
173 {
174 MI_CpuFill16((void *)HW_LCDC_VRAM_C, col, 256 * 192 * 2);
175 }
176
177 /*---------------------------------------------------------------------------*
178 Name: FFTTest
179
180 Description: Test routine for fast Fourier transform.
181
182 Arguments: None.
183
184 Returns: TRUE if test succeeds.
185 *---------------------------------------------------------------------------*/
186 #define PrintResultEq( a, b, f ) \
187 { OS_TPrintf( ((a) == (b)) ? "[--OK--] " : "[**NG**] " ); (f) = (f) && ((a) == (b)); }
188
FFTTest(void)189 static BOOL FFTTest(void)
190 {
191 static fx32 data[FFT_N * 2];
192 static fx32 orig[FFT_N * 2];
193 static fx16 sinTable[FFT_N - FFT_N / 4];
194 static fx16 sinTable2[(FFT_N - FFT_N / 4) / 2];
195
196 BOOL flag = TRUE;
197 int i;
198 OSTick start, end;
199
200 MATH_MakeFFTSinTable(sinTable, FFT_NSHIFT);
201 MATH_MakeFFTSinTable(sinTable2, FFT_NSHIFT - 1);
202
203 OS_TPrintf("N = %d\n", FFT_N);
204 OS_TPrintf("\nMATH_FFT: Sin\n");
205 {
206 for (i = 0; i < FFT_N; i++)
207 {
208 orig[i * 2] = data[i * 2] = FX_SinIdx((65536 / FFT_N) * i);
209 orig[i * 2 + 1] = data[i * 2 + 1] = 0;
210 }
211 start = OS_GetTick();
212 MATH_FFT(data, FFT_NSHIFT, sinTable);
213 end = OS_GetTick();
214 // PrintComplexArray(data);
215 MATH_IFFT(data, FFT_NSHIFT, sinTable);
216 OS_Printf("%lld us, ", OS_TicksToMicroSeconds(end - start));
217 flag = PrintError(orig, data) && flag;
218 }
219
220 OS_TPrintf("\nMATH_FFT: Cos\n");
221 {
222 for (i = 0; i < FFT_N; i++)
223 {
224 orig[i * 2] = data[i * 2] = FX_CosIdx((65536 / FFT_N) * i);
225 orig[i * 2 + 1] = data[i * 2 + 1] = 0;
226 }
227 start = OS_GetTick();
228 MATH_FFT(data, FFT_NSHIFT, sinTable);
229 end = OS_GetTick();
230 // PrintComplexArray(data);
231 MATH_IFFT(data, FFT_NSHIFT, sinTable);
232 OS_Printf("%lld us, ", OS_TicksToMicroSeconds(end - start));
233 flag = PrintError(orig, data) && flag;
234 }
235
236 OS_TPrintf("\nMATH_FFT: Cos + Sin\n");
237 {
238 for (i = 0; i < FFT_N; i++)
239 {
240 orig[i * 2] = data[i * 2] =
241 FX_CosIdx((65536 / FFT_N) * i) + FX_SinIdx((65536 / FFT_N) * i);
242 orig[i * 2 + 1] = data[i * 2 + 1] = 0;
243 }
244 start = OS_GetTick();
245 MATH_FFT(data, FFT_NSHIFT, sinTable);
246 end = OS_GetTick();
247 // PrintComplexArray(data);
248 MATH_IFFT(data, FFT_NSHIFT, sinTable);
249 OS_Printf("%lld us, ", OS_TicksToMicroSeconds(end - start));
250 flag = PrintError(orig, data) && flag;
251 }
252
253 OS_TPrintf("\nMATH_FFT: Highest Freqency (Real)\n");
254 {
255 for (i = 0; i < FFT_N; i++)
256 {
257 orig[i * 2] = data[i * 2] = (i & 1) ? FFT_VALUE_MIN : FFT_VALUE_MAX;
258 orig[i * 2 + 1] = data[i * 2 + 1] = 0;
259 }
260 start = OS_GetTick();
261 MATH_FFT(data, FFT_NSHIFT, sinTable);
262 end = OS_GetTick();
263 // PrintComplexArray(data);
264 MATH_IFFT(data, FFT_NSHIFT, sinTable);
265 OS_Printf("%lld us, ", OS_TicksToMicroSeconds(end - start));
266 flag = PrintError(orig, data) && flag;
267 }
268
269 OS_TPrintf("\nMATH_FFT: Highest Freqency (Complex)\n");
270 {
271 for (i = 0; i < FFT_N; i++)
272 {
273 orig[i * 2] = data[i * 2] = (i & 1) ? FFT_VALUE_MIN : FFT_VALUE_MAX;
274 orig[i * 2 + 1] = data[i * 2 + 1] = (i & 1) ? FFT_VALUE_MIN : FFT_VALUE_MAX;
275 }
276 start = OS_GetTick();
277 MATH_FFT(data, FFT_NSHIFT, sinTable);
278 end = OS_GetTick();
279 // PrintComplexArray(data);
280 MATH_IFFT(data, FFT_NSHIFT, sinTable);
281 OS_Printf("%lld us, ", OS_TicksToMicroSeconds(end - start));
282 flag = PrintError(orig, data) && flag;
283 }
284
285 OS_TPrintf("\nMATH_FFT: Constant\n");
286 {
287 for (i = 0; i < FFT_N; i++)
288 {
289 orig[i * 2] = data[i * 2] = FFT_VALUE_MAX;
290 orig[i * 2 + 1] = data[i * 2 + 1] = FFT_VALUE_MAX;
291 }
292 start = OS_GetTick();
293 MATH_FFT(data, FFT_NSHIFT, sinTable);
294 end = OS_GetTick();
295 // PrintComplexArray(data);
296 MATH_IFFT(data, FFT_NSHIFT, sinTable);
297 OS_Printf("%lld us, ", OS_TicksToMicroSeconds(end - start));
298 flag = PrintError(orig, data) && flag;
299 }
300
301 OS_TPrintf("\nMATH_FFT: Random Input\n");
302 {
303 u32 seed;
304 for (seed = 0; seed < N_RANDOM_INPUT; seed++)
305 {
306 MATHRandContext32 rand;
307
308 MATH_InitRand32(&rand, seed);
309 for (i = 0; i < FFT_N; i++)
310 {
311 orig[i * 2] = data[i * 2] =
312 (fx32)(MATH_Rand32(&rand, FFT_VALUE_RANGE) - (FFT_VALUE_RANGE / 2));
313 orig[i * 2 + 1] = data[i * 2 + 1] =
314 (fx32)(MATH_Rand32(&rand, FFT_VALUE_RANGE) - (FFT_VALUE_RANGE / 2));
315 }
316 start = OS_GetTick();
317 MATH_FFT(data, FFT_NSHIFT, sinTable);
318 end = OS_GetTick();
319 // PrintComplexArray(data);
320 MATH_IFFT(data, FFT_NSHIFT, sinTable);
321 OS_Printf("%lld us, ", OS_TicksToMicroSeconds(end - start));
322 flag = PrintError(orig, data) && flag;
323 }
324 }
325
326 OS_TPrintf("\nMATH_FFTReal: Sin\n");
327 {
328 for (i = 0; i < FFT_N; i++)
329 {
330 orig[i] = data[i] = FX_SinIdx((65536 / FFT_N) * i);
331 }
332 for (; i < FFT_N * 2; i++)
333 {
334 orig[i] = data[i] = 0;
335 }
336 start = OS_GetTick();
337 MATH_FFTReal(data, FFT_NSHIFT, sinTable, sinTable2);
338 end = OS_GetTick();
339 // PrintHalfComplexArray(data);
340 MATH_IFFTReal(data, FFT_NSHIFT, sinTable, sinTable2);
341 // PrintRealArray(data);
342 OS_Printf("%lld us, ", OS_TicksToMicroSeconds(end - start));
343 flag = PrintError(orig, data) && flag;
344 }
345
346 OS_TPrintf("\nMATH_FFTReal: Random Input\n");
347 {
348 u32 seed;
349 for (seed = 0; seed < N_RANDOM_INPUT; seed++)
350 {
351 MATHRandContext32 rand;
352
353 MATH_InitRand32(&rand, seed);
354 for (i = 0; i < FFT_N; i++)
355 {
356 orig[i] = data[i] =
357 (fx32)(MATH_Rand32(&rand, FFT_VALUE_RANGE) - (FFT_VALUE_RANGE / 2));
358 }
359 for (; i < FFT_N * 2; i++)
360 {
361 orig[i] = data[i] = 0;
362 }
363 start = OS_GetTick();
364 MATH_FFTReal(data, FFT_NSHIFT, sinTable, sinTable2);
365 end = OS_GetTick();
366 // PrintComplexArray(data);
367 MATH_IFFTReal(data, FFT_NSHIFT, sinTable, sinTable2);
368 OS_Printf("%lld us, ", OS_TicksToMicroSeconds(end - start));
369 flag = PrintError(orig, data) && flag;
370 }
371 }
372
373 /*
374 FFT and IFFT below are reference implementations to compare with MATH_FFT and MATH_IFFT.
375 MATH_FFT is an optimized implementation, so a reference algorithm has been implemented to compare behavior.
376 The margin of error may differ between the two calculation results.
377 */
378 #if 0
379 OS_TPrintf("\nLocal FFT Function\n");
380 InitBitRevTable();
381 {
382 for (i = 0; i < FFT_N; i++)
383 {
384 data[i * 2] = FX_SinIdx((65536 / FFT_N) * i);
385 data[i * 2 + 1] = 0;
386 }
387 FFT(data, FFT_NSHIFT);
388 // PrintComplexArray(data);
389 }
390
391 {
392 u32 seed;
393 for (seed = 0; seed < N_RANDOM_INPUT; seed++)
394 {
395 MATHRandContext32 rand;
396
397 MATH_InitRand32(&rand, seed);
398 for (i = 0; i < FFT_N; i++)
399 {
400 orig[i * 2] = data[i * 2] =
401 (fx32)(MATH_Rand32(&rand, FFT_VALUE_RANGE) - (FFT_VALUE_RANGE / 2));
402 orig[i * 2 + 1] = data[i * 2 + 1] =
403 (fx32)(MATH_Rand32(&rand, FFT_VALUE_RANGE) - (FFT_VALUE_RANGE / 2));
404 }
405 FFT(data, FFT_NSHIFT);
406 // PrintComplexArray(data);
407 IFFT(data, FFT_NSHIFT);
408 flag = PrintError(orig, data) && flag;
409 }
410 }
411
412 #endif
413
414
415 return flag;
416 }
417
PrintRealArray(fx32 * data)418 static void PrintRealArray(fx32 *data)
419 {
420 u32 i;
421 for (i = 0; i < FFT_N; i++)
422 {
423 OS_TPrintf("%3x: ", i);
424 PrintFX32(data[i]);
425 OS_TPrintf("\n");
426 }
427 }
428
PrintHalfComplexArray(fx32 * data)429 static void PrintHalfComplexArray(fx32 *data)
430 {
431 u32 i;
432
433 i = 0;
434 OS_TPrintf("%3x: ", i);
435 PrintFX32(data[0]);
436 OS_TPrintf("\n");
437 for (i = 1; i < FFT_N / 2; i++)
438 {
439 OS_TPrintf("%3x: ", i);
440 PrintFX32(data[i * 2]);
441 OS_TPrintf(", ");
442 PrintFX32(data[i * 2 + 1]);
443 OS_TPrintf("\n");
444 }
445 OS_TPrintf("%3x: ", i);
446 PrintFX32(data[1]);
447 OS_TPrintf("\n");
448 }
449
PrintComplexArray(fx32 * data)450 static void PrintComplexArray(fx32 *data)
451 {
452 u32 i;
453 for (i = 0; i < FFT_N; i++)
454 {
455 OS_TPrintf("%3x: ", i);
456 PrintFX32(data[i * 2]);
457 OS_TPrintf(", ");
458 PrintFX32(data[i * 2 + 1]);
459 OS_TPrintf("\n");
460 }
461 }
462
PrintFX32(fx32 f)463 static void PrintFX32(fx32 f)
464 {
465 #pragma unused(f) // Required for FINALROM build
466 OS_Printf("%f", FX_FX32_TO_F32(f));
467 #if 0
468 if (f >= 0)
469 {
470 OS_TPrintf(" %6d.%03d", f >> FX32_SHIFT, (f & FX32_DEC_MASK) * 1000 / 4096);
471 }
472 else
473 {
474 OS_TPrintf("-%6d.%03d", (-f) >> FX32_SHIFT, ((-f) & FX32_DEC_MASK) * 1000 / 4096);
475 }
476 #endif
477 }
478
PrintError(fx32 * orig,fx32 * data)479 static BOOL PrintError(fx32 *orig, fx32 *data)
480 {
481 u32 i;
482 fx32 max_error;
483 double sum_sqd, e;
484
485 max_error = 0;
486 sum_sqd = 0;
487 for (i = 0; i < FFT_N * 2; i++)
488 {
489 fx32 d = MATH_ABS(data[i] - orig[i]);
490 double dd = FX_FX32_TO_F32(d);
491 double od = FX_FX32_TO_F32(orig[i]);
492
493 if (d > max_error)
494 {
495 max_error = d;
496 }
497
498 sum_sqd += dd * dd;
499 }
500 sum_sqd /= FFT_N * 2;
501 e = FX_FX32_TO_F32(max_error);
502 OS_Printf("Max Error: %f, Dist.^2: %.4g\n", e, sum_sqd);
503
504 return (e <= ERROR_THRETHOLD);
505 }
506
507 /*
508 FFT and IFFT below are reference implementations to compare with MATH_FFT and MATH_IFFT.
509 MATH_FFT is an optimized implementation, so a reference algorithm has been implemented to compare behavior.
510 The margin of error may differ between the two calculation results.
511 */
512 #if 0
513 static void InitBitRevTable(void)
514 {
515 int i, j, k;
516
517 i = j = 0;
518 for (;;)
519 {
520 gBitRevTable[i] = j;
521 if (++i >= FFT_N)
522 break;
523 k = FFT_N / 2;
524 while (k <= j)
525 {
526 j -= k;
527 k /= 2;
528 }
529 j += k;
530 }
531 }
532
533 static void FFT(fx32 *data, int nShift)
534 {
535 int i, j, k, ik, h, d, k2;
536 fx32 t, s, c, dx, dy;
537 u32 n = 1U << nShift;
538 OSTick start, end;
539
540 SDK_ASSERT(n == FFT_N);
541
542 for (i = 0; i < FFT_N; i++)
543 {
544 gFFTCos[i] = data[i * 2];
545 gFFTSin[i] = data[i * 2 + 1];
546 }
547
548 start = OS_GetTick();
549 for (i = 0; i < FFT_N; i++)
550 { /* Bit inversion */
551 j = gBitRevTable[i];
552 if (i < j)
553 {
554 t = gFFTCos[i];
555 gFFTCos[i] = gFFTCos[j];
556 gFFTCos[j] = t;
557 t = gFFTSin[i];
558 gFFTSin[i] = gFFTSin[j];
559 gFFTSin[j] = t;
560 }
561 }
562 for (k = 1; k < FFT_N; k = k2)
563 { /* Conversion */
564 h = 0;
565 k2 = k + k;
566 d = FFT_N / k2;
567 for (j = 0; j < k; j++)
568 {
569 c = FX_CosIdx(h * (65536 / FFT_N));
570 s = FX_SinIdx(h * (65536 / FFT_N));
571 for (i = j; i < FFT_N; i += k2)
572 {
573 ik = i + k;
574 dx = FX_Mul(s, gFFTSin[ik]) + FX_Mul(c, gFFTCos[ik]);
575 dy = FX_Mul(c, gFFTSin[ik]) - FX_Mul(s, gFFTCos[ik]);
576 gFFTCos[ik] = gFFTCos[i] - dx;
577 gFFTCos[i] += dx;
578 gFFTSin[ik] = gFFTSin[i] - dy;
579 gFFTSin[i] += dy;
580 }
581 h += d;
582 }
583 }
584 end = OS_GetTick();
585 OS_Printf("%lld us, ", OS_TicksToMicroSeconds(end - start));
586
587 for (i = 0; i < FFT_N; i++)
588 {
589 data[i * 2] = gFFTCos[i] >> nShift;
590 data[i * 2 + 1] = gFFTSin[i] >> nShift;
591 }
592 }
593
594 static void IFFT(fx32 *data, int nShift)
595 {
596 int i, j, k, ik, h, d, k2;
597 fx32 t, s, c, dx, dy;
598 u32 n = 1U << nShift;
599
600 SDK_ASSERT(n == FFT_N);
601
602 for (i = 0; i < FFT_N; i++)
603 {
604 gFFTCos[i] = data[i * 2];
605 gFFTSin[i] = data[i * 2 + 1];
606 }
607
608 for (i = 0; i < FFT_N; i++)
609 { /* Bit inversion */
610 j = gBitRevTable[i];
611 if (i < j)
612 {
613 t = gFFTCos[i];
614 gFFTCos[i] = gFFTCos[j];
615 gFFTCos[j] = t;
616 t = gFFTSin[i];
617 gFFTSin[i] = gFFTSin[j];
618 gFFTSin[j] = t;
619 }
620 }
621 for (k = 1; k < FFT_N; k = k2)
622 { /* Conversion */
623 h = 0;
624 k2 = k + k;
625 d = FFT_N / k2;
626 for (j = 0; j < k; j++)
627 {
628 c = FX_CosIdx(h * (65536 / FFT_N));
629 s = FX_SinIdx(h * (65536 / FFT_N));
630 for (i = j; i < FFT_N; i += k2)
631 {
632 ik = i + k;
633 dx = -FX_Mul(s, gFFTSin[ik]) + FX_Mul(c, gFFTCos[ik]);
634 dy = FX_Mul(c, gFFTSin[ik]) + FX_Mul(s, gFFTCos[ik]);
635 gFFTCos[ik] = gFFTCos[i] - dx;
636 gFFTCos[i] += dx;
637 gFFTSin[ik] = gFFTSin[i] - dy;
638 gFFTSin[i] += dy;
639 }
640 h += d;
641 }
642 }
643
644 for (i = 0; i < FFT_N; i++)
645 {
646 data[i * 2] = gFFTCos[i];
647 data[i * 2 + 1] = gFFTSin[i];
648 }
649 }
650 #endif
651
652 /*---------------------------------------------------------------------------*
653 End of file
654 *---------------------------------------------------------------------------*/
655