1 /*---------------------------------------------------------------------------*
2   Project:  TwlSDK - MATH - demos
3   File:     main.c
4 
5   Copyright 2007-2008 Nintendo. All rights reserved.
6 
7   These coded instructions, statements, and computer programs contain
8   proprietary information of Nintendo of America Inc. and/or Nintendo
9   Company Ltd., and are protected by Federal copyright law. They may
10   not be disclosed to third parties or copied or duplicated in any form,
11   in whole or in part, without the prior written consent of Nintendo.
12 
13   $Date:: 2008-09-17#$
14   $Rev: 8556 $
15   $Author: okubata_ryoma $
16  *---------------------------------------------------------------------------*/
17 /*---------------------------------------------------------------------------*
18   MATH library: Demo to confirm operation of fast Fourier transform
19  *---------------------------------------------------------------------------*/
20 
21 #include <nitro.h>
22 #include <nitro/fx/fx_trig.h>
23 
24 
25 static void VBlankIntr(void);
26 static void DisplayInit(void);
27 static void FillScreen(u16 col);
28 static BOOL FFTTest(void);
29 static void PrintFX32(fx32 f);
30 static void PrintRealArray(fx32 *data);
31 static void PrintHalfComplexArray(fx32 *data);
32 static void PrintComplexArray(fx32 *data);
33 static BOOL PrintError(fx32 *orig, fx32 *data);
34 
35 static void InitBitRevTable(void);
36 static void FFT(fx32 *data, int nShift);
37 static void IFFT(fx32 *data, int nShift);
38 
39 /*---------------------------------------------------------------------------*
40     Variable Definitions
41  *---------------------------------------------------------------------------*/
42 #define FFT_NSHIFT         10
43 #define FFT_N              (1 << FFT_NSHIFT)
44 #define FFT_VALUE_MAX      ((1 << (31 - FFT_NSHIFT))-1)
45 #define FFT_VALUE_MIN      (-(1 << (31 - FFT_NSHIFT)))
46 #define FFT_VALUE_RANGE    (1U << (32 - FFT_NSHIFT))
47 
48 #define N_RANDOM_INPUT     16
49 #define ERROR_THRETHOLD    (MATH_MAX(((double)FFT_VALUE_RANGE)/(1 << 23), (double)(1 << (FFT_NSHIFT-12))))
50 
51 static fx32 gFFTCos[FFT_N], gFFTSin[FFT_N];
52 static int gBitRevTable[FFT_N];
53 
54 /*---------------------------------------------------------------------------*
55     Function definitions
56  *---------------------------------------------------------------------------*/
57 
58 /*---------------------------------------------------------------------------*
59   Name:         NitroMain
60 
61   Description:  Initialization and main loop
62 
63   Arguments:    None
64 
65   Returns:      None
66  *---------------------------------------------------------------------------*/
NitroMain(void)67 void NitroMain(void)
68 {
69     // Various types of initialization
70     OS_Init();
71     OS_InitTick();
72 
73     DisplayInit();
74 
75     if (FFTTest())
76     {
77         // Success
78         OS_TPrintf("------ Test Succeeded ------\n");
79         FillScreen(GX_RGB(0, 31, 0));
80     }
81     else
82     {
83         // Failed
84         OS_TPrintf("****** Test Failed ******\n");
85         FillScreen(GX_RGB(31, 0, 0));
86     }
87 
88     // Main loop
89     while (TRUE)
90     {
91         // Waiting for the V-Blank
92         OS_WaitVBlankIntr();
93     }
94 }
95 
96 /*---------------------------------------------------------------------------*
97   Name:         VBlankIntr
98 
99   Description:  V-Blank interrupt vector
100 
101   Arguments:    None
102 
103   Returns:      None
104  *---------------------------------------------------------------------------*/
VBlankIntr(void)105 static void VBlankIntr(void)
106 {
107     // Sets the IRQ check flag
108     OS_SetIrqCheckFlag(OS_IE_V_BLANK);
109 }
110 
111 /*---------------------------------------------------------------------------*
112   Name:         DisplayInit
113 
114   Description:  Graphics Initialization
115 
116   Arguments:    None
117 
118   Returns:      None
119  *---------------------------------------------------------------------------*/
DisplayInit(void)120 static void DisplayInit(void)
121 {
122 
123     GX_Init();
124     FX_Init();
125 
126     GX_DispOff();
127     GXS_DispOff();
128 
129     GX_SetDispSelect(GX_DISP_SELECT_SUB_MAIN);
130 
131     OS_SetIrqFunction(OS_IE_V_BLANK, VBlankIntr);
132     (void)OS_EnableIrqMask(OS_IE_V_BLANK);
133     (void)GX_VBlankIntr(TRUE);         // To generate V-Blank interrupt request
134     (void)OS_EnableIrq();
135 
136 
137     GX_SetBankForLCDC(GX_VRAM_LCDC_ALL);
138     MI_CpuClearFast((void *)HW_LCDC_VRAM, HW_LCDC_VRAM_SIZE);
139 
140     MI_CpuFillFast((void *)HW_OAM, 192, HW_OAM_SIZE);   // Clear OAM
141     MI_CpuClearFast((void *)HW_PLTT, HW_PLTT_SIZE);     // Clear the standard palette
142 
143     MI_CpuFillFast((void *)HW_DB_OAM, 192, HW_DB_OAM_SIZE);     // Clear OAM
144     MI_CpuClearFast((void *)HW_DB_PLTT, HW_DB_PLTT_SIZE);       // Clear the standard palette
145     MI_DmaFill32(3, (void *)HW_LCDC_VRAM_C, 0x7FFF7FFF, 256 * 192 * sizeof(u16));
146 
147 
148     GX_SetBankForOBJ(GX_VRAM_OBJ_256_AB);       // Set VRAM-A,B for OBJ
149 
150     GX_SetGraphicsMode(GX_DISPMODE_VRAM_C,      // VRAM mode
151                        (GXBGMode)0,    // Dummy
152                        (GXBG0As)0);    // Dummy
153 
154     GX_SetVisiblePlane(GX_PLANEMASK_OBJ);       // Make OBJs visible
155     GX_SetOBJVRamModeBmp(GX_OBJVRAMMODE_BMP_1D_128K);   // 2D mapping OBJ
156 
157     OS_WaitVBlankIntr();               // Waiting for the end of the V-Blank interrupt
158     GX_DispOn();
159 
160 }
161 
162 
163 /*---------------------------------------------------------------------------*
164   Name:         FillScreen
165 
166   Description:  Fills the screen
167 
168   Arguments:    col - FillColor
169 
170   Returns:      None
171  *---------------------------------------------------------------------------*/
FillScreen(u16 col)172 static void FillScreen(u16 col)
173 {
174     MI_CpuFill16((void *)HW_LCDC_VRAM_C, col, 256 * 192 * 2);
175 }
176 
177 /*---------------------------------------------------------------------------*
178   Name:         FFTTest
179 
180   Description:  Test routine for fast Fourier transform
181 
182   Arguments:    None
183 
184   Returns:      TRUE if test succeeds
185  *---------------------------------------------------------------------------*/
186 #define PrintResultEq( a, b, f ) \
187     { OS_TPrintf( ((a) == (b)) ? "[--OK--] " : "[**NG**] " ); (f) = (f) && ((a) == (b)); }
188 
FFTTest(void)189 static BOOL FFTTest(void)
190 {
191     static fx32 data[FFT_N * 2];
192     static fx32 orig[FFT_N * 2];
193     static fx16 sinTable[FFT_N - FFT_N / 4];
194     static fx16 sinTable2[(FFT_N - FFT_N / 4) / 2];
195 
196     BOOL    flag = TRUE;
197     int     i;
198     OSTick  start, end;
199 
200     MATH_MakeFFTSinTable(sinTable, FFT_NSHIFT);
201     MATH_MakeFFTSinTable(sinTable2, FFT_NSHIFT - 1);
202 
203     OS_TPrintf("N = %d\n", FFT_N);
204     OS_TPrintf("\nMATH_FFT: Sin\n");
205     {
206         for (i = 0; i < FFT_N; i++)
207         {
208             orig[i * 2] = data[i * 2] = FX_SinIdx((65536 / FFT_N) * i);
209             orig[i * 2 + 1] = data[i * 2 + 1] = 0;
210         }
211         start = OS_GetTick();
212         MATH_FFT(data, FFT_NSHIFT, sinTable);
213         end = OS_GetTick();
214 //        PrintComplexArray(data);
215         MATH_IFFT(data, FFT_NSHIFT, sinTable);
216         OS_Printf("%lld us, ", OS_TicksToMicroSeconds(end - start));
217         flag = PrintError(orig, data) && flag;
218     }
219 
220     OS_TPrintf("\nMATH_FFT: Cos\n");
221     {
222         for (i = 0; i < FFT_N; i++)
223         {
224             orig[i * 2] = data[i * 2] = FX_CosIdx((65536 / FFT_N) * i);
225             orig[i * 2 + 1] = data[i * 2 + 1] = 0;
226         }
227         start = OS_GetTick();
228         MATH_FFT(data, FFT_NSHIFT, sinTable);
229         end = OS_GetTick();
230 //        PrintComplexArray(data);
231         MATH_IFFT(data, FFT_NSHIFT, sinTable);
232         OS_Printf("%lld us, ", OS_TicksToMicroSeconds(end - start));
233         flag = PrintError(orig, data) && flag;
234     }
235 
236     OS_TPrintf("\nMATH_FFT: Cos + Sin\n");
237     {
238         for (i = 0; i < FFT_N; i++)
239         {
240             orig[i * 2] = data[i * 2] =
241                 FX_CosIdx((65536 / FFT_N) * i) + FX_SinIdx((65536 / FFT_N) * i);
242             orig[i * 2 + 1] = data[i * 2 + 1] = 0;
243         }
244         start = OS_GetTick();
245         MATH_FFT(data, FFT_NSHIFT, sinTable);
246         end = OS_GetTick();
247 //        PrintComplexArray(data);
248         MATH_IFFT(data, FFT_NSHIFT, sinTable);
249         OS_Printf("%lld us, ", OS_TicksToMicroSeconds(end - start));
250         flag = PrintError(orig, data) && flag;
251     }
252 
253     OS_TPrintf("\nMATH_FFT: Highest Freqency (Real)\n");
254     {
255         for (i = 0; i < FFT_N; i++)
256         {
257             orig[i * 2] = data[i * 2] = (i & 1) ? FFT_VALUE_MIN : FFT_VALUE_MAX;
258             orig[i * 2 + 1] = data[i * 2 + 1] = 0;
259         }
260         start = OS_GetTick();
261         MATH_FFT(data, FFT_NSHIFT, sinTable);
262         end = OS_GetTick();
263 //        PrintComplexArray(data);
264         MATH_IFFT(data, FFT_NSHIFT, sinTable);
265         OS_Printf("%lld us, ", OS_TicksToMicroSeconds(end - start));
266         flag = PrintError(orig, data) && flag;
267     }
268 
269     OS_TPrintf("\nMATH_FFT: Highest Freqency (Complex)\n");
270     {
271         for (i = 0; i < FFT_N; i++)
272         {
273             orig[i * 2] = data[i * 2] = (i & 1) ? FFT_VALUE_MIN : FFT_VALUE_MAX;
274             orig[i * 2 + 1] = data[i * 2 + 1] = (i & 1) ? FFT_VALUE_MIN : FFT_VALUE_MAX;
275         }
276         start = OS_GetTick();
277         MATH_FFT(data, FFT_NSHIFT, sinTable);
278         end = OS_GetTick();
279 //        PrintComplexArray(data);
280         MATH_IFFT(data, FFT_NSHIFT, sinTable);
281         OS_Printf("%lld us, ", OS_TicksToMicroSeconds(end - start));
282         flag = PrintError(orig, data) && flag;
283     }
284 
285     OS_TPrintf("\nMATH_FFT: Constant\n");
286     {
287         for (i = 0; i < FFT_N; i++)
288         {
289             orig[i * 2] = data[i * 2] = FFT_VALUE_MAX;
290             orig[i * 2 + 1] = data[i * 2 + 1] = FFT_VALUE_MAX;
291         }
292         start = OS_GetTick();
293         MATH_FFT(data, FFT_NSHIFT, sinTable);
294         end = OS_GetTick();
295 //        PrintComplexArray(data);
296         MATH_IFFT(data, FFT_NSHIFT, sinTable);
297         OS_Printf("%lld us, ", OS_TicksToMicroSeconds(end - start));
298         flag = PrintError(orig, data) && flag;
299     }
300 
301     OS_TPrintf("\nMATH_FFT: Random Input\n");
302     {
303         u32     seed;
304         for (seed = 0; seed < N_RANDOM_INPUT; seed++)
305         {
306             MATHRandContext32 rand;
307 
308             MATH_InitRand32(&rand, seed);
309             for (i = 0; i < FFT_N; i++)
310             {
311                 orig[i * 2] = data[i * 2] =
312                     (fx32)(MATH_Rand32(&rand, FFT_VALUE_RANGE) - (FFT_VALUE_RANGE / 2));
313                 orig[i * 2 + 1] = data[i * 2 + 1] =
314                     (fx32)(MATH_Rand32(&rand, FFT_VALUE_RANGE) - (FFT_VALUE_RANGE / 2));
315             }
316             start = OS_GetTick();
317             MATH_FFT(data, FFT_NSHIFT, sinTable);
318             end = OS_GetTick();
319 //            PrintComplexArray(data);
320             MATH_IFFT(data, FFT_NSHIFT, sinTable);
321             OS_Printf("%lld us, ", OS_TicksToMicroSeconds(end - start));
322             flag = PrintError(orig, data) && flag;
323         }
324     }
325 
326     OS_TPrintf("\nMATH_FFTReal: Sin\n");
327     {
328         for (i = 0; i < FFT_N; i++)
329         {
330             orig[i] = data[i] = FX_SinIdx((65536 / FFT_N) * i);
331         }
332         for (; i < FFT_N * 2; i++)
333         {
334             orig[i] = data[i] = 0;
335         }
336         start = OS_GetTick();
337         MATH_FFTReal(data, FFT_NSHIFT, sinTable, sinTable2);
338         end = OS_GetTick();
339 //        PrintHalfComplexArray(data);
340         MATH_IFFTReal(data, FFT_NSHIFT, sinTable, sinTable2);
341 //        PrintRealArray(data);
342         OS_Printf("%lld us, ", OS_TicksToMicroSeconds(end - start));
343         flag = PrintError(orig, data) && flag;
344     }
345 
346     OS_TPrintf("\nMATH_FFTReal: Random Input\n");
347     {
348         u32     seed;
349         for (seed = 0; seed < N_RANDOM_INPUT; seed++)
350         {
351             MATHRandContext32 rand;
352 
353             MATH_InitRand32(&rand, seed);
354             for (i = 0; i < FFT_N; i++)
355             {
356                 orig[i] = data[i] =
357                     (fx32)(MATH_Rand32(&rand, FFT_VALUE_RANGE) - (FFT_VALUE_RANGE / 2));
358             }
359             for (; i < FFT_N * 2; i++)
360             {
361                 orig[i] = data[i] = 0;
362             }
363             start = OS_GetTick();
364             MATH_FFTReal(data, FFT_NSHIFT, sinTable, sinTable2);
365             end = OS_GetTick();
366 //            PrintComplexArray(data);
367             MATH_IFFTReal(data, FFT_NSHIFT, sinTable, sinTable2);
368             OS_Printf("%lld us, ", OS_TicksToMicroSeconds(end - start));
369             flag = PrintError(orig, data) && flag;
370         }
371     }
372 
373 #if 0
374     OS_TPrintf("\nLocal FFT Function\n");
375     InitBitRevTable();
376     {
377         for (i = 0; i < FFT_N; i++)
378         {
379             data[i * 2] = FX_SinIdx((65536 / FFT_N) * i);
380             data[i * 2 + 1] = 0;
381         }
382         FFT(data, FFT_NSHIFT);
383 //        PrintComplexArray(data);
384     }
385 
386     {
387         u32     seed;
388         for (seed = 0; seed < N_RANDOM_INPUT; seed++)
389         {
390             MATHRandContext32 rand;
391 
392             MATH_InitRand32(&rand, seed);
393             for (i = 0; i < FFT_N; i++)
394             {
395                 orig[i * 2] = data[i * 2] =
396                     (fx32)(MATH_Rand32(&rand, FFT_VALUE_RANGE) - (FFT_VALUE_RANGE / 2));
397                 orig[i * 2 + 1] = data[i * 2 + 1] =
398                     (fx32)(MATH_Rand32(&rand, FFT_VALUE_RANGE) - (FFT_VALUE_RANGE / 2));
399             }
400             FFT(data, FFT_NSHIFT);
401 //            PrintComplexArray(data);
402             IFFT(data, FFT_NSHIFT);
403             flag = PrintError(orig, data) && flag;
404         }
405     }
406 
407 #endif
408 
409 
410     return flag;
411 }
412 
PrintRealArray(fx32 * data)413 static void PrintRealArray(fx32 *data)
414 {
415     u32     i;
416     for (i = 0; i < FFT_N; i++)
417     {
418         OS_TPrintf("%3x: ", i);
419         PrintFX32(data[i]);
420         OS_TPrintf("\n");
421     }
422 }
423 
PrintHalfComplexArray(fx32 * data)424 static void PrintHalfComplexArray(fx32 *data)
425 {
426     u32     i;
427 
428 	i = 0;
429     OS_TPrintf("%3x: ", i);
430 	PrintFX32(data[0]);
431     OS_TPrintf("\n");
432     for (i = 1; i < FFT_N / 2; i++)
433     {
434         OS_TPrintf("%3x: ", i);
435         PrintFX32(data[i * 2]);
436         OS_TPrintf(", ");
437         PrintFX32(data[i * 2 + 1]);
438         OS_TPrintf("\n");
439     }
440     OS_TPrintf("%3x: ", i);
441 	PrintFX32(data[1]);
442     OS_TPrintf("\n");
443 }
444 
PrintComplexArray(fx32 * data)445 static void PrintComplexArray(fx32 *data)
446 {
447     u32     i;
448     for (i = 0; i < FFT_N; i++)
449     {
450         OS_TPrintf("%3x: ", i);
451         PrintFX32(data[i * 2]);
452         OS_TPrintf(", ");
453         PrintFX32(data[i * 2 + 1]);
454         OS_TPrintf("\n");
455     }
456 }
457 
PrintFX32(fx32 f)458 static void PrintFX32(fx32 f)
459 {
460 #pragma unused(f)                      // Required for FINALROM build
461     OS_Printf("%f", FX_FX32_TO_F32(f));
462 #if 0
463     if (f >= 0)
464     {
465         OS_TPrintf(" %6d.%03d", f >> FX32_SHIFT, (f & FX32_DEC_MASK) * 1000 / 4096);
466     }
467     else
468     {
469         OS_TPrintf("-%6d.%03d", (-f) >> FX32_SHIFT, ((-f) & FX32_DEC_MASK) * 1000 / 4096);
470     }
471 #endif
472 }
473 
PrintError(fx32 * orig,fx32 * data)474 static BOOL PrintError(fx32 *orig, fx32 *data)
475 {
476     u32     i;
477     fx32    max_error;
478     double  sum_sqd, e;
479 
480     max_error = 0;
481     sum_sqd = 0;
482     for (i = 0; i < FFT_N * 2; i++)
483     {
484         fx32    d = MATH_ABS(data[i] - orig[i]);
485         double  dd = FX_FX32_TO_F32(d);
486         double  od = FX_FX32_TO_F32(orig[i]);
487 
488         if (d > max_error)
489         {
490             max_error = d;
491         }
492 
493         sum_sqd += dd * dd;
494     }
495     sum_sqd /= FFT_N * 2;
496     e = FX_FX32_TO_F32(max_error);
497     OS_Printf("Max Error: %f, Dist.^2: %.4g\n", e, sum_sqd);
498 
499     return (e <= ERROR_THRETHOLD);
500 }
501 
InitBitRevTable(void)502 static void InitBitRevTable(void)
503 {
504     int     i, j, k;
505 
506     i = j = 0;
507     for (;;)
508     {
509         gBitRevTable[i] = j;
510         if (++i >= FFT_N)
511             break;
512         k = FFT_N / 2;
513         while (k <= j)
514         {
515             j -= k;
516             k /= 2;
517         }
518         j += k;
519     }
520 }
521 
FFT(fx32 * data,int nShift)522 static void FFT(fx32 *data, int nShift)
523 {
524     int     i, j, k, ik, h, d, k2;
525     fx32    t, s, c, dx, dy;
526     u32     n = 1U << nShift;
527     OSTick  start, end;
528 
529     SDK_ASSERT(n == FFT_N);
530 
531     for (i = 0; i < FFT_N; i++)
532     {
533         gFFTCos[i] = data[i * 2];
534         gFFTSin[i] = data[i * 2 + 1];
535     }
536 
537     start = OS_GetTick();
538     for (i = 0; i < FFT_N; i++)
539     {                                  /* Bit inversion */
540         j = gBitRevTable[i];
541         if (i < j)
542         {
543             t = gFFTCos[i];
544             gFFTCos[i] = gFFTCos[j];
545             gFFTCos[j] = t;
546             t = gFFTSin[i];
547             gFFTSin[i] = gFFTSin[j];
548             gFFTSin[j] = t;
549         }
550     }
551     for (k = 1; k < FFT_N; k = k2)
552     {                                  /* Conversion */
553         h = 0;
554         k2 = k + k;
555         d = FFT_N / k2;
556         for (j = 0; j < k; j++)
557         {
558             c = FX_CosIdx(h * (65536 / FFT_N));
559             s = FX_SinIdx(h * (65536 / FFT_N));
560             for (i = j; i < FFT_N; i += k2)
561             {
562                 ik = i + k;
563                 dx = FX_Mul(s, gFFTSin[ik]) + FX_Mul(c, gFFTCos[ik]);
564                 dy = FX_Mul(c, gFFTSin[ik]) - FX_Mul(s, gFFTCos[ik]);
565                 gFFTCos[ik] = gFFTCos[i] - dx;
566                 gFFTCos[i] += dx;
567                 gFFTSin[ik] = gFFTSin[i] - dy;
568                 gFFTSin[i] += dy;
569             }
570             h += d;
571         }
572     }
573     end = OS_GetTick();
574     OS_Printf("%lld us, ", OS_TicksToMicroSeconds(end - start));
575 
576     for (i = 0; i < FFT_N; i++)
577     {
578         data[i * 2] = gFFTCos[i] >> nShift;
579         data[i * 2 + 1] = gFFTSin[i] >> nShift;
580     }
581 }
582 
IFFT(fx32 * data,int nShift)583 static void IFFT(fx32 *data, int nShift)
584 {
585     int     i, j, k, ik, h, d, k2;
586     fx32    t, s, c, dx, dy;
587     u32     n = 1U << nShift;
588 
589     SDK_ASSERT(n == FFT_N);
590 
591     for (i = 0; i < FFT_N; i++)
592     {
593         gFFTCos[i] = data[i * 2];
594         gFFTSin[i] = data[i * 2 + 1];
595     }
596 
597     for (i = 0; i < FFT_N; i++)
598     {                                  /* Bit inversion */
599         j = gBitRevTable[i];
600         if (i < j)
601         {
602             t = gFFTCos[i];
603             gFFTCos[i] = gFFTCos[j];
604             gFFTCos[j] = t;
605             t = gFFTSin[i];
606             gFFTSin[i] = gFFTSin[j];
607             gFFTSin[j] = t;
608         }
609     }
610     for (k = 1; k < FFT_N; k = k2)
611     {                                  /* Conversion */
612         h = 0;
613         k2 = k + k;
614         d = FFT_N / k2;
615         for (j = 0; j < k; j++)
616         {
617             c = FX_CosIdx(h * (65536 / FFT_N));
618             s = FX_SinIdx(h * (65536 / FFT_N));
619             for (i = j; i < FFT_N; i += k2)
620             {
621                 ik = i + k;
622                 dx = -FX_Mul(s, gFFTSin[ik]) + FX_Mul(c, gFFTCos[ik]);
623                 dy = FX_Mul(c, gFFTSin[ik]) + FX_Mul(s, gFFTCos[ik]);
624                 gFFTCos[ik] = gFFTCos[i] - dx;
625                 gFFTCos[i] += dx;
626                 gFFTSin[ik] = gFFTSin[i] - dy;
627                 gFFTSin[i] += dy;
628             }
629             h += d;
630         }
631     }
632 
633     for (i = 0; i < FFT_N; i++)
634     {
635         data[i * 2] = gFFTCos[i];
636         data[i * 2 + 1] = gFFTSin[i];
637     }
638 }
639 
640 /*---------------------------------------------------------------------------*
641   End of file
642  *---------------------------------------------------------------------------*/
643