1 /*---------------------------------------------------------------------------*
2   Project:  TwlSDK - MATH -
3   File:     fft.c
4 
5   Copyright 2003-2008 Nintendo. All rights reserved.
6 
7   These coded instructions, statements, and computer programs contain
8   proprietary information of Nintendo of America Inc. and/or Nintendo
9   Company Ltd., and are protected by Federal copyright law. They may
10   not be disclosed to third parties or copied or duplicated in any form,
11   in whole or in part, without the prior written consent of Nintendo.
12 
13   $Date:: 2008-09-17#$
14   $Rev: 8556 $
15   $Author:$
16  *---------------------------------------------------------------------------*/
17 
18 #include <nitro.h>
19 #include <nitro/fx/fx_trig.h>
20 
21 //#define Try using USE_MATH_DFT  // DFT (For evaluation)
22 
23 #if defined(USE_MATH_DFT)
24 static void MATHi_DFT(fx32 *data, fx32 *ret, u32 nShift, const fx16 *sinTable);
25 #endif
26 
27 /*---------------------------------------------------------------------------*
28   Name:         MATH_MakeFFTSinTable
29 
30   Description:  Creates sin table used in fast Fourier transform.
31 
32   Arguments:    sinTable - Pointer to the location that will store the 2^nShift * 3/4 element sin table
33                 nShift - log2 of the number of data
34 
35   Returns:      None.
36  *---------------------------------------------------------------------------*/
MATH_MakeFFTSinTable(fx16 * sinTable,u32 nShift)37 void MATH_MakeFFTSinTable(fx16 *sinTable, u32 nShift)
38 {
39     u32     i;
40     u32     n = 1U << nShift;
41     u32     nq3 = (n >> 1) + (n >> 2);
42     s32     w;
43     u32     dw;
44 
45     SDK_ASSERT(nShift < 32);
46 
47     dw = 65536U >> nShift;
48     for (i = 0, w = 0; i < nq3; i++, w += dw)
49     {
50         sinTable[i] = FX_SinIdx(w);
51     }
52 }
53 
54 /*---------------------------------------------------------------------------*
55   Name:         MATHi_FFT
56 
57   Description:  Internal function that performs fast Fourier transform.
58 
59   Arguments:    data - Data to transform, with even-numbered data being real and odd-numbered data being imaginary.
60                        The transformation result will be returned overwritten.
61                 nShift - log2 of the number of data
62                 sinTable - Sin value table based on a circle divided into equal sections; the number of sections is the number of data
63 
64   Returns:      None.
65  *---------------------------------------------------------------------------*/
66 #define SWAP_FX32(a, b) {fx32 t; t = (a); (a) = (b); (b) = t;}
MATHi_FFT(fx32 * data,u32 nShift,const fx16 * sinTable)67 void MATHi_FFT(fx32 *data, u32 nShift, const fx16 *sinTable)
68 {
69     u32     i, j;
70     u32     n = 1U << nShift;
71     u32     n2 = n << 1;
72     u32     nq = n >> 2;
73 
74     // Bit inversion
75     {
76         u32     rev = 0;
77         u32     shift = 32 - nShift - 1;        // -1 is used to double j
78         for (i = 0; i < n2; i += 2)
79         {
80             j = rev >> shift;          // Logical shift
81             if (i < j)
82             {
83                 // i and j have already been doubled
84                 SWAP_FX32(data[i], data[j]);
85                 SWAP_FX32(data[i + 1], data[j + 1]);
86             }
87             // Treat rev as a 32-bit inverted integer and increment
88             {
89                 u32     s;
90                 s = MATH_CLZ(~rev);
91                 rev ^= (((s32)(0x80000000U)) >> s);     // Arithmetic shift
92             }
93         }
94     }
95 
96     // Butterfly
97     // To Do: Adjust the loop order so that memory can be accessed in sequence.
98     {
99         u32     div, dt;
100 
101         dt = n;
102         // Build up FFT from small divisions
103         // Note that div and k have been doubled for future use
104         for (div = 2; div < n2; div <<= 1)
105         {
106             u32     k;
107             u32     di = div * 2;
108             u32     t;
109             dt >>= 1;
110 
111             // Perform calculation for 0th term in each division
112             for (i = 0; i < n2; i += di)
113             {
114                 fx32    xr, xi, yr, yi;
115                 u32     j = i + div;
116                 // i and j have already been doubled
117 
118                 // 0th term can be calculated with a simple sum and difference.
119                 // (wr, wi) == (1, 0)
120                 xr = data[i];
121                 xi = data[i + 1];
122                 yr = data[j];
123                 yi = data[j + 1];
124                 data[i] = xr + yr;
125                 data[i + 1] = xi + yi;
126                 data[j] = xr - yr;
127                 data[j + 1] = xi - yi;
128             }
129 
130             t = dt;
131 
132             // Calculate together the Kth terms that use the same rotation factor.
133             for (k = 2; k < div; k += 2)
134             {
135                 if (k != div / 2)
136                 {
137                     fx32    wr, wi, w1, w2;
138                     wr = sinTable[t + nq];      // cos(-x) == cos(x) == sin(x+(pi/2))
139                     wi = -sinTable[t];
140 //                    if (inverse)
141 //                    {
142 //                        wi = -wi; // sin(-x) = -sin(x)
143 //                    }
144                     t += dt;
145 
146                     w1 = wr + wi;
147                     w2 = wr - wi;
148 
149                     // Perform calculation for Kth term in each division
150                     for (i = k; i < n2; i += di)
151                     {
152                         fx32    xr, xi, yr, yi;
153                         u32     j = i + div;
154                         // i and j have already been doubled
155 
156                         // Twist
157                         xr = data[j];
158                         xi = data[j + 1];
159                         {
160                             // Equivalent to the following processes
161                             // yr = FX_Mul(wr, xr) - FX_Mul(wi, xi);
162                             // yi = FX_Mul(wr, xi) + FX_Mul(wi, xr);
163                             fx32    t = FX_Mul(wr, xr + xi);
164                             yr = t - FX_Mul(w1, xi);
165                             yi = t - FX_Mul(w2, xr);
166                         }
167 
168                         xr = data[i];
169                         xi = data[i + 1];
170                         data[i] = xr + yr;
171                         data[i + 1] = xi + yi;
172                         data[j] = xr - yr;
173                         data[j + 1] = xi - yi;
174                     }
175                 }
176                 else
177                 {
178                     t += dt;
179                     // Perform calculation for Kth term in each division
180                     for (i = k; i < n2; i += di)
181                     {
182                         fx32    xr, xi, yr, yi;
183                         u32     j = i + div;
184                         // i and j have already been doubled
185 
186                         // div/2-th term can be calculated with a simple sum and difference.
187                         // (wr, wi) == (0, 1)
188                         xr = data[i];
189                         xi = data[i + 1];
190                         yr = data[j];
191                         yi = data[j + 1];
192 //                        if (inverse)
193 //                        {
194 //                            yr = -yr; yi = -yi;
195 //                        }
196                         data[i] = xr + yi;
197                         data[i + 1] = xi - yr;
198                         data[j] = xr - yi;
199                         data[j + 1] = xi + yr;
200                     }
201                 }
202             }
203         }
204     }
205 }
206 
207 /*---------------------------------------------------------------------------*
208   Name:         MATHi_IFFT
209 
210   Description:  Internal function that reverses the fast Fourier transform.
211 
212   Arguments:    data - Data to transform, with even-numbered data being real and odd-numbered data being imaginary.
213                        The transformation result will be returned overwritten.
214                 nShift - log2 of the number of data
215                 sinTable - Sin value table based on a circle divided into equal sections; the number of sections is the number of data
216 
217   Returns:      None.
218  *---------------------------------------------------------------------------*/
MATHi_IFFT(fx32 * data,u32 nShift,const fx16 * sinTable)219 void MATHi_IFFT(fx32 *data, u32 nShift, const fx16 *sinTable)
220 {
221     u32     i, j;
222     u32     n = 1U << nShift;
223     u32     n2 = n << 1;
224     u32     nq = n >> 2;
225 
226     // Bit inversion
227     {
228         u32     rev = 0;
229         u32     shift = 32 - nShift - 1;        // -1 is used to double j
230         for (i = 0; i < n2; i += 2)
231         {
232             j = rev >> shift;          // Logical shift
233             if (i < j)
234             {
235                 // i and j have already been doubled
236                 SWAP_FX32(data[i], data[j]);
237                 SWAP_FX32(data[i + 1], data[j + 1]);
238             }
239             // Treat rev as a 32-bit inverted integer and increment
240             {
241                 u32     s;
242                 s = MATH_CLZ(~rev);
243                 rev ^= (((s32)(0x80000000U)) >> s);     // Arithmetic shift
244             }
245         }
246     }
247 
248     // Butterfly
249     // To Do: Adjust the loop order so that memory can be accessed in sequence.
250     {
251         u32     div, dt;
252 
253         dt = n;
254         // Build up FFT from small divisions
255         // Note that div and k have been doubled for future use
256         for (div = 2; div < n2; div <<= 1)
257         {
258             u32     k;
259             u32     di = div * 2;
260             u32     t;
261             dt >>= 1;
262 
263             // Perform calculation for 0th term in each division
264             for (i = 0; i < n2; i += di)
265             {
266                 fx32    xr, xi, yr, yi;
267                 u32     j = i + div;
268                 // i and j have already been doubled
269 
270                 // 0th term can be calculated with a simple sum and difference.
271                 // (wr, wi) == (1, 0)
272                 xr = data[i];
273                 xi = data[i + 1];
274                 yr = data[j];
275                 yi = data[j + 1];
276                 data[i] = xr + yr;
277                 data[i + 1] = xi + yi;
278                 data[j] = xr - yr;
279                 data[j + 1] = xi - yi;
280             }
281 
282             t = dt;
283 
284             // Calculate together the Kth terms that use the same rotation factor.
285             for (k = 2; k < div; k += 2)
286             {
287                 if (k != div / 2)
288                 {
289                     fx32    wr, wi, w1, w2;
290                     wr = sinTable[t + nq];      // cos(-x) == cos(x) == sin(x+(pi/2))
291                     wi = sinTable[t];
292                     t += dt;
293 
294                     w1 = wr + wi;
295                     w2 = wr - wi;
296 
297                     // Perform calculation for Kth term in each division
298                     for (i = k; i < n2; i += di)
299                     {
300                         fx32    xr, xi, yr, yi;
301                         u32     j = i + div;
302                         // i and j have already been doubled
303 
304                         // Twist
305                         xr = data[j];
306                         xi = data[j + 1];
307                         {
308                             // Equivalent to the following processes
309                             // yr = FX_Mul(wr, xr) - FX_Mul(wi, xi);
310                             // yi = FX_Mul(wr, xi) + FX_Mul(wi, xr);
311                             fx32    t = FX_Mul(wr, xr + xi);
312                             yr = t - FX_Mul(w1, xi);
313                             yi = t - FX_Mul(w2, xr);
314                         }
315 
316                         xr = data[i];
317                         xi = data[i + 1];
318                         data[i] = xr + yr;
319                         data[i + 1] = xi + yi;
320                         data[j] = xr - yr;
321                         data[j + 1] = xi - yi;
322                     }
323                 }
324                 else
325                 {
326                     t += dt;
327                     // Perform calculation for Kth term in each division
328                     for (i = k; i < n2; i += di)
329                     {
330                         fx32    xr, xi, yr, yi;
331                         u32     j = i + div;
332                         // i and j have already been doubled
333 
334                         // div/2-th term can be calculated with a simple sum and difference.
335                         // (wr, wi) == (0, 1)
336                         xr = data[i];
337                         xi = data[i + 1];
338                         yr = data[j];
339                         yi = data[j + 1];
340                         data[i] = xr - yi;
341                         data[i + 1] = xi + yr;
342                         data[j] = xr + yi;
343                         data[j + 1] = xi - yr;
344                     }
345                 }
346             }
347         }
348     }
349 }
350 
351 #if 0
352 void MATHi_FFT_alter(fx32 *data, u32 nShift, const fx16 *sinTable, BOOL inverse)
353 {
354     u32     i, j;
355     u32     n = 1 << nShift;
356     u32     n2 = n << 1;
357     u32     nq = n >> 2;
358 
359     // Bit inversion
360     {
361         u32     rev = 0;
362         u32     shift = 32 - nShift - 1;        // -1 is used to double j
363         for (i = 0; i < n2; i += 2)
364         {
365             j = rev >> shift;          // Logical shift
366             if (i < j)
367             {
368                 // i and j have already been doubled
369                 SWAP_FX32(data[i], data[j]);
370                 SWAP_FX32(data[i + 1], data[j + 1]);
371             }
372             // Treat rev as a 32-bit inverted integer and increment
373             {
374                 u32     s;
375                 s = MATH_CLZ(~rev);
376                 rev ^= (((s32)(0x80000000U)) >> s);     // Arithmetic shift
377             }
378         }
379     }
380 
381     // Butterfly
382     {
383         u32     div, dt;
384 
385         dt = n;
386         // Build up FFT from small divisions
387         // Note that div and k have been doubled for future use
388         for (div = 2; div < n2; div <<= 1)
389         {
390             u32     k;
391             u32     di = div * 2;
392             u32     t;
393             dt >>= 1;
394 
395             for (i = 0; i < n2; i += di)
396             {
397                 {
398                     fx32    xr, xi, yr, yi;
399                     u32     j = i + div;
400                     // i and j have already been doubled
401 
402                     // 0th term can be calculated with a simple sum and difference.
403                     xr = data[i];
404                     xi = data[i + 1];
405                     yr = data[j];
406                     yi = data[j + 1];
407                     data[i] = xr + yr;
408                     data[i + 1] = xi + yi;
409                     data[j] = xr - yr;
410                     data[j + 1] = xi - yi;
411                 }
412 
413                 t = dt;
414 
415                 for (k = 2; k < div; k += 2)
416                 {
417                     fx32    wr, wi;
418                     wr = sinTable[t + nq];      // cos(-x) == cos(x) == sin(x+(pi/2))
419                     wi = sinTable[t];
420                     if (!inverse)
421                     {
422                         wi = -wi;      // sin(-x) = -sin(x)
423                     }
424                     t += dt;
425 
426                     {
427                         fx32    xr, xi, yr, yi;
428                         u32     ik = i + k;
429                         u32     j = ik + div;
430                         // ik and j have already been doubled
431 
432                         // Twist
433                         xr = data[j];
434                         xi = data[j + 1];
435                         {
436                             // Equivalent to the following processes
437                             // yr = FX_Mul(wr, xr) - FX_Mul(wi, xi);
438                             // yi = FX_Mul(wr, xi) + FX_Mul(wi, xr);
439                             fx32    t = FX_Mul(wr, xr + xi);
440                             yr = t - FX_Mul(wr + wi, xi);
441                             yi = t - FX_Mul(wr - wi, xr);
442                         }
443 
444                         xr = data[ik];
445                         xi = data[ik + 1];
446                         data[ik] = xr + yr;
447                         data[ik + 1] = xi + yi;
448                         data[j] = xr - yr;
449                         data[j + 1] = xi - yi;
450                     }
451                 }
452             }
453         }
454     }
455 }
456 #endif
457 
458 /*---------------------------------------------------------------------------*
459   Name:         MATH_FFT
460 
461   Description:  Performs fast Fourier transform.
462 
463   Arguments:    data - Data to transform, with even-numbered data being real and odd-numbered data being imaginary.
464                        The transformation result will be returned overwritten.
465                 nShift - log2 of the number of data
466                 sinTable - Sin value table based on a circle divided into equal sections; the number of sections is the number of data
467 
468   Returns:      None.
469  *---------------------------------------------------------------------------*/
MATH_FFT(fx32 * data,u32 nShift,const fx16 * sinTable)470 void MATH_FFT(fx32 *data, u32 nShift, const fx16 *sinTable)
471 {
472     u32     i;
473     u32     n = 1U << nShift;
474     u32     n2 = n * 2;
475 
476     MATHi_FFT(data, nShift, sinTable);
477 
478     // Divide the result by n
479     for (i = 0; i < n2; i++)
480     {
481         data[i] >>= nShift;
482     }
483 }
484 
485 /*---------------------------------------------------------------------------*
486   Name:         MATH_IFFT
487 
488   Description:  Performs the inverse transformation of fast Fourier transform.
489 
490   Arguments:    data - Data to transform, with even-numbered data being real and odd-numbered data being imaginary.
491                        The transformation result will be returned overwritten.
492                 nShift - log2 of the number of data
493                 sinTable - Sin value table based on a circle divided into equal sections; the number of sections is the number of data
494 
495   Returns:      None.
496  *---------------------------------------------------------------------------*/
MATH_IFFT(fx32 * data,u32 nShift,const fx16 * sinTable)497 void MATH_IFFT(fx32 *data, u32 nShift, const fx16 *sinTable)
498 {
499     MATHi_IFFT(data, nShift, sinTable);
500 }
501 
502 /*---------------------------------------------------------------------------*
503   Name:         MATH_FFTReal
504 
505   Description:  Performs fast Fourier transform.
506 
507   Arguments:    data - Data that includes only real number data.
508                        The transformation result will be returned overwritten.
509                 nShift - log2 of the number of data
510                 sinTable - Sin value table based on a circle divided into equal sections; the number of sections is the number of data
511 
512   Returns:      None.
513  *---------------------------------------------------------------------------*/
MATH_FFTReal(fx32 * data,u32 nShift,const fx16 * sinTable,const fx16 * sinTable2)514 void MATH_FFTReal(fx32 *data, u32 nShift, const fx16 *sinTable, const fx16 *sinTable2)
515 {
516     u32     i, j, k;
517     u32     n = 1U << nShift;
518     u32     nq = n >> 2;
519 
520     MATHi_FFT(data, nShift - 1, sinTable2);
521 
522     for (k = 1; k < nq; k++)
523     {
524         fx32    xr, xi, yr, yi, zr, zi;
525         fx32    wr, wi;
526 
527         i = k * 2;
528         j = n - i;                     // == (n/2-k)*2
529 
530         wr = sinTable[k + nq];         // sin(x+(pi/2)) == cos(x)
531         wi = sinTable[k];
532 
533         {
534             fx32    d1r, d1i, d2r, d2i;
535             d1r = data[i];
536             d1i = data[i + 1];
537             d2r = data[j];
538             d2i = data[j + 1];
539             yr = (d1r + d2r) >> 1;
540             yi = (d1i - d2i) >> 1;
541             zr = (d1i + d2i) >> 1;
542             zi = -(d1r - d2r) >> 1;
543         }
544 
545         // xr = FX_Mul(wr, zr) + FX_Mul(wi, zi);
546         // xi = FX_Mul(wr, zi) - FX_Mul(wi, zr);
547         {
548             fx32    t = FX_Mul(wr, zr + zi);
549             xr = t - FX_Mul(wr - wi, zi);
550             xi = t - FX_Mul(wr + wi, zr);
551         }
552 
553         data[i] = yr + xr;
554         data[i + 1] = yi + xi;
555         data[j] = yr - xr;
556         data[j + 1] = -yi + xi;
557     }
558     {
559         fx32    xr, xi;
560         xr = data[0];
561         xi = data[1];
562         data[0] = xr + xi;
563         data[1] = xr - xi;
564     }
565 
566     for (i = 0; i < n; i++)
567     {
568         data[i] >>= nShift;
569     }
570 
571 }
572 
573 /*---------------------------------------------------------------------------*
574   Name:         MATH_IFFTReal
575 
576   Description:  Performs the inverse transformation of fast Fourier transform.
577 
578   Arguments:    data - Data that includes only real number data.
579                        The transformation result will be returned overwritten.
580                 nShift - log2 of the number of data
581                 sinTable - Sin value table based on a circle divided into equal sections; the number of sections is the number of data
582 
583   Returns:      None.
584  *---------------------------------------------------------------------------*/
MATH_IFFTReal(fx32 * data,u32 nShift,const fx16 * sinTable,const fx16 * sinTable2)585 void MATH_IFFTReal(fx32 *data, u32 nShift, const fx16 *sinTable, const fx16 *sinTable2)
586 {
587     u32     i, j, k;
588     u32     n = 1U << nShift;
589     u32     nq = n >> 2;
590 
591     for (k = 1; k < nq; k++)
592     {
593         fx32    yr, yi, zr, zi, xr, xi;
594         fx32    wr, wi;
595 
596         i = k * 2;
597         j = n - i;                     // == (n/2-k)*2
598 
599         wr = sinTable[k + nq];         // cos(-x) == sin(x+(pi/2))
600         wi = -sinTable[k];             // sin(-x) == -sin(x)
601 
602         {
603             fx32    d1r, d1i, d2r, d2i;
604             d1r = data[i];
605             d1i = data[i + 1];
606             d2r = data[j];
607             d2i = data[j + 1];
608             yr = (d1r + d2r);
609             yi = (d1i - d2i);
610             zr = -(d1i + d2i);
611             zi = (d1r - d2r);
612         }
613 
614         // xr = FX_Mul(wr, zr) + FX_Mul(wi, zi);
615         // xi = FX_Mul(wr, zi) - FX_Mul(wi, zr);
616         {
617             fx32    t = FX_Mul(wr, zr + zi);
618             xr = t - FX_Mul(wr - wi, zi);
619             xi = t - FX_Mul(wr + wi, zr);
620         }
621 
622         data[i] = yr + xr;
623         data[i + 1] = yi + xi;
624         data[j] = yr - xr;
625         data[j + 1] = -yi + xi;
626     }
627     {
628         fx32    xr, xi;
629         xr = data[0];
630         xi = data[1];
631         data[0] = (xr + xi);
632         data[1] = (xr - xi);
633         data[n / 2] <<= 1;
634         data[n / 2 + 1] <<= 1;
635     }
636 
637     MATHi_IFFT(data, nShift - 1, sinTable2);
638 }
639 
640 #if defined(USE_MATH_DFT)
MATH_DFT(fx32 * data,fx32 * ret,u32 nShift,const fx16 * sinTable)641 void MATH_DFT(fx32 *data, fx32 *ret, u32 nShift, const fx16 *sinTable)
642 {
643 	u32    i;
644 	u32    n2 = 1U << (nShift + 1);
645 
646 	MATHi_DFT( data, ret, nShift, sinTable );
647 
648 	// Divide the result by n
649     for (i = 0; i < n2; i++)
650     {
651         ret[i] >>= nShift;
652     }
653 }
654 
MATHi_DFT(fx32 * data,fx32 * ret,u32 nShift,const fx16 * sinTable)655 static void MATHi_DFT(fx32 *data, fx32 *ret, u32 nShift, const fx16 *sinTable)
656 {
657 	u32    n = 1U << nShift;
658 	u32    n2 = n << 1;
659     u32     nq = n >> 2;
660 	u32    i, j, k;
661 	fx32   wr, wi;
662 	u32    rad, minus;
663 	fx32   tr, ti;
664 
665 	for(j=0;j<n;j++)
666 	{
667 		ret[j * 2]     = 0;
668 		ret[j * 2 + 1] = 0;
669 
670 		for(k=0;k<n;k++)
671 		{
672 			rad = j * k;
673 			rad %= n;
674 			if( rad < n/2 )
675 			{
676 				minus = 0;
677 			}
678 			else
679 			{
680 				rad -= n/2;
681 				minus = 1;
682 			}
683 			wr = sinTable[rad + nq];   // cos(-x) == cos(x) == sin(x+(pi/2))
684 			wi = sinTable[rad];
685 			if( minus == 1 )
686 			{
687 				wr = ~wr + 1L;		//- 100%
688 				wi = ~wi + 1L;		//- 100%
689 			}
690 			tr = FX_Mul(data[k * 2], wr) + FX_Mul(data[k * 2 + 1], wi);
691 			ti = FX_Mul(data[k * 2 + 1], wr) - FX_Mul(data[k * 2], wi);
692 			ret[j * 2]     += tr;
693 			ret[j * 2 + 1] += ti;
694 #if 0
695 			if(j == 1)
696 			{
697 				// tr, ti, wr, wi
698 				OS_Printf("val : %f, %f, %f, %f\n",
699 						  FX_FX32_TO_F32(tr), FX_FX32_TO_F32(ti),
700 						  FX_FX32_TO_F32(wr), FX_FX32_TO_F32(wi) );
701 			}
702 #endif
703 			//if( tr > 10000 )
704 			{
705 				//OS_Printf("tr = %f, ti = %f\n", FX_FX32_TO_F32(tr), FX_FX32_TO_F32(ti));
706 				//OS_Printf("    wr = %f, wi = %f\n", FX_FX32_TO_F32(wr), FX_FX32_TO_F32(wi));
707 				//OS_Printf("    rr = %f, ri = %f\n", FX_FX32_TO_F32(data[k * 2]), FX_FX32_TO_F32(data[k * 2 + 1]));
708 			}
709 		}
710 	}
711 }
712 #endif
713