1 /*---------------------------------------------------------------------------*
2   Project:  Low level locked cache API demo
3   File:     lockedcachedemo2.c
4 
5   Copyright 1998, 1999 Nintendo.  All rights reserved.
6 
7   These coded instructions, statements, and computer programs contain
8   proprietary information of Nintendo of America Inc. and/or Nintendo
9   Company Ltd., and are protected by Federal copyright law.  They may
10   not be disclosed to third parties or copied or duplicated in any form,
11   in whole or in part, without the prior written consent of Nintendo.
12 
13   $Log: lockedcachedemo2.c,v $
14   Revision 1.2  02/20/2006 04:13:11  mitu
15   changed include path from dolphin/ to revolution/.
16 
17   Revision 1.1  01/13/2006 11:24:13  hiratsu
18   Initial check in.
19 
20 
21     3     6/11/01 7:54p Tian
22     bzero definition not required for SN
23 
24     2     6/08/00 12:45p Tian
25     Corrected PMC usage to enable cycle count last.
26 
27     1     2/15/00 7:03p Tian
28     Initial check-in.
29   $NoKeywords: $
30  *---------------------------------------------------------------------------*/
31 #include <revolution.h>
32 #include <string.h>
33 #include <stdio.h>
34 #include <stdlib.h>
35 
36 // Low level locked cache api demo
37 
38 /*---------------------------------------------------------------------------*
39   Performance monitor macros
40  *---------------------------------------------------------------------------*/
// Performance monitor helpers.
// Each macro expands to one complete statement (a braced block or a single
// call), so it is written at the call site without a trailing semicolon and
// stays intact even when placed under an unbraced if/else.
//
// Counter assignment made by STARTPMC:
//   PMC1 measures instruction count
//   PMC2 measures # of loads and stores
//   PMC3 measures # of cycles lost to L1 misses
//   PMC4 measures cycle count

// STARTPMC sets both MMCRs (monitor control registers) going.
// Note : MMCR1 is written second, so the cycle counter (PMC4) is turned on last.
#define STARTPMC            { PPCMtmmcr0(MMCR0_PMC1_INSTRUCTION |   \
                                         MMCR0_PMC2_LOAD_STORE);    \
                              PPCMtmmcr1(MMCR1_PMC3_L1_MISS_CYCLE | \
                                         MMCR1_PMC4_CYCLE); }

// STOPPMC pauses all performance counters by writing 0 to the MMCRs.
// Note : MMCR1 is cleared first, so the cycle counter is turned off first.
#define STOPPMC             { PPCMtmmcr1(0); \
                              PPCMtmmcr0(0); }

// PRINTPMC reports the four counters in the order PMC2, PMC3, PMC4, PMC1.
#define PRINTPMC            OSReport("<%d loadstores / %d miss cycles / %d cycles / %d Instructions>\n", \
                                     PPCMfpmc2(), PPCMfpmc3(), PPCMfpmc4(), PPCMfpmc1());

// RESETPMC writes 0 to all four performance counters.
#define RESETPMC            { PPCMtpmc1(0); \
                              PPCMtpmc2(0); \
                              PPCMtpmc3(0); \
                              PPCMtpmc4(0); }
64 
65 
66 
67 /*---------------------------------------------------------------------------*
68   Buffer management
69  *---------------------------------------------------------------------------*/
70 // use 4 4k buffers
71 // note that NUMBUFFERS * BUFFER_SIZE <= 16k
72 #define BUFFER_SIZE         (4*1024)
73 #define NUM_BUFFERS         (4)
74 #define DATA_ELEMENTS       (2*1024*1024)
75 
76 // value to write to each buffer
77 #define TESTVALUE           0xA
78 
79 void VerifyData(u8* buffer, u8 value);
80 void ProcessBuf(u8* buffer);
81 #ifndef __SN__
82 void bzero(void*, u32);
83 #endif
84 // real memory location of Buffers[i] is at BufAddr[i]
85 u8*                         Buffers[NUM_BUFFERS];
86 u8*                         BufAddr[NUM_BUFFERS];
87 #ifndef __SN__
// Set the first `bytes` bytes starting at ptr to zero.
//   ptr   : start of the region to clear
//   bytes : number of bytes to clear
// Only built when __SN__ is not defined (see the enclosing #ifndef).
void bzero(void* ptr, u32 bytes)
{
    memset(ptr, 0, bytes);
}
98 #endif
99 
100 // verify that the buffer pointed to by buffer is equal to value
VerifyData(u8 * buffer,u8 value)101 void VerifyData(u8* buffer, u8 value)
102 {
103     u32 i;
104 
105     for (i = 0; i < DATA_ELEMENTS; i++)
106     {
107         if (buffer[i] != value)
108         {
109             OSReport("ERROR : Buffer[%d]@0x%x (%d) != %d\n",
110                      i,
111                      buffer + i,
112                      buffer[i],
113                      value);
114             OSHalt("Test failed");
115         }
116     }
117 }
118 
119 
120 // trivial buffer processing - increment each buffer element with TESTVALUE
121 // this ensures we perform a READ on the data.
ProcessBuf(u8 * buffer)122 void ProcessBuf(u8* buffer)
123 {
124     u32 i;
125     u8  val;
126 
127     for (i = 0; i < BUFFER_SIZE; i++)
128     {
129         // Because this loop might overrun the locked cache
130         // during a mispredicted branch, pad the top of the loop
131         // with a bunch of non-loads.
132         val = TESTVALUE;
133         val *= 2;
134         val /= 2;
135         buffer[i] = (u8)(buffer[i] + val);
136     }
137 }
138 
139 
main()140 void main ()
141 {
142     u8*         data;
143     u8*         currDataPtr;        // offset into data
144     u32         i;
145     void*       arenaLo;
146     void*       arenaHi;
147 
148 #ifndef GEKKO
149     OSHalt("This test is GEKKO specific");
150 #endif
151     OSInit();
152 
153     OSReport(" Locked Cache Demo 1 : ");
154     OSReport("using high level interface for DMA load/store \n");
155 
156     LCEnable();
157 
158     arenaLo = OSGetArenaLo();
159     arenaHi = OSGetArenaHi();
160 
161     // OSInitAlloc should only ever be invoked once.
162     arenaLo = OSInitAlloc(arenaLo, arenaHi, 1); // 1 heap
163     OSSetArenaLo(arenaLo);
164 
165     // Ensure boundaries are 32B aligned
166     arenaLo = (void*)OSRoundUp32B(arenaLo);
167     arenaHi = (void*)OSRoundDown32B(arenaHi);
168 
169     // The boundaries given to OSCreateHeap should be 32B aligned
170     OSSetCurrentHeap(OSCreateHeap(arenaLo, arenaHi));
171     // From here on out, OSAlloc and OSFree behave like malloc and free
172     // respectively
173     OSSetArenaLo(arenaLo=arenaHi);
174 
175     OSReport("Splitting locked cache into %d buffers\n", NUM_BUFFERS);
176 
177     for (i = 0; i < NUM_BUFFERS; i++)
178     {
179         Buffers[i] = (u8*) ((u32)LCGetBase() + BUFFER_SIZE*i);
180         OSReport("  Locked Cache : Allocated %d bytes at 0x%x\n",
181                  BUFFER_SIZE,
182                  Buffers[i]);
183     }
184 
185     // Initialize source data
186     data = (u8*)OSAlloc(DATA_ELEMENTS * sizeof(u8));
187     OSReport("Initializing source data <0x%x - 0x%x> to all 0's\n",
188              data,
189              data + DATA_ELEMENTS);
190     bzero(data, DATA_ELEMENTS);
191     DCFlushRange(data, DATA_ELEMENTS);
192 
193     // Initialize the first buffers
194     for (i = 0; i < NUM_BUFFERS; i++)
195     {
196         BufAddr[i]      = data + BUFFER_SIZE*i;
197         LCLoadBlocks(Buffers[i], BufAddr[i], 0); // 0 is max number of lines
198     }
199 
200     currDataPtr = data + (BUFFER_SIZE * NUM_BUFFERS);
201 
202     RESETPMC
203     STARTPMC
204     LCQueueWait(NUM_BUFFERS-1);
205 
206     while (currDataPtr < data+DATA_ELEMENTS)
207     {
208         for (i = 0; i < NUM_BUFFERS; i++)
209         {
210             // prevstore + prevload, each takes 2
211             LCQueueWait((NUM_BUFFERS-1)*2);
212             ProcessBuf(Buffers[i]);
213             LCStoreBlocks(BufAddr[i], Buffers[i], 0);
214             BufAddr[i] = currDataPtr;   // move to next unprocessed buffer
215             LCLoadBlocks(Buffers[i], BufAddr[i], 0);
216             // advance the next block to be read
217             currDataPtr += BUFFER_SIZE;
218         }
219     }
220 
221     // process last buffers
222     for (i = 0; i < NUM_BUFFERS; i++)
223     {
224         ProcessBuf(Buffers[i]);
225         LCStoreBlocks(BufAddr[i], Buffers[i],  0);
226     }
227 
228     LCQueueWait(NUM_BUFFERS);
229     STOPPMC
230     OSReport("Dumping performance monitors- L1 miss cycles should be zero:\n");
231     PRINTPMC
232 
233     OSReport("Verifying data...\n");
234     VerifyData(data,TESTVALUE);
235 
236     OSHalt("Demo complete");
237 }
238