1 /*---------------------------------------------------------------------------*
2   Project:  High level locked cache API demo
3   File:     lockedcachedemo1.c
4 
5   Copyright 1998, 1999 Nintendo.  All rights reserved.
6 
7   These coded instructions, statements, and computer programs contain
8   proprietary information of Nintendo of America Inc. and/or Nintendo
9   Company Ltd., and are protected by Federal copyright law.  They may
10   not be disclosed to third parties or copied or duplicated in any form,
11   in whole or in part, without the prior written consent of Nintendo.
12 
13   $Log: lockedcachedemo1.c,v $
14   Revision 1.2  02/20/2006 04:13:11  mitu
15   changed include path from dolphin/ to revolution/.
16 
17   Revision 1.1  01/13/2006 11:24:13  hiratsu
18   Initial check in.
19 
20 
21     3     6/11/01 7:54p Tian
22     bzero definition not required for SN
23 
24     2     6/08/00 12:45p Tian
25     Corrected PMC usage to enable cycle count last.
26 
27     1     2/15/00 7:03p Tian
28     Initial check-in.
29   $NoKeywords: $
30  *---------------------------------------------------------------------------*/
31 #include <revolution.h>
32 #include <string.h>
33 #include <stdio.h>
34 #include <stdlib.h>
35 
36 // High level locked cache api demo
37 
38 /*---------------------------------------------------------------------------*
39   Performance monitor macros
40  *---------------------------------------------------------------------------*/
// Performance counter assignment used by this demo:
//   PMC1 measures instruction count
//   PMC2 measures # of loads and stores
//   PMC3 measures # of cycles lost to L1 misses
//   PMC4 measures cycle count
//
// Every macro below expands to ONE brace-enclosed block, so it remains a
// single statement when it is the body of an unbraced if/else or loop.
// The macros are written to be invoked without a trailing semicolon
// (e.g. "RESETPMC" alone on a line).

// STARTPMC sets both MMCRs (monitor control registers) going.
// Note : MMCR1, which selects the cycle counter, is written last.
#define STARTPMC            { PPCMtmmcr0(MMCR0_PMC1_INSTRUCTION |   \
                                         MMCR0_PMC2_LOAD_STORE);    \
                              PPCMtmmcr1(MMCR1_PMC3_L1_MISS_CYCLE | \
                                         MMCR1_PMC4_CYCLE); }

// STOPPMC pauses all performance counters by writing 0 to the MMCRs.
// Note that MMCR1 (cycle counter) is cleared first.
#define STOPPMC             { PPCMtmmcr1(0); \
                              PPCMtmmcr0(0); }

// PRINTPMC reports the current value of all four counters via OSReport.
#define PRINTPMC            { OSReport("<%d loadstores / %d miss cycles / %d cycles / %d Instructions>\n", \
                                       PPCMfpmc2(), PPCMfpmc3(), PPCMfpmc4(), PPCMfpmc1()); }

// RESETPMC zeroes all four counters.
#define RESETPMC            { PPCMtpmc1(0); \
                              PPCMtpmc2(0); \
                              PPCMtpmc3(0); \
                              PPCMtpmc4(0); }
64 
65 
66 
67 /*---------------------------------------------------------------------------*
68   Buffer management
69  *---------------------------------------------------------------------------*/
70 // use 2 8k buffers
71 // note that NUMBUFFERS * BUFFER_SIZE <= 16k
72 #define BUFFER_SIZE         (8*1024)
73 #define NUM_BUFFERS         (2)
74 #define DATA_ELEMENTS       (2*1024*1024)
75 
76 // value to write to each buffer
77 #define TESTVALUE           0xA
78 
79 void VerifyData(u8* buffer, u8 value);
80 void ProcessBuf(u8* buffer);
81 #ifndef __SN__
82 void bzero(void*, u32);
83 #endif
84 // real memory location of Buffers[i] is at BufAddr[i]
85 u8*                         Buffers[NUM_BUFFERS];
86 u8*                         BufAddr[NUM_BUFFERS];
87 #ifndef __SN__
// Zero-fill a region of memory one byte at a time.
//   ptr   : start of the region to clear
//   bytes : number of bytes to clear (0 leaves memory untouched)
void bzero(void* ptr, u32 bytes)
{
    u8*     cursor    = ptr;
    u32     remaining = bytes;

    while (remaining != 0)
    {
        *cursor = 0;
        cursor++;
        remaining--;
    }
}
98 #endif
99 
100 // verify that the buffer pointed to by buffer is equal to value
VerifyData(u8 * buffer,u8 value)101 void VerifyData(u8* buffer, u8 value)
102 {
103     u32 i;
104 
105     for (i = 0; i < DATA_ELEMENTS; i++)
106     {
107         if (buffer[i] != value)
108         {
109             OSReport("ERROR : Buffer[%d]@0x%x (%d) != %d\n",
110                      i,
111                      buffer + i,
112                      buffer[i],
113                      value);
114             OSHalt("Test failed");
115         }
116     }
117 }
118 
119 
120 // trivial buffer processing - increment each buffer element with TESTVALUE
121 // this ensures we perform a READ on the data.
ProcessBuf(u8 * buffer)122 void ProcessBuf(u8* buffer)
123 {
124     u32 i;
125     u8  val;
126 
127     for (i = 0; i < BUFFER_SIZE; i++)
128     {
129         // Because this loop might overrun the locked cache
130         // during a mispredicted branch, pad the top of the loop
131         // with a bunch of non-loads.
132         val = TESTVALUE;
133         val *= 2;
134         val /= 2;
135         buffer[i] = (u8)(buffer[i] + val);
136     }
137 }
138 
139 
main()140 void main ()
141 {
142     u8*         data;
143     u8*         currDataPtr;        // offset into data
144     u32         i;
145     void*       arenaLo;
146     void*       arenaHi;
147     u32         numTransactions;
148 
149 #ifndef GEKKO
150     OSHalt("This test is GEKKO specific");
151 #endif
152     OSInit();
153 
154     OSReport(" Locked Cache Demo 1 : ");
155     OSReport("using high level interface for DMA load/store \n");
156 
157     LCEnable();
158 
159     arenaLo = OSGetArenaLo();
160     arenaHi = OSGetArenaHi();
161 
162     // OSInitAlloc should only ever be invoked once.
163     arenaLo = OSInitAlloc(arenaLo, arenaHi, 1); // 1 heap
164     OSSetArenaLo(arenaLo);
165 
166     // Ensure boundaries are 32B aligned
167     arenaLo = (void*)OSRoundUp32B(arenaLo);
168     arenaHi = (void*)OSRoundDown32B(arenaHi);
169 
170     // The boundaries given to OSCreateHeap should be 32B aligned
171     OSSetCurrentHeap(OSCreateHeap(arenaLo, arenaHi));
172     // From here on out, OSAlloc and OSFree behave like malloc and free
173     // respectively
174     OSSetArenaLo(arenaLo=arenaHi);
175 
176     OSReport("Splitting locked cache into %d buffers\n", NUM_BUFFERS);
177 
178     for (i = 0; i < NUM_BUFFERS; i++)
179     {
180         Buffers[i] = (u8*) ((u32)LCGetBase() + BUFFER_SIZE*i);
181         OSReport("  Locked Cache : Allocated %d bytes at 0x%x\n",
182                  BUFFER_SIZE,
183                  Buffers[i]);
184     }
185 
186     // Initialize source data
187     data = (u8*)OSAlloc(DATA_ELEMENTS * sizeof(u8));
188     OSReport("Initializing source data <0x%x - 0x%x> to all 0's\n",
189              data,
190              data + DATA_ELEMENTS);
191     bzero(data, DATA_ELEMENTS);
192     DCFlushRange(data, DATA_ELEMENTS);
193 
194     // Initialize the first buffers
195     for (i = 0; i < NUM_BUFFERS; i++)
196     {
197         BufAddr[i]      = data + BUFFER_SIZE*i;
198         numTransactions = LCLoadData(Buffers[i], BufAddr[i], BUFFER_SIZE);
199     }
200 
201     currDataPtr = data + (BUFFER_SIZE * NUM_BUFFERS);
202 
203     RESETPMC
204     STARTPMC
205     LCQueueWait((NUM_BUFFERS-1) * 4);
206 
207     while (currDataPtr < data+DATA_ELEMENTS)
208     {
209         for (i = 0; i < NUM_BUFFERS; i++)
210         {
211             // prevstore + prevload, each takes 2
212             LCQueueWait((NUM_BUFFERS-1)*numTransactions);
213             ProcessBuf(Buffers[i]);
214             LCStoreData(BufAddr[i], Buffers[i],  BUFFER_SIZE);
215             BufAddr[i] = currDataPtr;   // move to next unprocessed buffer
216             LCLoadData(Buffers[i], BufAddr[i], BUFFER_SIZE);
217             // advance the next block to be read
218             currDataPtr += BUFFER_SIZE;
219         }
220     }
221 
222     // process last buffers
223     for (i = 0; i < NUM_BUFFERS; i++)
224     {
225         ProcessBuf(Buffers[i]);
226         LCStoreData(BufAddr[i], Buffers[i],  BUFFER_SIZE);
227     }
228 
229     LCQueueWait(NUM_BUFFERS);
230     STOPPMC
231     OSReport("Dumping performance monitors- L1 miss cycles should be zero:\n");
232     PRINTPMC
233 
234     OSReport("Verifying data...\n");
235     VerifyData(data,TESTVALUE);
236 
237     OSHalt("Demo complete");
238 }
239