1 /*---------------------------------------------------------------------------*
2 Project: Low level locked cache API demo
3 File: lockedcachedemo2.c
4
5 Copyright 1998, 1999 Nintendo. All rights reserved.
6
7 These coded instructions, statements, and computer programs contain
8 proprietary information of Nintendo of America Inc. and/or Nintendo
9 Company Ltd., and are protected by Federal copyright law. They may
10 not be disclosed to third parties or copied or duplicated in any form,
11 in whole or in part, without the prior written consent of Nintendo.
12
13 $Log: lockedcachedemo2.c,v $
14 Revision 1.2 02/20/2006 04:13:11 mitu
15 changed include path from dolphin/ to revolution/.
16
17 Revision 1.1 01/13/2006 11:24:13 hiratsu
18 Initial check in.
19
20
21 3 6/11/01 7:54p Tian
22 bzero definition not required for SN
23
24 2 6/08/00 12:45p Tian
25 Corrected PMC usage to enable cycle count last.
26
27 1 2/15/00 7:03p Tian
28 Initial check-in.
29 $NoKeywords: $
30 *---------------------------------------------------------------------------*/
31 #include <revolution.h>
32 #include <string.h>
33 #include <stdio.h>
34 #include <stdlib.h>
35
36 // Low level locked cache api demo
37
38 /*---------------------------------------------------------------------------*
39 Performance monitor macros
40 *---------------------------------------------------------------------------*/
// STARTPMC programs both MMCRs (monitor control registers), which sets the
// four performance counters going:
//   PMC1 measures instruction count
//   PMC2 measures # of loads and stores
//   PMC3 measures # of cycles lost to L1 misses
//   PMC4 measures cycle count
// Note : MMCR1 (which selects the PMC4 cycle counter) is written last, so
// the cycle counter is turned on last.
//
// The expansion is a braced block so that both register writes stay
// together when the macro is the body of an unbraced `if`.  Call sites
// write the macro without a trailing ';'.
#define STARTPMC    { PPCMtmmcr0(MMCR0_PMC1_INSTRUCTION |     \
                                 MMCR0_PMC2_LOAD_STORE);       \
                      PPCMtmmcr1(MMCR1_PMC3_L1_MISS_CYCLE |    \
                                 MMCR1_PMC4_CYCLE); }
51
// STOPPMC pauses all performance counters by writing 0 to both MMCRs.
// Note that MMCR1 is cleared first, so the cycle counter is turned off
// first.
//
// The expansion is a braced block so that both register writes stay
// together when the macro is the body of an unbraced `if`.  Call sites
// write the macro without a trailing ';'.
#define STOPPMC     { PPCMtmmcr1(0);    \
                      PPCMtmmcr0(0); }
56
// PRINTPMC reports the four counters through OSReport, in the order
// PMC2 (loadstores), PMC3 (miss cycles), PMC4 (cycles), PMC1 (instructions).
// Call sites write the macro without a trailing ';'.
#define PRINTPMC    {                                                                          \
    OSReport("<%d loadstores / %d miss cycles / %d cycles / %d Instructions>\n",               \
             PPCMfpmc2(),                                                                      \
             PPCMfpmc3(),                                                                      \
             PPCMfpmc4(),                                                                      \
             PPCMfpmc1());                                                                     \
}
59
// RESETPMC writes 0 to each of the four performance counters (PMC1-PMC4).
//
// The expansion is a braced block so that all four writes stay together
// when the macro is the body of an unbraced `if`.  Call sites write the
// macro without a trailing ';'.
#define RESETPMC    { PPCMtpmc1(0);     \
                      PPCMtpmc2(0);     \
                      PPCMtpmc3(0);     \
                      PPCMtpmc4(0); }
64
65
66
67 /*---------------------------------------------------------------------------*
68 Buffer management
69 *---------------------------------------------------------------------------*/
// use 4 4k buffers
// note that NUM_BUFFERS * BUFFER_SIZE <= 16k
#define BUFFER_SIZE     (4*1024)
#define NUM_BUFFERS     (4)

// number of bytes of source data streamed through the buffers
#define DATA_ELEMENTS   (2*1024*1024)

// value added to every data element; the source data starts as all 0's,
// so this is also the value every element must hold afterwards
#define TESTVALUE       0xA

// Enforce the constraints the rest of the file relies on, so that a change
// to the values above fails at compile time instead of at run time.
#if (NUM_BUFFERS * BUFFER_SIZE) > (16*1024)
#error NUM_BUFFERS * BUFFER_SIZE must not exceed 16k
#endif

// main() advances through the data one full round of NUM_BUFFERS buffers
// at a time and has no handling for a partial round.
#if (DATA_ELEMENTS % (NUM_BUFFERS * BUFFER_SIZE)) != 0
#error DATA_ELEMENTS must be a whole multiple of NUM_BUFFERS * BUFFER_SIZE
#endif

// ProcessBuf() doubles and then halves TESTVALUE in a u8, which only
// round-trips for values up to 0x7F.
#if (TESTVALUE) > 0x7F
#error TESTVALUE must not exceed 0x7F
#endif
78
79 void VerifyData(u8* buffer, u8 value);
80 void ProcessBuf(u8* buffer);
81 #ifndef __SN__
82 void bzero(void*, u32);
83 #endif
84 // real memory location of Buffers[i] is at BufAddr[i]
85 u8* Buffers[NUM_BUFFERS];
86 u8* BufAddr[NUM_BUFFERS];
87 #ifndef __SN__
bzero(void * ptr,u32 bytes)88 void bzero(void* ptr, u32 bytes)
89 {
90 u32 i;
91 u8* p = ptr;
92
93 for (i = 0; i < bytes; i++)
94 {
95 p[i] = 0;
96 }
97 }
98 #endif
99
100 // verify that the buffer pointed to by buffer is equal to value
VerifyData(u8 * buffer,u8 value)101 void VerifyData(u8* buffer, u8 value)
102 {
103 u32 i;
104
105 for (i = 0; i < DATA_ELEMENTS; i++)
106 {
107 if (buffer[i] != value)
108 {
109 OSReport("ERROR : Buffer[%d]@0x%x (%d) != %d\n",
110 i,
111 buffer + i,
112 buffer[i],
113 value);
114 OSHalt("Test failed");
115 }
116 }
117 }
118
119
120 // trivial buffer processing - increment each buffer element with TESTVALUE
121 // this ensures we perform a READ on the data.
ProcessBuf(u8 * buffer)122 void ProcessBuf(u8* buffer)
123 {
124 u32 i;
125 u8 val;
126
127 for (i = 0; i < BUFFER_SIZE; i++)
128 {
129 // Because this loop might overrun the locked cache
130 // during a mispredicted branch, pad the top of the loop
131 // with a bunch of non-loads.
132 val = TESTVALUE;
133 val *= 2;
134 val /= 2;
135 buffer[i] = (u8)(buffer[i] + val);
136 }
137 }
138
139
main()140 void main ()
141 {
142 u8* data;
143 u8* currDataPtr; // offset into data
144 u32 i;
145 void* arenaLo;
146 void* arenaHi;
147
148 #ifndef GEKKO
149 OSHalt("This test is GEKKO specific");
150 #endif
151 OSInit();
152
153 OSReport(" Locked Cache Demo 1 : ");
154 OSReport("using high level interface for DMA load/store \n");
155
156 LCEnable();
157
158 arenaLo = OSGetArenaLo();
159 arenaHi = OSGetArenaHi();
160
161 // OSInitAlloc should only ever be invoked once.
162 arenaLo = OSInitAlloc(arenaLo, arenaHi, 1); // 1 heap
163 OSSetArenaLo(arenaLo);
164
165 // Ensure boundaries are 32B aligned
166 arenaLo = (void*)OSRoundUp32B(arenaLo);
167 arenaHi = (void*)OSRoundDown32B(arenaHi);
168
169 // The boundaries given to OSCreateHeap should be 32B aligned
170 OSSetCurrentHeap(OSCreateHeap(arenaLo, arenaHi));
171 // From here on out, OSAlloc and OSFree behave like malloc and free
172 // respectively
173 OSSetArenaLo(arenaLo=arenaHi);
174
175 OSReport("Splitting locked cache into %d buffers\n", NUM_BUFFERS);
176
177 for (i = 0; i < NUM_BUFFERS; i++)
178 {
179 Buffers[i] = (u8*) ((u32)LCGetBase() + BUFFER_SIZE*i);
180 OSReport(" Locked Cache : Allocated %d bytes at 0x%x\n",
181 BUFFER_SIZE,
182 Buffers[i]);
183 }
184
185 // Initialize source data
186 data = (u8*)OSAlloc(DATA_ELEMENTS * sizeof(u8));
187 OSReport("Initializing source data <0x%x - 0x%x> to all 0's\n",
188 data,
189 data + DATA_ELEMENTS);
190 bzero(data, DATA_ELEMENTS);
191 DCFlushRange(data, DATA_ELEMENTS);
192
193 // Initialize the first buffers
194 for (i = 0; i < NUM_BUFFERS; i++)
195 {
196 BufAddr[i] = data + BUFFER_SIZE*i;
197 LCLoadBlocks(Buffers[i], BufAddr[i], 0); // 0 is max number of lines
198 }
199
200 currDataPtr = data + (BUFFER_SIZE * NUM_BUFFERS);
201
202 RESETPMC
203 STARTPMC
204 LCQueueWait(NUM_BUFFERS-1);
205
206 while (currDataPtr < data+DATA_ELEMENTS)
207 {
208 for (i = 0; i < NUM_BUFFERS; i++)
209 {
210 // prevstore + prevload, each takes 2
211 LCQueueWait((NUM_BUFFERS-1)*2);
212 ProcessBuf(Buffers[i]);
213 LCStoreBlocks(BufAddr[i], Buffers[i], 0);
214 BufAddr[i] = currDataPtr; // move to next unprocessed buffer
215 LCLoadBlocks(Buffers[i], BufAddr[i], 0);
216 // advance the next block to be read
217 currDataPtr += BUFFER_SIZE;
218 }
219 }
220
221 // process last buffers
222 for (i = 0; i < NUM_BUFFERS; i++)
223 {
224 ProcessBuf(Buffers[i]);
225 LCStoreBlocks(BufAddr[i], Buffers[i], 0);
226 }
227
228 LCQueueWait(NUM_BUFFERS);
229 STOPPMC
230 OSReport("Dumping performance monitors- L1 miss cycles should be zero:\n");
231 PRINTPMC
232
233 OSReport("Verifying data...\n");
234 VerifyData(data,TESTVALUE);
235
236 OSHalt("Demo complete");
237 }
238