1 /*---------------------------------------------------------------------------*
2 Project: High level locked cache API demo
3 File: lockedcachedemo1.c
4
5 Copyright 1998, 1999 Nintendo. All rights reserved.
6
7 These coded instructions, statements, and computer programs contain
8 proprietary information of Nintendo of America Inc. and/or Nintendo
9 Company Ltd., and are protected by Federal copyright law. They may
10 not be disclosed to third parties or copied or duplicated in any form,
11 in whole or in part, without the prior written consent of Nintendo.
12
13 $Log: lockedcachedemo1.c,v $
14 Revision 1.2 02/20/2006 04:13:11 mitu
15 changed include path from dolphin/ to revolution/.
16
17 Revision 1.1 01/13/2006 11:24:13 hiratsu
18 Initial check in.
19
20
21 3 6/11/01 7:54p Tian
22 bzero definition not required for SN
23
24 2 6/08/00 12:45p Tian
25 Corrected PMC usage to enable cycle count last.
26
27 1 2/15/00 7:03p Tian
28 Initial check-in.
29 $NoKeywords: $
30 *---------------------------------------------------------------------------*/
31 #include <revolution.h>
32 #include <string.h>
33 #include <stdio.h>
34 #include <stdlib.h>
35
36 // High level locked cache api demo
37
38 /*---------------------------------------------------------------------------*
39 Performance monitor macros
40 *---------------------------------------------------------------------------*/
41 // STARTPMC sets both MMCRs (monitor control registers) going.
42 // PMC1 measures instruction count
43 // PMC2 measures # of loads and stores
44 // PMC3 measures # of cycles lost to L1 misses
45 // PMC4 measures cycle count
46 // Note : cycle counter is turned on last
// Each multi-statement macro below is wrapped in do { } while (0) so that it
// behaves as ONE statement (for example under an unbraced if).  The trailing
// ';' after while (0) is kept on purpose: existing call sites invoke these
// macros without a terminating semicolon.

// STARTPMC writes MMCR0 first and MMCR1 second; the cycle counter (PMC4) is
// selected by the MMCR1 write, so it is the last counter to be configured.
#define STARTPMC    do {                                                    \
        PPCMtmmcr0(MMCR0_PMC1_INSTRUCTION |                                 \
                   MMCR0_PMC2_LOAD_STORE);                                  \
        PPCMtmmcr1(MMCR1_PMC3_L1_MISS_CYCLE |                               \
                   MMCR1_PMC4_CYCLE);                                       \
    } while (0);

// STOPPMC pauses all performance counters by writing 0 to the MMCRs.
// MMCR1 (which holds the cycle counter selection) is cleared first.
#define STOPPMC     do {                                                    \
        PPCMtmmcr1(0);                                                      \
        PPCMtmmcr0(0);                                                      \
    } while (0);

// PRINTPMC reports the four counters in the order
// PMC2 (loads/stores), PMC3 (L1 miss cycles), PMC4 (cycles), PMC1 (instructions).
#define PRINTPMC    OSReport("<%d loadstores / %d miss cycles / %d cycles / %d Instructions>\n", \
                             PPCMfpmc2(), PPCMfpmc3(), PPCMfpmc4(), PPCMfpmc1());

// RESETPMC writes 0 to all four performance counters.
#define RESETPMC    do {                                                    \
        PPCMtpmc1(0);                                                       \
        PPCMtpmc2(0);                                                       \
        PPCMtpmc3(0);                                                       \
        PPCMtpmc4(0);                                                       \
    } while (0);
64
65
66
67 /*---------------------------------------------------------------------------*
68 Buffer management
69 *---------------------------------------------------------------------------*/
// use 2 8k buffers
// note that NUM_BUFFERS * BUFFER_SIZE <= 16k
#define BUFFER_SIZE     (8*1024)
#define NUM_BUFFERS     (2)
#define DATA_ELEMENTS   (2*1024*1024)

// value to write to each buffer
#define TESTVALUE       0xA

// Enforce the sizing rules at compile time instead of relying on the comment.
#if (NUM_BUFFERS * BUFFER_SIZE) > (16 * 1024)
#error "NUM_BUFFERS * BUFFER_SIZE must not exceed 16k"
#endif

// main() streams the data NUM_BUFFERS blocks per pass, so the data size has
// to be a whole number of passes or the last loads would run past the end.
#if (DATA_ELEMENTS % (NUM_BUFFERS * BUFFER_SIZE)) != 0
#error "DATA_ELEMENTS must be a multiple of NUM_BUFFERS * BUFFER_SIZE"
#endif
78
79 void VerifyData(u8* buffer, u8 value);
80 void ProcessBuf(u8* buffer);
81 #ifndef __SN__
82 void bzero(void*, u32);
83 #endif
84 // real memory location of Buffers[i] is at BufAddr[i]
85 u8* Buffers[NUM_BUFFERS];
86 u8* BufAddr[NUM_BUFFERS];
87 #ifndef __SN__
// Fill a memory region with zero bytes.
//   ptr   : start of the region to clear
//   bytes : number of bytes to clear (0 clears nothing)
void bzero(void* ptr, u32 bytes)
{
    u8* cursor = ptr;
    u8* end    = cursor + bytes;

    // Walk the region one byte at a time until the end marker is reached.
    while (cursor != end)
    {
        *cursor = 0;
        cursor++;
    }
}
98 #endif
99
100 // verify that the buffer pointed to by buffer is equal to value
// Check that the first DATA_ELEMENTS bytes of buffer all equal value.
//   buffer : start of the data to check
//   value  : byte every element is expected to hold
// Each mismatch is reported through OSReport (index, address, actual and
// expected value) and followed by OSHalt("Test failed").
void VerifyData(u8* buffer, u8 value)
{
    u32 idx = 0;

    while (idx < DATA_ELEMENTS)
    {
        u8 actual = buffer[idx];

        if (actual != value)
        {
            OSReport("ERROR : Buffer[%d]@0x%x (%d) != %d\n",
                     idx,
                     buffer + idx,
                     actual,
                     value);
            OSHalt("Test failed");
        }

        idx++;
    }
}
118
119
120 // trivial buffer processing - increment each buffer element with TESTVALUE
121 // this ensures we perform a READ on the data.
// Add TESTVALUE to each of the BUFFER_SIZE bytes starting at buffer.
// Every element is read and then written back, so the buffer is both
// loaded from and stored to.
//   buffer : start of the BUFFER_SIZE-byte region to update in place
void ProcessBuf(u8* buffer)
{
    u8  addend;
    u32 offset = 0;

    while (offset < BUFFER_SIZE)
    {
        // The loop might overrun the locked cache during a mispredicted
        // branch, so the top of the loop body is padded with a few
        // non-load operations before the buffer is touched.
        // The multiply/divide pair leaves addend equal to TESTVALUE.
        addend = TESTVALUE;
        addend *= 2;
        addend /= 2;

        buffer[offset] = (u8)(buffer[offset] + addend);
        ++offset;
    }
}
138
139
main()140 void main ()
141 {
142 u8* data;
143 u8* currDataPtr; // offset into data
144 u32 i;
145 void* arenaLo;
146 void* arenaHi;
147 u32 numTransactions;
148
149 #ifndef GEKKO
150 OSHalt("This test is GEKKO specific");
151 #endif
152 OSInit();
153
154 OSReport(" Locked Cache Demo 1 : ");
155 OSReport("using high level interface for DMA load/store \n");
156
157 LCEnable();
158
159 arenaLo = OSGetArenaLo();
160 arenaHi = OSGetArenaHi();
161
162 // OSInitAlloc should only ever be invoked once.
163 arenaLo = OSInitAlloc(arenaLo, arenaHi, 1); // 1 heap
164 OSSetArenaLo(arenaLo);
165
166 // Ensure boundaries are 32B aligned
167 arenaLo = (void*)OSRoundUp32B(arenaLo);
168 arenaHi = (void*)OSRoundDown32B(arenaHi);
169
170 // The boundaries given to OSCreateHeap should be 32B aligned
171 OSSetCurrentHeap(OSCreateHeap(arenaLo, arenaHi));
172 // From here on out, OSAlloc and OSFree behave like malloc and free
173 // respectively
174 OSSetArenaLo(arenaLo=arenaHi);
175
176 OSReport("Splitting locked cache into %d buffers\n", NUM_BUFFERS);
177
178 for (i = 0; i < NUM_BUFFERS; i++)
179 {
180 Buffers[i] = (u8*) ((u32)LCGetBase() + BUFFER_SIZE*i);
181 OSReport(" Locked Cache : Allocated %d bytes at 0x%x\n",
182 BUFFER_SIZE,
183 Buffers[i]);
184 }
185
186 // Initialize source data
187 data = (u8*)OSAlloc(DATA_ELEMENTS * sizeof(u8));
188 OSReport("Initializing source data <0x%x - 0x%x> to all 0's\n",
189 data,
190 data + DATA_ELEMENTS);
191 bzero(data, DATA_ELEMENTS);
192 DCFlushRange(data, DATA_ELEMENTS);
193
194 // Initialize the first buffers
195 for (i = 0; i < NUM_BUFFERS; i++)
196 {
197 BufAddr[i] = data + BUFFER_SIZE*i;
198 numTransactions = LCLoadData(Buffers[i], BufAddr[i], BUFFER_SIZE);
199 }
200
201 currDataPtr = data + (BUFFER_SIZE * NUM_BUFFERS);
202
203 RESETPMC
204 STARTPMC
205 LCQueueWait((NUM_BUFFERS-1) * 4);
206
207 while (currDataPtr < data+DATA_ELEMENTS)
208 {
209 for (i = 0; i < NUM_BUFFERS; i++)
210 {
211 // prevstore + prevload, each takes 2
212 LCQueueWait((NUM_BUFFERS-1)*numTransactions);
213 ProcessBuf(Buffers[i]);
214 LCStoreData(BufAddr[i], Buffers[i], BUFFER_SIZE);
215 BufAddr[i] = currDataPtr; // move to next unprocessed buffer
216 LCLoadData(Buffers[i], BufAddr[i], BUFFER_SIZE);
217 // advance the next block to be read
218 currDataPtr += BUFFER_SIZE;
219 }
220 }
221
222 // process last buffers
223 for (i = 0; i < NUM_BUFFERS; i++)
224 {
225 ProcessBuf(Buffers[i]);
226 LCStoreData(BufAddr[i], Buffers[i], BUFFER_SIZE);
227 }
228
229 LCQueueWait(NUM_BUFFERS);
230 STOPPMC
231 OSReport("Dumping performance monitors- L1 miss cycles should be zero:\n");
232 PRINTPMC
233
234 OSReport("Verifying data...\n");
235 VerifyData(data,TESTVALUE);
236
237 OSHalt("Demo complete");
238 }
239