/*---------------------------------------------------------------------------*
  Project:  Horizon
  File:     math_Matrix34.cpp

  Copyright (C)2009-2012 Nintendo Co., Ltd.  All rights reserved.

  These coded instructions, statements, and computer programs contain
  proprietary information of Nintendo of America Inc. and/or Nintendo
  Company Ltd., and are protected by Federal copyright law.  They may
  not be disclosed to third parties or copied or duplicated in any form,
  in whole or in part, without the prior written consent of Nintendo.

  $Rev: 46347 $
 *---------------------------------------------------------------------------*/

#include <nn/math.h>

#include <cmath>
#include <nn/math/ARMv6/math_Matrix34.h>

#if !defined(NN_MATH_AS_INLINE)
#include <nn/math/ARMv6/inline/math_Matrix34.ipp>
#endif

namespace nn {
namespace math {
namespace ARMv6 {

#include <nn/hw/ARM/code32.h>

NN_FUNC_ATTR_PRIVATE_SECTION
asm MTX34*
MTX34CopyAsm(MTX34*, const MTX34*)
{
    CMP      r1,r0          // Are p and pOut the same?
    BXEQ     lr             // If so, return without copying
    VLDMIA   r1!,{s0-s5}    // Load in segments to shorten stalls from data hazards
    MOV      r2,r0          // Work pointer for the segmented store
    VLDMIA   r1,{s6-s11}
    VSTMIA   r2!,{s0-s5}
    VSTMIA   r2,{s6-s11}
    BX       lr
}
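
// Reference sketch (comment only, not compiled): the behavior of MTX34CopyAsm
// in plain C++, assuming MTX34 is 12 contiguous f32 values (f32 m[3][4]):
//
//     static MTX34* MTX34CopyRef(MTX34* pOut, const MTX34* p)
//     {
//         if (pOut != p)  // The asm returns early when src == dst
//         {
//             *pOut = *p; // 12-float (48-byte) copy
//         }
//         return pOut;
//     }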

NN_FUNC_ATTR_PRIVATE_SECTION
asm MTX34*
MTX34MultAsm_ORG(MTX34*, const MTX34*, const MTX34*)
{
    VPUSH    {d8-d15}       // Save registers

    VLDMIA   r2,{s8-s19}    // The entire p2 matrix is put into the [S8-S19] registers
    VLDMIA   r1!,{s0-s7}    // Matrix p1 is put into the [S0-S7] registers

    VMUL.F32 s20,s8,s0
    VMUL.F32 s21,s9,s0
    VMUL.F32 s22,s10,s0
    VMUL.F32 s23,s11,s0

    VMUL.F32 s24,s8,s4
    VMUL.F32 s25,s9,s4
    VLDR.F32 s0,[r1,#0]     // Continuation of p1
    VMUL.F32 s26,s10,s4
    VMUL.F32 s27,s11,s4

    VMUL.F32 s28,s8,s0
    VMUL.F32 s29,s9,s0
    VMUL.F32 s30,s10,s0
    VMUL.F32 s31,s11,s0

    VMLA.F32 s20,s12,s1
    VMLA.F32 s21,s13,s1
    VLDR.F32 s4,[r1,#4]     // Continuation of p1
    VMLA.F32 s22,s14,s1
    VMLA.F32 s23,s15,s1

    VMLA.F32 s24,s12,s5
    VMLA.F32 s25,s13,s5
    VMLA.F32 s26,s14,s5
    VMLA.F32 s27,s15,s5

    VMLA.F32 s28,s12,s4
    VMLA.F32 s29,s13,s4
    VMLA.F32 s30,s14,s4
    VMLA.F32 s31,s15,s4

    VLDR.F32 s1,[r1,#8]     // Continuation of p1
    VMLA.F32 s23,s19,s2
    VMLA.F32 s20,s16,s2
    VMLA.F32 s21,s17,s2
    VMLA.F32 s22,s18,s2

    VLDR.F32 s5,[r1,#12]    // Continuation of p1
    VMLA.F32 s27,s19,s6
    VMLA.F32 s24,s16,s6
    VMLA.F32 s25,s17,s6
    VMLA.F32 s26,s18,s6

    VADD.F32 s23,s23,s3

    VMLA.F32 s31,s19,s1
    VMLA.F32 s28,s16,s1
    VMLA.F32 s29,s17,s1
    VMLA.F32 s30,s18,s1

    VADD.F32 s27,s27,s7
    VADD.F32 s31,s31,s5

    VSTMIA   r0!,{s20-s23}  // Store result
    VPOP     {d8-d11}       // Popping in segments is faster
    VSTMIA   r0!,{s24-s27}  // Store result
    VPOP     {d12-d13}      // ... interleaved with the stores
    VSTMIA   r0,{s28-s31}   // Store result
    VPOP     {d14-d15}      // Final segment of the pops
    BX       lr             // Return
}

NN_FUNC_ATTR_PRIVATE_SECTION
asm MTX34*
MTX34MultAsm(MTX34*, const MTX34*, const MTX34*)
{
    VPUSH    {d8-d10}       // Save registers

    VLDR.F32 s3,[r1,#4*4*0+4*3]    // Matrix p1[0][3]
    VLDR.F32 s7,[r1,#4*4*1+4*3]    // Matrix p1[1][3]
    VLDR.F32 s11,[r1,#4*4*2+4*3]   // Matrix p1[2][3]

    VLDMIA   r2!,{s12-s15}         // Matrix p2 is put into the [S12-S15] registers

    VLDR.F32 s20,[r1,#4*4*0+4*0]   // Matrix p1[0][0]
    VLDR.F32 s21,[r1,#4*4*1+4*0]   // Matrix p1[1][0]
    VMUL.F32 s0,s12,s20
    VMUL.F32 s1,s13,s20
    VMUL.F32 s2,s14,s20
    VMLA.F32 s3,s15,s20

    VLDR.F32 s20,[r1,#4*4*2+4*0]   // Matrix p1[2][0]
    VMUL.F32 s4,s12,s21
    VMUL.F32 s5,s13,s21
    VMUL.F32 s6,s14,s21
    VMLA.F32 s7,s15,s21
    VLDMIA   r2!,{s16-s19}         // Matrix p2 is put into the [S16-S19] registers

    VLDR.F32 s21,[r1,#4*4*0+4*1]   // Matrix p1[0][1]
    VMUL.F32 s8,s12,s20
    VMUL.F32 s9,s13,s20
    VMUL.F32 s10,s14,s20
    VMLA.F32 s11,s15,s20
    VLDMIA   r2,{s12-s15}          // Matrix p2 is put into the [S12-S15] registers

    VLDR.F32 s20,[r1,#4*4*1+4*1]   // Matrix p1[1][1]
    VMLA.F32 s0,s16,s21
    VMLA.F32 s1,s17,s21
    VMLA.F32 s2,s18,s21
    VMLA.F32 s3,s19,s21

    VLDR.F32 s21,[r1,#4*4*2+4*1]   // Matrix p1[2][1]
    VMLA.F32 s4,s16,s20
    VMLA.F32 s5,s17,s20
    VMLA.F32 s6,s18,s20
    VMLA.F32 s7,s19,s20

    VLDR.F32 s20,[r1,#4*4*0+4*2]   // Matrix p1[0][2]
    VMLA.F32 s8,s16,s21
    VMLA.F32 s9,s17,s21
    VMLA.F32 s10,s18,s21
    VMLA.F32 s11,s19,s21

    VLDR.F32 s21,[r1,#4*4*1+4*2]   // Matrix p1[1][2]
    VMLA.F32 s0,s12,s20
    VMLA.F32 s1,s13,s20
    VMLA.F32 s2,s14,s20
    VMLA.F32 s3,s15,s20

    VLDR.F32 s20,[r1,#4*4*2+4*2]   // Matrix p1[2][2]
    VMLA.F32 s4,s12,s21
    VMLA.F32 s5,s13,s21
    VMLA.F32 s6,s14,s21
    VMLA.F32 s7,s15,s21

    VMLA.F32 s8,s12,s20
    VMLA.F32 s9,s13,s20
    VMLA.F32 s10,s14,s20
    VMLA.F32 s11,s15,s20

    VPOP     {d8-d10}       // Restore registers
    MOV      r1,r0
    VSTMIA   r1!,{s0-s3}    // Store result
    VSTMIA   r1,{s4-s11}    // Store result
    BX       lr             // Return
}
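
// Reference sketch (comment only, not compiled): MTX34MultAsm (and the
// retained MTX34MultAsm_ORG) compute pOut = p1 * p2 for 3x4 affine matrices
// with an implicit fourth row (0,0,0,1), assuming MTX34 exposes its elements
// as f32 m[3][4] (row-major):
//
//     static MTX34* MTX34MultRef(MTX34* pOut, const MTX34* p1, const MTX34* p2)
//     {
//         MTX34 tmp; // Temporary so pOut may alias p1 or p2
//         for (int i = 0; i < 3; ++i)
//         {
//             for (int j = 0; j < 4; ++j)
//             {
//                 f32 v = p1->m[i][0] * p2->m[0][j]
//                       + p1->m[i][1] * p2->m[1][j]
//                       + p1->m[i][2] * p2->m[2][j];
//                 if (j == 3)
//                 {
//                     v += p1->m[i][3]; // Implicit (0,0,0,1) bottom row of p2
//                 }
//                 tmp.m[i][j] = v;
//             }
//         }
//         *pOut = tmp;
//         return pOut;
//     }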

NN_FUNC_ATTR_PRIVATE_SECTION
asm MTX34*
MTX34MultAsm(MTX34*, const MTX34*, f32)
{
    VLDMIA   r1,{s2-s13}    // Matrix p is put into the [S2-S13] registers

    VMUL.F32 s2,s2,s0
    VMUL.F32 s3,s3,s0
    VMUL.F32 s4,s4,s0
    VMUL.F32 s5,s5,s0

    VMUL.F32 s6,s6,s0
    VMUL.F32 s7,s7,s0
    VMUL.F32 s8,s8,s0
    VMUL.F32 s9,s9,s0

    VMUL.F32 s10,s10,s0
    VMUL.F32 s11,s11,s0
    VMUL.F32 s12,s12,s0
    VMUL.F32 s13,s13,s0

    VSTMIA   r0,{s2-s13}    // Store result
    BX       lr             // Return
}
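
// Reference sketch (comment only, not compiled): the scalar overload scales
// all 12 elements, assuming f32 m[3][4] storage:
//
//     static MTX34* MTX34MultRef(MTX34* pOut, const MTX34* p, f32 f)
//     {
//         for (int i = 0; i < 3; ++i)
//         {
//             for (int j = 0; j < 4; ++j)
//             {
//                 pOut->m[i][j] = p->m[i][j] * f;
//             }
//         }
//         return pOut;
//     }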

NN_FUNC_ATTR_PRIVATE_SECTION
asm MTX34*
MTX34AddAsm(MTX34*, const MTX34*, const MTX34*)
{
    VPUSH    {d8-d9}        // Save registers
    VLDMIA   r2,{s0-s11}    // The entire p2 matrix is put into the [S0-S11] registers
    VLDMIA   r1!,{s12-s19}  // Matrix p1 is put into the [S12-S19] registers

    VADD.F32 s0,s12,s0
    VADD.F32 s1,s13,s1
    VADD.F32 s2,s14,s2

    VADD.F32 s3,s15,s3
    VADD.F32 s4,s16,s4
    VLDMIA   r1!,{s12-s15}  // Continuation of p1
    VADD.F32 s5,s17,s5

    VADD.F32 s6,s18,s6
    VADD.F32 s7,s19,s7
    VADD.F32 s8,s12,s8

    VADD.F32 s9,s13,s9
    VADD.F32 s10,s14,s10
    VADD.F32 s11,s15,s11

    VPOP     {d8-d9}        // Restore registers
    VSTMIA   r0,{s0-s11}    // Store result
    BX       lr             // Return
}
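
// Reference sketch (comment only, not compiled): MTX34AddAsm is the
// elementwise sum, pOut->m[i][j] = p1->m[i][j] + p2->m[i][j]; MTX34MAddAsm
// below generalizes it with a scale factor on p1.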

NN_FUNC_ATTR_PRIVATE_SECTION
asm MTX34*
MTX34MAddAsm(MTX34*, f32, const MTX34*, const MTX34*)
{
    VPUSH    {d8-d10}       // Save registers
    VLDMIA   r2,{s2-s13}    // The entire p2 matrix is put into the [S2-S13] registers
    VLDMIA   r1!,{s14-s21}  // Matrix p1 is put into the [S14-S21] registers

    VMLA.F32 s2,s14,s0
    VMLA.F32 s3,s15,s0
    VMLA.F32 s4,s16,s0
    VMLA.F32 s5,s17,s0
    VLDMIA   r1,{s14-s17}   // Continuation of p1

    VMLA.F32 s6,s18,s0
    VMLA.F32 s7,s19,s0
    VMLA.F32 s8,s20,s0
    VMLA.F32 s9,s21,s0

    VMLA.F32 s10,s14,s0
    VMLA.F32 s11,s15,s0
    VMLA.F32 s12,s16,s0
    VMLA.F32 s13,s17,s0

    VPOP     {d8-d10}       // Restore registers
    VSTMIA   r0,{s2-s13}    // Store result
    BX       lr             // Return
}
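
// Reference sketch (comment only, not compiled): MTX34MAddAsm computes
// pOut = p2 + f * p1 elementwise, assuming f32 m[3][4] storage:
//
//     static MTX34* MTX34MAddRef(MTX34* pOut, f32 f, const MTX34* p1, const MTX34* p2)
//     {
//         for (int i = 0; i < 3; ++i)
//         {
//             for (int j = 0; j < 4; ++j)
//             {
//                 pOut->m[i][j] = p2->m[i][j] + f * p1->m[i][j];
//             }
//         }
//         return pOut;
//     }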

NN_FUNC_ATTR_PRIVATE_SECTION
asm MTX34*
MTX34MultScaleAsm(MTX34*, const MTX34*, const VEC3*)
{
    VLDMIA   r1,{s0-s11}    // Matrix p is put into the [S0-S11] registers
    VLDMIA   r2,{s12-s14}   // VEC3 is put into the [S12-S14] registers

    VMUL.F32 s0,s0,s12
    VMUL.F32 s1,s1,s13
    VMUL.F32 s2,s2,s14

    VMUL.F32 s4,s4,s12
    VMUL.F32 s5,s5,s13
    VMUL.F32 s6,s6,s14

    VMUL.F32 s8,s8,s12
    VMUL.F32 s9,s9,s13
    VMUL.F32 s10,s10,s14

    VSTMIA   r0,{s0-s11}    // Store result
    BX       lr             // Return
}

NN_FUNC_ATTR_PRIVATE_SECTION
asm MTX34*
MTX34MultScaleAsm(MTX34*, const VEC3*, const MTX34*)
{
    VLDMIA   r2,{s0-s11}    // Matrix p is put into the [S0-S11] registers
    VLDMIA   r1,{s12-s14}   // VEC3 is put into the [S12-S14] registers

    VMUL.F32 s0,s0,s12
    VMUL.F32 s1,s1,s12
    VMUL.F32 s2,s2,s12
    VMUL.F32 s3,s3,s12

    VMUL.F32 s4,s4,s13
    VMUL.F32 s5,s5,s13
    VMUL.F32 s6,s6,s13
    VMUL.F32 s7,s7,s13

    VMUL.F32 s8,s8,s14
    VMUL.F32 s9,s9,s14
    VMUL.F32 s10,s10,s14
    VMUL.F32 s11,s11,s14

    VSTMIA   r0,{s0-s11}    // Store result
    BX       lr             // Return
}
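
// Reference sketch (comment only, not compiled): the two MTX34MultScaleAsm
// overloads are post- and pre-multiplication by a scale matrix, assuming
// f32 m[3][4] storage and VEC3 members x, y, z:
//
//     // pOut = p * Scale(s): scales the first three columns by x, y, z
//     static MTX34* MTX34MultScaleRef(MTX34* pOut, const MTX34* p, const VEC3* pS)
//     {
//         for (int i = 0; i < 3; ++i)
//         {
//             pOut->m[i][0] = p->m[i][0] * pS->x;
//             pOut->m[i][1] = p->m[i][1] * pS->y;
//             pOut->m[i][2] = p->m[i][2] * pS->z;
//             pOut->m[i][3] = p->m[i][3]; // Translation column is untouched
//         }
//         return pOut;
//     }
//
//     // pOut = Scale(s) * p: scales whole rows (translation included)
//     static MTX34* MTX34MultScaleRef(MTX34* pOut, const VEC3* pS, const MTX34* p)
//     {
//         const f32 s[3] = { pS->x, pS->y, pS->z };
//         for (int i = 0; i < 3; ++i)
//         {
//             for (int j = 0; j < 4; ++j)
//             {
//                 pOut->m[i][j] = p->m[i][j] * s[i];
//             }
//         }
//         return pOut;
//     }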

NN_FUNC_ATTR_PRIVATE_SECTION
asm u32
MTX34InverseAsm(MTX34*, const MTX34*)
{
    VLDMIA   r1,{s0-s2}     // Row 0 of the 3x3 part
    ADD      r1,#4*4
    VLDMIA   r1,{s3-s5}     // Row 1
    ADD      r1,#4*4
    VLDMIA   r1,{s6-s8}     // Row 2

    VMUL.F32 s10,s0,s4
    VMUL.F32 s11,s1,s5
    VMUL.F32 s12,s2,s3
    VMUL.F32 s13,s6,s4
    VMUL.F32 s14,s3,s1
    VMUL.F32 s15,s0,s7

    VMUL.F32 s10,s10,s8
    VMUL.F32 s11,s11,s6
    VMUL.F32 s12,s12,s7

    VMLS.F32 s10,s13,s2
    VMLS.F32 s11,s14,s8
    VMLS.F32 s12,s15,s5

    VADD.F32 s10,s10,s11
    VLDR.F32 s15,=1.0
    VADD.F32 s10,s10,s12    // s10 = determinant of the 3x3 part

    FMRS     r2,s10         // Move the determinant's bit pattern to r2
    CMP      r2,#0x80000000 // Bit pattern of -0.0f?
    CMPNE    r2,#0          // ... or of +0.0f?
    MOVEQ    r0,#0          // Singular matrix: return 0
    BXEQ     lr

    VPUSH    {d8-d12}       // Save registers
    VDIV.F32 s15,s10        // s15 = 1.0 / determinant

    VMUL.F32 s16,s4,s8
    VMUL.F32 s17,s1,s8
    VMUL.F32 s18,s1,s5

    VMUL.F32 s19,s3,s8
    VMUL.F32 s20,s0,s8
    VMUL.F32 s21,s0,s5

    VMUL.F32 s22,s3,s7
    VMUL.F32 s23,s0,s7
    VMUL.F32 s24,s0,s4

    VMLS.F32 s16,s7,s5
    VMLS.F32 s17,s7,s2
    VMLS.F32 s18,s4,s2

    VMLS.F32 s19,s6,s5
    VMLS.F32 s20,s6,s2
    VMLS.F32 s21,s3,s2

    VMLS.F32 s22,s6,s4
    VMLS.F32 s23,s6,s1
    VMLS.F32 s24,s3,s1

    VMUL.F32  s0,s16,s15    // m[0][0]
    VNMUL.F32 s1,s17,s15    // m[0][1]
    VMUL.F32  s2,s18,s15    // m[0][2]

    VNMUL.F32 s4,s19,s15    // m[1][0]
    VMUL.F32  s5,s20,s15    // m[1][1]
    VNMUL.F32 s6,s21,s15    // m[1][2]

    VMUL.F32  s8,s22,s15    // m[2][0]
    VLDR.F32  s12,[r1,#4*3-(4*4*2)] // src[0][3]
    VNMUL.F32 s9,s23,s15    // m[2][1]
    VMUL.F32  s10,s24,s15   // m[2][2]

    VNMUL.F32 s3,s0,s12
    VLDR.F32  s13,[r1,#4*3-(4*4)]   // src[1][3]
    VNMUL.F32 s7,s4,s12
    VNMUL.F32 s11,s8,s12

    VMLS.F32 s3,s1,s13
    VLDR.F32 s14,[r1,#4*3]  // src[2][3]
    VMLS.F32 s7,s5,s13
    VMLS.F32 s11,s9,s13

    VMLS.F32 s3,s2,s14
    VMLS.F32 s7,s6,s14
    VMLS.F32 s11,s10,s14

    VPOP     {d8-d12}       // Restore registers
    VSTMIA   r0,{s0-s11}    // Store result
    MOV      r0,#1
    BX       lr             // Return
}
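
// Reference sketch (comment only, not compiled): MTX34InverseAsm inverts the
// affine matrix by dividing the adjugate of the 3x3 part by its determinant
// and rebuilding the translation as -Inv3x3 * t. It returns 0 when the
// determinant is +/-0.0 (the asm compares the raw bit pattern against
// 0x00000000 and 0x80000000). Assuming f32 m[3][4] storage:
//
//     static u32 MTX34InverseRef(MTX34* pOut, const MTX34* p)
//     {
//         const f32 (&m)[3][4] = p->m;
//         f32 det = m[0][0] * (m[1][1] * m[2][2] - m[2][1] * m[1][2])
//                 - m[0][1] * (m[1][0] * m[2][2] - m[2][0] * m[1][2])
//                 + m[0][2] * (m[1][0] * m[2][1] - m[2][0] * m[1][1]);
//         if (det == 0.0f) { return 0; }
//         f32 r = 1.0f / det;
//         MTX34 t; // Temporary so pOut may alias p
//         t.m[0][0] =  (m[1][1]*m[2][2] - m[2][1]*m[1][2]) * r;
//         t.m[0][1] = -(m[0][1]*m[2][2] - m[2][1]*m[0][2]) * r;
//         t.m[0][2] =  (m[0][1]*m[1][2] - m[1][1]*m[0][2]) * r;
//         t.m[1][0] = -(m[1][0]*m[2][2] - m[2][0]*m[1][2]) * r;
//         t.m[1][1] =  (m[0][0]*m[2][2] - m[2][0]*m[0][2]) * r;
//         t.m[1][2] = -(m[0][0]*m[1][2] - m[1][0]*m[0][2]) * r;
//         t.m[2][0] =  (m[1][0]*m[2][1] - m[2][0]*m[1][1]) * r;
//         t.m[2][1] = -(m[0][0]*m[2][1] - m[2][0]*m[0][1]) * r;
//         t.m[2][2] =  (m[0][0]*m[1][1] - m[1][0]*m[0][1]) * r;
//         for (int i = 0; i < 3; ++i)
//         {
//             t.m[i][3] = -(t.m[i][0]*m[0][3] + t.m[i][1]*m[1][3] + t.m[i][2]*m[2][3]);
//         }
//         *pOut = t;
//         return 1;
//     }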

NN_FUNC_ATTR_PRIVATE_SECTION
asm u32
MTX34InvTransposeAsm(MTX34*, const MTX34*)
{
    VLDMIA   r1,{s0-s2}     // Row 0 of the 3x3 part
    ADD      r1,#4*4
    VLDMIA   r1,{s3-s5}     // Row 1
    ADD      r1,#4*4
    VLDMIA   r1,{s6-s8}     // Row 2

    VMUL.F32 s10,s0,s4
    VMUL.F32 s11,s1,s5
    VMUL.F32 s12,s2,s3
    VMUL.F32 s13,s6,s4
    VMUL.F32 s14,s3,s1
    VMUL.F32 s15,s0,s7

    VMUL.F32 s10,s10,s8
    VMUL.F32 s11,s11,s6
    VMUL.F32 s12,s12,s7

    VMLS.F32 s10,s13,s2
    VMLS.F32 s11,s14,s8
    VMLS.F32 s12,s15,s5

    VADD.F32 s10,s10,s11
    VLDR.F32 s15,=1.0
    VADD.F32 s10,s10,s12    // s10 = determinant of the 3x3 part

    FMRS     r2,s10         // Move the determinant's bit pattern to r2
    CMP      r2,#0x80000000 // Bit pattern of -0.0f?
    CMPNE    r2,#0          // ... or of +0.0f?
    MOVEQ    r0,#0          // Singular matrix: return 0
    BXEQ     lr

    VPUSH    {d8-d12}       // Save registers
    VDIV.F32 s15,s10        // s15 = 1.0 / determinant

    VMUL.F32 s16,s4,s8
    VMUL.F32 s17,s3,s8
    VMUL.F32 s18,s3,s7

    VMUL.F32 s19,s1,s8
    VMUL.F32 s20,s0,s8
    VMUL.F32 s21,s0,s7

    VMUL.F32 s22,s1,s5
    VMUL.F32 s23,s0,s5
    VMUL.F32 s24,s0,s4

    VMLS.F32 s16,s7,s5
    VMLS.F32 s17,s6,s5
    VMLS.F32 s18,s6,s4

    VMLS.F32 s19,s7,s2
    VMLS.F32 s20,s6,s2
    VMLS.F32 s21,s6,s1

    VMLS.F32 s22,s4,s2
    VMLS.F32 s23,s3,s2
    VMLS.F32 s24,s3,s1

    VMUL.F32  s0,s16,s15    // m[0][0]
    VNMUL.F32 s1,s17,s15    // m[0][1]
    VMUL.F32  s2,s18,s15    // m[0][2]

    VNMUL.F32 s4,s19,s15    // m[1][0]
    VMUL.F32  s5,s20,s15    // m[1][1]
    VNMUL.F32 s6,s21,s15    // m[1][2]

    VMUL.F32  s8,s22,s15    // m[2][0]
    VNMUL.F32 s9,s23,s15    // m[2][1]
    VMUL.F32  s10,s24,s15   // m[2][2]

    VLDR.F32 s3,=0.0        // Fourth column is cleared
    VLDR.F32 s7,=0.0
    VLDR.F32 s11,=0.0

    VPOP     {d8-d12}       // Restore registers
    VSTMIA   r0,{s0-s11}    // Store result
    MOV      r0,#1
    BX       lr             // Return
}
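
// Reference sketch (comment only, not compiled): MTX34InvTransposeAsm produces
// the inverse-transpose of the 3x3 part (the usual normal-vector matrix) with
// the fourth column cleared to zero. In terms of MTX34InverseRef above:
//
//     static u32 MTX34InvTransposeRef(MTX34* pOut, const MTX34* p)
//     {
//         MTX34 inv;
//         if (!MTX34InverseRef(&inv, p)) { return 0; } // Same singularity test
//         for (int i = 0; i < 3; ++i)
//         {
//             for (int j = 0; j < 3; ++j)
//             {
//                 pOut->m[i][j] = inv.m[j][i]; // Transpose the 3x3 block
//             }
//             pOut->m[i][3] = 0.0f;            // Fourth column is zero
//         }
//         return 1;
//     }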

NN_FUNC_ATTR_PRIVATE_SECTION
asm MTX34*
MTX34MultTranslateAsm(MTX34*, const VEC3*, const MTX34*)
{
    VLDMIA   r2,{s0-s11}    // The entire pM matrix is put into the [S0-S11] registers
    VLDMIA   r1,{s12-s14}   // The vector is put into the [S12-S14] registers

    VADD.F32 s3,s3,s12
    VADD.F32 s7,s7,s13
    VADD.F32 s11,s11,s14

    VSTMIA   r0,{s0-s11}    // Store result
    BX       lr             // Return
}

NN_FUNC_ATTR_PRIVATE_SECTION
asm MTX34*
MTX34MultTranslateAsm(MTX34*, const MTX34*, const VEC3*)
{
    VLDMIA   r1,{s0-s11}    // The entire pM matrix is put into the [S0-S11] registers
    VLDMIA   r2,{s12-s14}   // The vector is put into the [S12-S14] registers

    VMLA.F32 s3,s0,s12
    VMLA.F32 s7,s4,s12
    VMLA.F32 s11,s8,s12

    VMLA.F32 s3,s1,s13
    VMLA.F32 s7,s5,s13
    VMLA.F32 s11,s9,s13

    VMLA.F32 s3,s2,s14
    VMLA.F32 s7,s6,s14
    VMLA.F32 s11,s10,s14

    VSTMIA   r0,{s0-s11}    // Store result
    BX       lr             // Return
}
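
// Reference sketch (comment only, not compiled): the two MTX34MultTranslateAsm
// overloads, assuming f32 m[3][4] storage and VEC3 members x, y, z:
//
//     // pOut = Translate(t) * p: adds t directly to the translation column
//     static MTX34* MTX34MultTranslateRef(MTX34* pOut, const VEC3* pT, const MTX34* p)
//     {
//         *pOut = *p;
//         pOut->m[0][3] += pT->x;
//         pOut->m[1][3] += pT->y;
//         pOut->m[2][3] += pT->z;
//         return pOut;
//     }
//
//     // pOut = p * Translate(t): runs t through the 3x3 part first
//     static MTX34* MTX34MultTranslateRef(MTX34* pOut, const MTX34* p, const VEC3* pT)
//     {
//         *pOut = *p;
//         for (int i = 0; i < 3; ++i)
//         {
//             pOut->m[i][3] += p->m[i][0] * pT->x
//                            + p->m[i][1] * pT->y
//                            + p->m[i][2] * pT->z;
//         }
//         return pOut;
//     }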

NN_FUNC_ATTR_PRIVATE_SECTION
asm VEC3*
VEC3TransformAsm(VEC3*, const MTX34*, const VEC3*)
{
    VLDMIA   r1,{s0-s11}    // The entire pM matrix is put into the [S0-S11] registers
    VLDMIA   r2,{s12-s14}   // The vector is put into the [S12-S14] registers

    VMLA.F32 s3,s0,s12
    VMLA.F32 s7,s4,s12
    VMLA.F32 s11,s8,s12

    VMLA.F32 s3,s1,s13
    VMLA.F32 s7,s5,s13
    VMLA.F32 s11,s9,s13

    VMLA.F32 s3,s2,s14
    VMLA.F32 s7,s6,s14
    VMLA.F32 s11,s10,s14

    VSTR.F32 s3,[r0,#0]
    VSTR.F32 s7,[r0,#4]
    VSTR.F32 s11,[r0,#8]    // Store result
    BX       lr             // Return
}
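
// Reference sketch (comment only, not compiled): VEC3TransformAsm applies the
// full affine transform to a point, pOut = M * (v.x, v.y, v.z, 1):
//
//     static VEC3* VEC3TransformRef(VEC3* pOut, const MTX34* pM, const VEC3* pV)
//     {
//         f32 x = pV->x, y = pV->y, z = pV->z; // Copies allow pOut == pV
//         pOut->x = pM->m[0][0]*x + pM->m[0][1]*y + pM->m[0][2]*z + pM->m[0][3];
//         pOut->y = pM->m[1][0]*x + pM->m[1][1]*y + pM->m[1][2]*z + pM->m[1][3];
//         pOut->z = pM->m[2][0]*x + pM->m[2][1]*y + pM->m[2][2]*z + pM->m[2][3];
//         return pOut;
//     }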

NN_FUNC_ATTR_PRIVATE_SECTION
asm MTX34*
MTX34TransposeAsm(MTX34*, const MTX34*)
{
    VLDR.F32 s0,[r1,#0*16+0*4]
    VLDR.F32 s1,[r1,#1*16+0*4]
    VLDR.F32 s2,[r1,#2*16+0*4]
    VLDR.F32 s3,=0.0
    VLDR.F32 s4,[r1,#0*16+1*4]
    VLDR.F32 s5,[r1,#1*16+1*4]
    VLDR.F32 s6,[r1,#2*16+1*4]
    VLDR.F32 s7,=0.0
    VLDR.F32 s8,[r1,#0*16+2*4]
    VLDR.F32 s9,[r1,#1*16+2*4]
    VLDR.F32 s10,[r1,#2*16+2*4]
    VLDR.F32 s11,=0.0

    VSTMIA   r0,{s0-s11}    // Store result
    BX       lr             // Return
}
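
// Reference sketch (comment only, not compiled): a 3x4 affine matrix has no
// in-place transpose, so MTX34TransposeAsm transposes the 3x3 part and clears
// the fourth column:
//
//     static MTX34* MTX34TransposeRef(MTX34* pOut, const MTX34* p)
//     {
//         MTX34 t; // Temporary so pOut may alias p
//         for (int i = 0; i < 3; ++i)
//         {
//             for (int j = 0; j < 3; ++j)
//             {
//                 t.m[i][j] = p->m[j][i];
//             }
//             t.m[i][3] = 0.0f;
//         }
//         *pOut = t;
//         return pOut;
//     }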

NN_FUNC_ATTR_PRIVATE_SECTION
asm MTX34*
MTX34ScaleAsm(MTX34*, const VEC3*)
{
    VLDR.F32 s0,[r1,#0]
    VLDR.F32 s1,=0.0
    VLDR.F32 s2,=0.0
    VLDR.F32 s3,=0.0
    VLDR.F32 s4,=0.0
    VLDR.F32 s5,[r1,#4]
    VLDR.F32 s6,=0.0
    VLDR.F32 s7,=0.0
    VLDR.F32 s8,=0.0
    VLDR.F32 s9,=0.0
    VLDR.F32 s10,[r1,#8]
    VLDR.F32 s11,=0.0

    VSTMIA   r0,{s0-s11}    // Store result
    BX       lr             // Return
}
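
// Reference sketch (comment only, not compiled): MTX34ScaleAsm builds a pure
// scale matrix: the diagonal of the 3x3 part is (s.x, s.y, s.z) and every
// other element, including the translation column, is 0.0f.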

#include <nn/hw/ARM/codereset.h>

} // namespace ARMv6
} // namespace math
} // namespace nn