/*---------------------------------------------------------------------------*
  Project: Horizon
  File: math_Matrix43.cpp

  Copyright (C)2009-2012 Nintendo Co., Ltd. All rights reserved.

  These coded instructions, statements, and computer programs contain
  proprietary information of Nintendo of America Inc. and/or Nintendo
  Company Ltd., and are protected by Federal copyright law. They may
  not be disclosed to third parties or copied or duplicated in any form,
  in whole or in part, without the prior written consent of Nintendo.

  $Rev: 46347 $
 *---------------------------------------------------------------------------*/
15
16 #include <nn/math.h>
17
18 #include <nn/math/math_Matrix43.h>
19 #include <nn/math/ARMv6/math_Matrix43.h>
20
21 #if !defined(NN_MATH_AS_INLINE)
22 #include <nn/math/inline/math_Matrix43.ipp>
23 #include <nn/math/ARMv6/inline/math_Matrix43.ipp>
24 #endif
25
26
27 namespace nn {
28 namespace math {
29 namespace ARMv6 {
30
31 #include <nn/hw/ARM/code32.h>
32
33 NN_FUNC_ATTR_PRIVATE_SECTION
34 asm MTX43*
MTX43CopyAsm(MTX43 *,const MTX43 *)35 MTX43CopyAsm(MTX43* , const MTX43*)
36 {
37 CMP r1,r0 // Are p and pOut the same?
38 BXEQ lr // If the same, returns without copying
39 VLDMIA r1,{s0-s11} // All p are put in the [S0-S11] registers
40 VSTMIA r0,{s0-s11} // All pOut are put in the [S0-S11] registers
41 BX lr
42 }
43
44 NN_FUNC_ATTR_PRIVATE_SECTION
45 asm MTX43*
MTX43AddAsm(MTX43 *,const MTX43 *,const MTX43 *)46 MTX43AddAsm(MTX43* , const MTX43* , const MTX43* )
47 {
48 VPUSH {d8-d9} // Save registers
49 VLDMIA r2,{s0-s11} // The entire p2 matrix is put in the [S0-S11] registers
50 VLDMIA r1!,{s12-s19} // Matrix p1 is put into the [S12-S19] registers
51
52 VADD.F32 s0,s12,s0
53 VADD.F32 s1,s13,s1
54 VADD.F32 s2,s14,s2
55 VADD.F32 s3,s15,s3
56 VLDMIA r1,{s12-s15} // Continuation of p1
57
58 VADD.F32 s4,s16,s4
59 VADD.F32 s5,s17,s5
60 VADD.F32 s6,s18,s6
61 VADD.F32 s7,s19,s7
62
63 VADD.F32 s8,s12,s8
64 VADD.F32 s9,s13,s9
65 VADD.F32 s10,s14,s10
66 VADD.F32 s11,s15,s11
67
68 VPOP {d8-d9} // Register return
69 VSTMIA r0,{s0-s11} // Store result
70 BX lr // Return
71 }
72
73 NN_FUNC_ATTR_PRIVATE_SECTION
74 asm MTX43*
MTX43MultAsm(MTX43 *,const MTX43 *,const MTX43 *)75 MTX43MultAsm(MTX43*, const MTX43*, const MTX43*)
76 {
77 VPUSH {d8-d15} // Save registers
78
79 VLDR.F32 s9,[r2,#3*3*4] // Matrix p2[3][0]
80 VLDR.F32 s10,[r2,#4+3*3*4] // Matrix p2[3][1]
81 VLDR.F32 s11,[r2,#8+3*3*4] // Matrix p2[3][2]
82
83 VLDMIA r1,{s12-s23} // The entire matrix p1 is put into the [S12-S23] registers
84 VLDMIA r2!,{s24-s29} // Matrix p2 is put into the [S24-S29] registers
85
86 VMUL.F32 s0,s12,s24
87 VMUL.F32 s1,s12,s25
88 VMUL.F32 s2,s12,s26
89
90 VMUL.F32 s3,s15,s24
91 VMUL.F32 s4,s15,s25
92 VMUL.F32 s5,s15,s26
93
94 VMUL.F32 s6,s18,s24
95 VMUL.F32 s7,s18,s25
96 VMUL.F32 s8,s18,s26
97
98 VMLA.F32 s9,s21,s24
99 VMLA.F32 s10,s21,s25
100 VMLA.F32 s11,s21,s26
101
102 VMLA.F32 s0,s13,s27
103 VMLA.F32 s1,s13,s28
104 VMLA.F32 s2,s13,s29
105
106 VMLA.F32 s3,s16,s27
107 VMLA.F32 s4,s16,s28
108 VMLA.F32 s5,s16,s29
109
110 VLDMIA r2,{s24-s26} // Continuation of p2
111
112 VMLA.F32 s6,s19,s27
113 VMLA.F32 s7,s19,s28
114 VMLA.F32 s8,s19,s29
115
116 VMLA.F32 s9,s22,s27
117 VMLA.F32 s10,s22,s28
118 VMLA.F32 s11,s22,s29
119
120 VMLA.F32 s0,s14,s24
121 VMLA.F32 s1,s14,s25
122 VMLA.F32 s2,s14,s26
123
124 VMLA.F32 s3,s17,s24
125 VMLA.F32 s4,s17,s25
126 VMLA.F32 s5,s17,s26
127
128 VMLA.F32 s6,s20,s24
129 VMLA.F32 s7,s20,s25
130 VMLA.F32 s8,s20,s26
131
132 VMLA.F32 s9,s23,s24
133 VMLA.F32 s10,s23,s25
134 VMLA.F32 s11,s23,s26
135
136 VPOP {d8-d15} // Register return
137 VSTMIA r0,{s0-s11} // Store result
138 BX lr // Return
139
140 }
141
142 NN_FUNC_ATTR_PRIVATE_SECTION
143 asm MTX43*
MTX43MultAsm(MTX43 *,const MTX43 *,f32)144 MTX43MultAsm(MTX43*, const MTX43*, f32)
145 {
146 VLDMIA r1,{s1-s12} // Matrix p is put into the [S1-S12] registers
147
148 VMUL.F32 s1,s1,s0
149 VMUL.F32 s2,s2,s0
150 VMUL.F32 s3,s3,s0
151 VMUL.F32 s4,s4,s0
152
153 VMUL.F32 s5,s5,s0
154 VMUL.F32 s6,s6,s0
155 VMUL.F32 s7,s7,s0
156 VMUL.F32 s8,s8,s0
157
158 VMUL.F32 s9,s9,s0
159 VMUL.F32 s10,s10,s0
160 VMUL.F32 s11,s11,s0
161 VMUL.F32 s12,s12,s0
162
163 VSTMIA r0,{s1-s12} // Store result
164 BX lr // Return
165
166 }
167
168 #include <nn/hw/ARM/codereset.h>
169
170 } // namespace ARMv6
171 } // namespace math
172 } // namespace nn
173