1//
2//------------------------------------------------------------
3// Copyright(c) 2009-2010 by Digital Media Professionals Inc.
4// All rights reserved.
5//------------------------------------------------------------
6// This source code is the confidential and proprietary
7// of Digital Media Professionals Inc.
8//------------------------------------------------------------
9//
10
11#include "commonasm.h"
12
13// Input registers map
// Aliases for the vertex attribute registers v0-v3. Within this file only aTang
// and aTexCoord are referenced by these names; position and normal are read via
// dmp_position / dmp_normal, which are not defined in this file (presumably
// commonasm.h maps them onto v0 / v1 -- TODO confirm).
14#define aPosition       v0
15#define aNormal         v1
16#define aTang           v2
17#define aTexCoord       v3
18
19// Output registers map
// Aliases for the output registers o0-o3. Only vTexCoord is written by name in
// this file; the other outputs are written through gl_Position, dmp_lrQuat and
// dmp_lrView (defined elsewhere, presumably aliases of o0-o2 -- TODO confirm).
20#define vPosition       o0
21#define vQuaternion     o1
22#define vView           o2
23#define vTexCoord       o3
24
// Attribute bindings: expose each input register to the host under the symbol name.
25#pragma bind_symbol(aPosition.xyz,v0,v0)
26#pragma bind_symbol(aNormal.xyz,v1,v1)
27#pragma bind_symbol(aTang.xyz,v2,v2)
28#pragma bind_symbol(aTexCoord.xy,v3,v3)
29
// Uniform bindings: projection matrix in c0..c3, model-view matrix in c4..c7.
// The code below reads dmp_projection_matrix / dmp_local_to_view_matrix rather
// than these symbol names (presumably the same register ranges -- TODO confirm).
30#pragma bind_symbol(uProjection,c0,c3)
31#pragma bind_symbol(uModelView,c4,c7)
32
// Boolean uniform b1: tested by main to choose the tangent-based quaternion path.
33#pragma bind_symbol(uTangentEnabled,b1,b1)
34
// Output semantics. o3 carries two texture coordinate sets: texture0 in .xy and
// texture1 in .zw.
35#pragma output_map ( position, o0 )
36#pragma output_map ( quaternion, o1 )
37#pragma output_map ( view, o2 )
38#pragma output_map ( texture0, o3.xy )
39#pragma output_map ( texture1, o3.zw )
40
// b0 is a constant "true" boolean; it is used with jpb in
// l_position_view_full_quaternion as an always-taken jump.
41defb    b0, true
42
//------------------------------------------------------------
// l_position_view_full_quaternion
//
// Transforms the vertex to view space, writes the clip-space position and the
// negated view-space position, and converts the tangent frame into a
// normalized quaternion written to dmp_lrQuat.
//
// Frame convention used by the comments below (mIJ = row I, column J):
//   column 0 = tangent  t (TEMP_TANG, re-orthogonalized as b x n)
//   column 1 = bitangent b (r5, normalize(n x t))
//   column 2 = normal   n (TEMP_NORM)
// NOTE(review): n is not normalized here; the conversion presumably assumes a
// unit-length normal and a rotation-only upper 3x3 of the model-view matrix --
// TODO confirm.
//
// Reads:    dmp_position, dmp_normal, aTang, dmp_local_to_view_matrix,
//           dmp_projection_matrix, CONST_1, CONST_1__256
// Writes:   gl_Position, dmp_lrView, dmp_lrQuat
// Clobbers: TEMP_VIEW, TEMP_NORM, TEMP_TANG, r5, r6, r7, r8, r9 and the
//           comparison status flags
//------------------------------------------------------------
43l_position_view_full_quaternion:
44    m4x4    TEMP_VIEW,          dmp_position,       dmp_local_to_view_matrix    // view-space position
45	m3x3    TEMP_NORM.xyz,          dmp_normal,       dmp_local_to_view_matrix    // view-space normal n
46	m3x3    TEMP_TANG.xyz,         aTang,              dmp_local_to_view_matrix    // view-space tangent t
47    mov     dmp_lrView,         -TEMP_VIEW                                      // view output = -(view-space position)
48	m4x4    gl_Position,       TEMP_VIEW,          dmp_projection_matrix       // clip-space position
49
    // Build an orthogonal frame: b = normalize(n x t), then t = b x n.
50    mul     r5,                 TEMP_NORM.yzx,      TEMP_TANG.zxy
51    mad     r5,                 -TEMP_TANG.yzx,     TEMP_NORM.zxy,    r5           // r5 = n x t (unnormalized bitangent b)
52    dp3     r5.w,               r5,                 r5                             // |n x t|^2 in r5.w
53    rsq     r5.w,               r5.w                                               // 1/|n x t| in r5.w
54    mul     r5,                 r5,                 r5.w                           // r5.xyz = normalized bitangent b
55    add     r6.w,               TEMP_NORM.z,        r5.y                           // m11+m22 in r6.w
56    mul     TEMP_TANG,          r5.yzx,             TEMP_NORM.zxy
57    mad     TEMP_TANG,          -TEMP_NORM.yzx,     r5.zxy,            TEMP_TANG   // TEMP_TANG = b x n (re-orthogonalized tangent)
58    add     r6.w,               TEMP_TANG.x,        r6                             // m00+m11+m22 in r6.w
    // Repack so that the needed matrix elements sit in convenient swizzle positions:
    //   TEMP_TANG = (m00, m10, m20, m21), TEMP_NORM = (m02, m12, m22, m01), r5 = (m22, m11, m00)
59    mov     TEMP_TANG.w,        r5.z                                               // m21 in tang.w
60    mov     r5.z,               TEMP_TANG.x                                        // m00 in r5.z
61    add     r6.w,               CONST_1,            r6                             // 1+m00+m11+m22 in r6.w
62    mov     TEMP_NORM.w,        r5.x                                               // m01 in norm.w
63    mov     r5.x,               TEMP_NORM.z                                        // m22 in r5.x -> m22,m11,m00 in r5
    // Test 1+trace > CONST_1__256 (presumably 1/256 -- defined outside this file).
    // r6.x/r6.y are set to +1/-1 here because the fallback uses r6.xxxy as a sign mask.
64    cmp     CMP_MODE_GT,        CMP_MODE_GT,        r6.w,            CONST_1__256
65    mov     r6.x,               CONST_1
66    mov     r6.y,               -CONST_1                                           // 1,-1,?,1+m00+m11+m22 in r6
67    jpc     0, 0,               COND_MODE_STA0,     l_full_quaternion_calc_fallback // test failed (1+trace too small): use fallback
    // Main path: q = normalize(m21-m12, m02-m20, m10-m01, 1+trace).
68    add     r7.xz,              TEMP_TANG.wwyy,     -TEMP_NORM.yyww                // m21-m12,?,m10-m01,? in r7
69    add     r7.y,               TEMP_NORM.x,        -TEMP_TANG.z                   // m21-m12,m02-m20,m10-m01,? in r7
70    mov     r7.w,               r6                                                 // m21-m12,m02-m20,m10-m01,1+m00+m11+m22 in r7
71    dp4     r6,                 r7,                 r7                             // squared length of r7
72    rsq     r6,                 r6.x                                               // 1/length
73    mul     dmp_lrQuat,         r7,                 r6                             // normalized quaternion out
74    jpb     b0, true, l_full_quaternion_calc_end                                   // b0 is defined true: always-taken jump over the fallback
75l_full_quaternion_calc_fallback:
    // Fallback: pick the largest diagonal element (m00, m11 or m22) and build the
    // quaternion around it in r8; r8 is normalized after the outer endif.
    // STA0 = m00 > m11, STA1 = m11 > m22.
76    cmp     CMP_MODE_GT, CMP_MODE_GT, r5.zy, r5.yx                              // comparing m00,m11 with m11,m22
77    ifc     1, 1, COND_MODE_STA0
78        ifc     1, 1, COND_MODE_STA1                                            // if m00>m11 && m11>m22 - m00 is greatest
79            mul     r8,             TEMP_TANG.yyzw,     r6.xxxy                 // m10,m10,m20,-m21 in r8
80            add     r8.x,           CONST_1,            -r5.y                   // 1-m11,m10,m20,-m21 in r8
81            add     r9,             r5.z,               -r5.x                   // m00-m22 in r9
82            add     r8.yzw,         r8,                 TEMP_NORM.wwxy          // 1-m11,m10+m01,m20+m02,m12-m21 in r8 (w is negated after the endif)
83            add     r8.x,           r9,                 r8                      // 1+m00-m22-m11,m10+m01,m20+m02,m12-m21 in r8
84        else
85            cmp     CMP_MODE_GT, CMP_MODE_GT, r5.z, r5.x                        // STA0 = m00 > m22
86            mul     r8,              TEMP_TANG.yyzw,     r6.xxxy                // m10,m10,m20,-m21 in r8
87            add     r8.x,            CONST_1,            -r5.y                  // 1-m11,m10,m20,-m21 in r8
88            ifc 1, 1, COND_MODE_STA0                                            // if m00>m11 && m11<=m22 && m00 > m22 - m00 is greatest
89                add     r9,             r5.z,                -r5.x              // m00-m22 in r9
90                add     r8.yzw,         r8,                  TEMP_NORM.wwxy     // 1-m11,m10+m01,m20+m02,m12-m21 in r8 (w is negated after the endif)
91                add     r8.x,           r9,                  r8                 // 1+m00-m22-m11,m10+m01,m20+m02,m12-m21 in r8
92            else                                                                // if m00>m11 && m11<=m22 && m00 <= m22 - m22 is greatest
93                mul     r8,             TEMP_TANG.zwwy,      r6.xxxy            // m20,m21,m21,-m10 in r8
94                add     r8.z,           CONST_1,             -r5.z              // m20,m21,1-m00,-m10 in r8
95                add     r9,             r5.x,                -r5.y              // m22-m11 in r9
96                add     r8.xyw,         r8,                  TEMP_NORM.xyyw     // m20+m02,m21+m12,1-m00,m01-m10
97                add     r8.z,           r9,                  r8                 // m20+m02,m21+m12,1+m22-m11-m00,m01-m10 (w is negated after the endif)
98            endif
99            nop
100        endif
101        mov     r8.w,   -r8                                                     // flip w: m12-m21 -> m21-m12 (m00 case), m01-m10 -> m10-m01 (m22 case)
102    else
103        ifc 1, 1, COND_MODE_STA1                                                // if m00<=m11 && m11>m22 - m11 is greatest
104            mul     r8,             TEMP_TANG.yywz,     r6.xxxy                 // m10,m10,m21,-m20 in r8
105            add     r8.y,           CONST_1,            -r5.z                   // m10,1-m00,m21,-m20 in r8
106            add     r9,             r5.y,               -r5.x                   // m11-m22 in r9
107            add     r8.xzw,         r8,                 TEMP_NORM.wwyx          // m01+m10,1-m00,m12+m21,m02-m20 in r8 (w already has the final sign)
108            add     r8.y,           r9,                 r8                      // m01+m10,1+m11-m22-m00,m12+m21,m02-m20 in r8
109        else                                                                    // if m00<=m11 && m11<=m22 - m22 is greatest
110            mul     r8,             TEMP_TANG.zwwy,     r6.xxxy                 // m20,m21,m21,-m10 in r8
111            add     r8.z,           CONST_1,            -r5.z                   // m20,m21,1-m00,-m10 in r8
112            add     r9,             r5.x,               -r5.y                   // m22-m11 in r9
113            add     r8.xyw,         r8,                 TEMP_NORM.xyyw          // m20+m02,m21+m12,1-m00,m01-m10 in r8
114            add     r8.z,           r9,                 r8                      // m20+m02,m21+m12,1+m22-m11-m00,m01-m10 in r8
115            mov     r8.w,           -r8                                         // flip w: m01-m10 -> m10-m01
116        endif
117        nop
118    endif
    // Normalize the fallback quaternion and write it out.
119    dp4     r6,            r8,        r8
120    rsq     r6,            r6.x
121    mul     dmp_lrQuat,    r8,        r6
122l_full_quaternion_calc_end:
123    nop
124    ret
125
//------------------------------------------------------------
// l_position_view_quaternion
//
// Tangent-less variant: transforms the vertex to view space, writes the
// clip-space position and the negated view-space position, and derives the
// quaternion from the view-space normal n alone:
//   s = sqrt(0.5*(n.z + 1))
//   quat = (0.5*n.x/s, 0.5*n.y/s, s, 0)      when 0.5*(n.z + 1) > 0
//   quat = (CONST_1_0 in xyz, 0 in w)        otherwise
// The matrix products are expanded into individual dp3/dp4 instructions and
// interleaved with the scalar math to avoid pipeline stalls.
//
// Reads:    dmp_position, dmp_normal, dmp_local_to_view_matrix,
//           dmp_projection_matrix, CONST_0, CONST_1, CONST_HALF, CONST_1_0
// Writes:   gl_Position, dmp_lrView, dmp_lrQuat
// Clobbers: TEMP_VIEW, TEMP_NORM.xyz, r4, r5 and the comparison status flags
//------------------------------------------------------------
126l_position_view_quaternion:
127    m4x4    TEMP_VIEW,          dmp_position,       dmp_local_to_view_matrix    // view-space position
128	// m3x3 is deliberately expanded into three dp3 instructions (z first) to prevent stalls in the following add instruction.
129    dp3     TEMP_NORM.z,        dmp_normal,         dmp_local_to_view_matrix[2]
130    dp3     TEMP_NORM.x,        dmp_normal,         dmp_local_to_view_matrix[0]
131    dp3     TEMP_NORM.y,        dmp_normal,         dmp_local_to_view_matrix[1]
132    mov     dmp_lrView,         -TEMP_VIEW                                  // view output = -(view-space position)
133    dp4     gl_Position.x,      TEMP_VIEW,          dmp_projection_matrix[0]
134    add     r4,                 TEMP_NORM.z,        CONST_1                 // n.z + 1.0 is in r4
135	// m4x4 is deliberately expanded into four dp4 instructions to prevent stalls in the following mul instructions.
136    dp4     gl_Position.y,      TEMP_VIEW,          dmp_projection_matrix[1]
137    mul     r4,                 r4,                 CONST_HALF              // 0.5*(n.z + 1.0) is in r4
138    dp4     gl_Position.z,      TEMP_VIEW,          dmp_projection_matrix[2]
139    dp4     gl_Position.w,      TEMP_VIEW,          dmp_projection_matrix[3]
140    cmp     LEQ, LEQ,           r4.x,               CONST_0                 // STA0 = (0.5*(n.z + 1.0) <= 0.0)
141    mov     dmp_lrQuat.w,       CONST_0                                     // zero out quat.w component
142    rsq     r4,                 r4.x                                        // 1/sqrt(0.5*(n.z + 1.0)) is in r4
143    mul     r5,                 TEMP_NORM,          CONST_HALF              // 0.5*n is in r5
144    ifc 0, 1, COND_MODE_STA0                                                // taken when STA0 == 0, i.e. 0.5*(n.z + 1.0) > 0.0
145        rcp     dmp_lrQuat.z,   r4.x                                        // sqrt(0.5*(n.z + 1.0)) is in quat.z component
146        mul     dmp_lrQuat.xy,  r5,                 r4                      // 0.5*n.xy / quat.z is in quat.xy
147    else
148        mov     dmp_lrQuat.xyz, CONST_1_0                                   // degenerate case: load fallback quat.xyz from CONST_1_0 (defined outside this file; presumably (1,0,0) -- TODO confirm); quat.w was zeroed above
149    endif
150    nop
151    ret
152    nop                                                                     // follows the ret; NOTE(review): presumably padding -- confirm whether it is required
153
//------------------------------------------------------------
// main
//
// Shader entry point. Boolean uniform b1 (bound as uTangentEnabled) selects
// the quaternion path:
//   b1 set   -> tangent-based quaternion; texture coordinates are copied to
//               both halves of o3 (.xy and .zw receive aTexCoord.xy).
//   b1 clear -> normal-only quaternion; o3 is filled from CONST_0.
// Position, view and quaternion outputs are written by the called routine.
//------------------------------------------------------------
154main:
155    ifb b1
156        call l_position_view_full_quaternion
157        mov  vTexCoord, aTexCoord.xyxy          // texture0 (o3.xy) and texture1 (o3.zw) get the same coordinates
158    else
159        call l_position_view_quaternion
160        mov  vTexCoord, CONST_0                 // no texture coordinates on this path
161    endif
162    end
163endmain:
164
165