//
//------------------------------------------------------------
// Copyright(c) 2009-2010 by Digital Media Professionals Inc.
// All rights reserved.
//------------------------------------------------------------
// This source code is the confidential and proprietary
// of Digital Media Professionals Inc.
//------------------------------------------------------------
//

// Vertex shader: transforms the vertex to view and clip space and emits a
// quaternion, the negated view-space position and texture coordinates.
// Two quaternion paths exist, selected in main by the boolean uTangentEnabled:
//   - l_position_view_full_quaternion : uses normal and tangent
//   - l_position_view_quaternion      : uses the normal only
//
// Names such as TEMP_VIEW, TEMP_NORM, TEMP_TANG, dmp_position, dmp_normal,
// dmp_local_to_view_matrix, dmp_projection_matrix, dmp_lrView, dmp_lrQuat,
// gl_Position, CONST_*, CMP_MODE_*, COND_MODE_* and LEQ are not defined in
// this file; presumably they come from commonasm.h -- confirm there.

#include "commonasm.h"

// Input registers map
// NOTE(review): aPosition and aNormal are not referenced by name in this file
// (the code uses dmp_position / dmp_normal instead); only aTang and aTexCoord are.
#define aPosition       v0
#define aNormal         v1
#define aTang           v2
#define aTexCoord       v3

// Output registers map
// NOTE(review): of these, only vTexCoord is referenced by name in this file.
#define vPosition       o0
#define vQuaternion     o1
#define vView           o2
#define vTexCoord       o3

// Vertex attribute bindings
#pragma bind_symbol(aPosition.xyz,v0,v0)
#pragma bind_symbol(aNormal.xyz,v1,v1)
#pragma bind_symbol(aTang.xyz,v2,v2)
#pragma bind_symbol(aTexCoord.xy,v3,v3)

// Float uniform bindings: two 4-register matrices
#pragma bind_symbol(uProjection,c0,c3)
#pragma bind_symbol(uModelView,c4,c7)

// Boolean uniform tested by main to pick the quaternion path
#pragma bind_symbol(uTangentEnabled,b1,b1)

// Output register semantics; o3 carries two texture coordinate sets (xy and zw)
#pragma output_map ( position, o0 )
#pragma output_map ( quaternion, o1 )
#pragma output_map ( view, o2 )
#pragma output_map ( texture0, o3.xy )
#pragma output_map ( texture1, o3.zw )

// b0 is a constant 'true'; "jpb b0, true, <label>" below is therefore an
// unconditional jump.
defb b0, true

//------------------------------------------------------------
// l_position_view_full_quaternion
//
// Writes:
//   gl_Position : TEMP_VIEW transformed by dmp_projection_matrix
//   dmp_lrView  : -TEMP_VIEW (negated view-space position)
//   dmp_lrQuat  : unit-length quaternion (x,y,z,w) derived from the 3x3 frame
//                 whose columns are tangent, bitangent, normal
// Scratch registers written: r5, r6, r7, r8, r9 and the TEMP_* registers.
//
// Matrix element naming used in the comments (mRC = row R, column C):
//   column 0 = rebuilt tangent (TEMP_TANG.xyz) -> m00, m10, m20
//   column 1 = bitangent       (r5.xyz)        -> m01, m11, m21
//   column 2 = normal          (TEMP_NORM.xyz) -> m02, m12, m22
//
// If 1 + m00 + m11 + m22 > CONST_1__256 the trace-based formula is used;
// otherwise the fallback picks the largest diagonal element and builds the
// quaternion around it.
// NOTE(review): no explicit normalization of TEMP_NORM is visible in this
// routine -- confirm the input normal / matrix guarantee unit length.
//------------------------------------------------------------
l_position_view_full_quaternion:
    m4x4    TEMP_VIEW,          dmp_position,       dmp_local_to_view_matrix
    m3x3    TEMP_NORM.xyz,      dmp_normal,         dmp_local_to_view_matrix
    m3x3    TEMP_TANG.xyz,      aTang,              dmp_local_to_view_matrix
    mov     dmp_lrView,         -TEMP_VIEW
    m4x4    gl_Position,        TEMP_VIEW,          dmp_projection_matrix

    // Bitangent: r5 = normal x tangent, then scaled to unit length.
    mul     r5,                 TEMP_NORM.yzx,      TEMP_TANG.zxy
    mad     r5,                 -TEMP_TANG.yzx,     TEMP_NORM.zxy,      r5      // b*|n|^2 is in r5
    dp3     r5.w,               r5,                 r5
    rsq     r5.w,               r5.w
    mul     r5,                 r5,                 r5.w
    add     r6.w,               TEMP_NORM.z,        r5.y                        // m11+m22 in r6.w
    // Rebuild the tangent as bitangent x normal so the frame is orthogonal.
    mul     TEMP_TANG,          r5.yzx,             TEMP_NORM.zxy
    mad     TEMP_TANG,          -TEMP_NORM.yzx,     r5.zxy,             TEMP_TANG
    add     r6.w,               TEMP_TANG.x,        r6                          // m00+m11+m22 in r6.w
    // Repack elements: spare .w lanes hold off-diagonal terms, r5 holds the diagonal.
    mov     TEMP_TANG.w,        r5.z                                            // m21 in tang.w
    mov     r5.z,               TEMP_TANG.x                                     // m00 in r5.z
    add     r6.w,               CONST_1,            r6                          // 1+m00+m11+m22 in r6.w
    mov     TEMP_NORM.w,        r5.x                                            // m01 in norm.w
    mov     r5.x,               TEMP_NORM.z                                     // m22 in r5.x -> m22,m11,m00 in r5
    cmp     CMP_MODE_GT,        CMP_MODE_GT,        r6.w,               CONST_1__256
    // r6.x = 1 and r6.y = -1 are sign factors used by the fallback (r6.xxxy).
    mov     r6.x,               CONST_1
    mov     r6.y,               -CONST_1                                        // 1,-1,?,1+m00+m11+m22 in r6
    // Jump to the fallback when the compare above was false (value not > CONST_1__256).
    jpc     0, 0, COND_MODE_STA0, l_full_quaternion_calc_fallback
    // Trace-based path: q = normalize(m21-m12, m02-m20, m10-m01, 1+m00+m11+m22)
    add     r7.xz,              TEMP_TANG.wwyy,     -TEMP_NORM.yyww             // m21-m12,?,m10-m01,? in r7
    add     r7.y,               TEMP_NORM.x,        -TEMP_TANG.z                // m21-m12,m02-m20,m10-m01,? in r7
    mov     r7.w,               r6                                              // m21-m12,m02-m20,m10-m01,1+m00+m11+m22 in r7
    dp4     r6,                 r7,                 r7
    rsq     r6,                 r6.x
    mul     dmp_lrQuat,         r7,                 r6
    jpb     b0, true, l_full_quaternion_calc_end                                // b0 is always true: unconditional
l_full_quaternion_calc_fallback:
    // Status 0 = (m00 > m11), status 1 = (m11 > m22).
    cmp     CMP_MODE_GT,        CMP_MODE_GT,        r5.zy,              r5.yx   // comparing m00,m11 with m11,m22
    ifc     1, 1, COND_MODE_STA0
        ifc     1, 1, COND_MODE_STA1                                            // if m00>m11 && m11>m22 - m00 is greatest
            mul     r8,         TEMP_TANG.yyzw,     r6.xxxy                     // m10,m10,m20,-m21 in r8
            add     r8.x,       CONST_1,            -r5.y                       // 1-m11,m10,m20,-m21 in r8
            add     r9,         r5.z,               -r5.x                       // m00-m22 in r9
            // r8.w is m12-m21 here; the "mov r8.w, -r8" after the inner endif flips it to m21-m12.
            add     r8.yzw,     r8,                 TEMP_NORM.wwxy              // 1-m11,m10+m01,m20+m02,m12-m21 in r8
            add     r8.x,       r9,                 r8                          // 1+m00-m22-m11,m10+m01,m20+m02,m12-m21 in r8
        else
            // m11 <= m22 here: compare m00 with m22 to decide between them.
            cmp     CMP_MODE_GT, CMP_MODE_GT,       r5.z,               r5.x
            // The next two instructions are issued before the branch; their result is
            // only kept on the "m00 is greatest" side (the other side rewrites all of r8).
            mul     r8,         TEMP_TANG.yyzw,     r6.xxxy                     // m10,m10,m20,-m21 in r8
            add     r8.x,       CONST_1,            -r5.y                       // 1-m11,m10,m20,-m21 in r8
            ifc     1, 1, COND_MODE_STA0                                        // if m00>m11 && m11<=m22 && m00 > m22 - m00 is greatest
                add     r9,     r5.z,               -r5.x                       // m00-m22 in r9
                // r8.w is m12-m21 here; negated to m21-m12 after the endif.
                add     r8.yzw, r8,                 TEMP_NORM.wwxy              // 1-m11,m10+m01,m20+m02,m12-m21 in r8
                add     r8.x,   r9,                 r8                          // 1+m00-m22-m11,m10+m01,m20+m02,m12-m21 in r8
            else                                                                // if m00>m11 && m11<=m22 && m00 <= m22 - m22 is greatest
                mul     r8,     TEMP_TANG.zwwy,     r6.xxxy                     // m20,m21,m21,-m10 in r8
                add     r8.z,   CONST_1,            -r5.z                       // m20,m21,1-m00,-m10 in r8
                add     r9,     r5.x,               -r5.y                       // m22-m11 in r9
                add     r8.xyw, r8,                 TEMP_NORM.xyyw              // m20+m02,m21+m12,1-m00,m01-m10
                // r8.w is m01-m10 here; negated to m10-m01 after the endif.
                add     r8.z,   r9,                 r8                          // m20+m02,m21+m12,1+m22-m11-m00,m01-m10
            endif
            nop                                                                 // NOTE(review): presumably padding required between endif's -- confirm
        endif
        // Common sign fix for every path of the m00>m11 side (see the notes above).
        mov     r8.w,           -r8
    else
        ifc     1, 1, COND_MODE_STA1                                            // if m00<=m11 && m11>m22 - m11 is greatest
            mul     r8,         TEMP_TANG.yywz,     r6.xxxy                     // m10,m10,m21,-m20 in r8
            add     r8.y,       CONST_1,            -r5.z                       // m10,1-m00,m21,-m20 in r8
            add     r9,         r5.y,               -r5.x                       // m11-m22 in r9
            // r8.w already has the wanted sign (m02-m20): no negation on this path.
            add     r8.xzw,     r8,                 TEMP_NORM.wwyx              // m01+m10,1-m00,m12+m21,m02-m20 in r8
            add     r8.y,       r9,                 r8                          // m01+m10,1+m11-m22-m00,m12+m21,m02-m20 in r8
        else                                                                    // if m00<=m11 && m11<=m22 - m22 is greatest
            mul     r8,         TEMP_TANG.zwwy,     r6.xxxy                     // m20,m21,m21,-m10 in r8
            add     r8.z,       CONST_1,            -r5.z                       // m20,m21,1-m00,-m10 in r8
            add     r9,         r5.x,               -r5.y                       // m22-m11 in r9
            add     r8.xyw,     r8,                 TEMP_NORM.xyyw              // m20+m02,m21+m12,1-m00,m01-m10
            add     r8.z,       r9,                 r8                          // m20+m02,m21+m12,1+m22-m11-m00,m01-m10
            mov     r8.w,       -r8                                             // w becomes m10-m01
        endif
        nop                                                                     // NOTE(review): presumably padding required between endif's -- confirm
    endif
    // Normalize the fallback quaternion: q = r8 / |r8|
    dp4     r6,                 r8,                 r8
    rsq     r6,                 r6.x
    mul     dmp_lrQuat,         r8,                 r6
l_full_quaternion_calc_end:
    nop
    ret

//------------------------------------------------------------
// l_position_view_quaternion
//
// Normal-only variant (no tangent is read).
// Writes:
//   gl_Position : TEMP_VIEW transformed by dmp_projection_matrix (four dp4)
//   dmp_lrView  : -TEMP_VIEW
//   dmp_lrQuat  : with h = 0.5*(n.z + 1.0), n = TEMP_NORM:
//                   h >  0 : xy = 0.5*n.xy / sqrt(h), z = sqrt(h), w = 0
//                   h <= 0 : xyz = CONST_1_0,                     w = 0
// Scratch registers written: r4, r5 and the TEMP_* registers.
// The matrix products are expanded into individual dp3/dp4 instructions and
// interleaved with the scalar math; per the original notes this is done on
// purpose to avoid stalls.
//------------------------------------------------------------
l_position_view_quaternion:
    m4x4    TEMP_VIEW,          dmp_position,       dmp_local_to_view_matrix
    // m3x3 is deliberately not used, to prevent stalls in the following add instructions.
    // TEMP_NORM.z is computed first because the add below only needs z.
    dp3     TEMP_NORM.z,        dmp_normal,         dmp_local_to_view_matrix[2]
    dp3     TEMP_NORM.x,        dmp_normal,         dmp_local_to_view_matrix[0]
    dp3     TEMP_NORM.y,        dmp_normal,         dmp_local_to_view_matrix[1]
    mov     dmp_lrView,         -TEMP_VIEW
    dp4     gl_Position.x,      TEMP_VIEW,          dmp_projection_matrix[0]
    add     r4,                 TEMP_NORM.z,        CONST_1                     // n.z + 1.0 is in r4
    // m4x4 is deliberately not used, to prevent stalls in the following mul instructions.
    dp4     gl_Position.y,      TEMP_VIEW,          dmp_projection_matrix[1]
    mul     r4,                 r4,                 CONST_HALF                  // 0.5*(n.z + 1.0) is in r4
    dp4     gl_Position.z,      TEMP_VIEW,          dmp_projection_matrix[2]
    dp4     gl_Position.w,      TEMP_VIEW,          dmp_projection_matrix[3]
    // NOTE(review): this compare uses the bare name LEQ while the other routine
    // uses CMP_MODE_* names; both are defined outside this file -- confirm.
    cmp     LEQ, LEQ,           r4.x,               CONST_0                     // compare 0.5*(n.z + 1.0) with 0.0
    mov     dmp_lrQuat.w,       CONST_0                                         // zero out quat.w component
    // rsq is issued before the branch; its result is only consumed on the h > 0 side.
    rsq     r4,                 r4.x                                            // 1/sqrt(0.5*(n.z + 1.0)) is in r4
    mul     r5,                 TEMP_NORM,          CONST_HALF                  // 0.5*n is in r5
    // Body runs when the compare above was false, i.e. 0.5*(n.z + 1.0) > 0.
    ifc     0, 1, COND_MODE_STA0
        rcp     dmp_lrQuat.z,   r4.x                                            // sqrt(0.5*(n.z + 1.0)) is in quat.z component
        mul     dmp_lrQuat.xy,  r5,                 r4                          // 0.5*n.xy/sqrt(0.5*(n.z + 1.0)) is in quat.xy
    else
        // Degenerate case (0.5*(n.z + 1.0) <= 0): write a constant into quat.xyz.
        // NOTE(review): the previous comment here said "zero out quat.w component",
        // which does not match this instruction. CONST_1_0 is defined outside this
        // file (presumably the vector 1,0,0) -- confirm.
        mov     dmp_lrQuat.xyz, CONST_1_0
    endif
    nop
    ret
    nop

//------------------------------------------------------------
// main
//
// Entry point. When boolean b1 (uTangentEnabled) is set, the tangent-based
// quaternion routine runs and the texture coordinate is replicated into both
// halves of o3 (xy and zw). Otherwise the normal-only routine runs and o3 is
// written with CONST_0.
//------------------------------------------------------------
main:
    ifb     b1
        call    l_position_view_full_quaternion
        mov     vTexCoord,      aTexCoord.xyxy
    else
        call    l_position_view_quaternion
        mov     vTexCoord,      CONST_0
    endif
    end
endmain: