//
//------------------------------------------------------------
// Copyright(c) 2009-2010 by Digital Media Professionals Inc.
// All rights reserved.
//------------------------------------------------------------
// This source code is the confidential and proprietary
// of Digital Media Professionals Inc.
//------------------------------------------------------------
//

// Vertex shader: outputs the clip-space position, the negated view-space
// position, a per-vertex quaternion and texture coordinates.
//
// NOTE(review): TEMP_VIEW, TEMP_NORM, TEMP_TANG, dmp_position, dmp_normal,
// dmp_local_to_view_matrix, dmp_projection_matrix, dmp_lrView, dmp_lrQuat,
// gl_Position, the CONST_* values and the CMP_MODE_* / COND_MODE_* / LEQ
// names are not defined in this file; presumably they come from
// commonasm.h -- confirm there before changing register usage.

#include "commonasm.h"

// Input registers map
#define aPosition       v0
#define aNormal         v1
#define aTang           v2
#define aTexCoord       v3

// Output registers map
#define vPosition       o0
#define vQuaternion     o1
#define vView           o2
#define vTexCoord       o3

#pragma bind_symbol(aPosition.xyz,v0,v0)
#pragma bind_symbol(aNormal.xyz,v1,v1)
#pragma bind_symbol(aTang.xyz,v2,v2)
#pragma bind_symbol(aTexCoord.xy,v3,v3)

#pragma bind_symbol(uProjection,c0,c3)
#pragma bind_symbol(uModelView,c4,c7)
#pragma bind_symbol(uTangentEnabled,b1,b1)

#pragma output_map ( position, o0 )
#pragma output_map ( quaternion, o1 )
#pragma output_map ( view, o2 )
#pragma output_map ( texture0, o3.xy )
#pragma output_map ( texture1, o3.zw )

// b0 is a constant 'true'; it is only used to build the unconditional
// jump (jpb b0, true, ...) at the end of the main quaternion path.
defb    b0, true

//------------------------------------------------------------
// l_position_view_full_quaternion
//
// Writes gl_Position, dmp_lrView (= -view position) and dmp_lrQuat.
// The quaternion is derived from a 3x3 matrix m whose elements are
// built from the transformed tangent (aTang) and normal (dmp_normal).
//
// Register layout once the setup section has run (as tracked by the
// per-instruction comments below):
//   TEMP_TANG = (m00, m10, m20, m21)
//   TEMP_NORM = (m02, m12, m22, m01)
//   r5.xyz    = (m22, m11, m00)
//   r6        = (1, -1, ?, 1+m00+m11+m22)
//
// Main path:  taken when 1+m00+m11+m22 > CONST_1__256.
// Fallback:   otherwise; picks the largest of m00/m11/m22 and builds
//             the quaternion around that diagonal element.
// Clobbers:   TEMP_VIEW, TEMP_NORM, TEMP_TANG, r5, r6, r7, r8, r9.
//------------------------------------------------------------
l_position_view_full_quaternion:
    m4x4    TEMP_VIEW,      dmp_position,   dmp_local_to_view_matrix
    m3x3    TEMP_NORM.xyz,  dmp_normal,     dmp_local_to_view_matrix
    m3x3    TEMP_TANG.xyz,  aTang,          dmp_local_to_view_matrix
    mov     dmp_lrView,     -TEMP_VIEW
    m4x4    gl_Position,    TEMP_VIEW,      dmp_projection_matrix

    // r5 = TEMP_NORM x TEMP_TANG (cross product), then normalized.
    mul     r5,             TEMP_NORM.yzx,  TEMP_TANG.zxy
    mad     r5,             -TEMP_TANG.yzx, TEMP_NORM.zxy,  r5      // b*|n|^2 is in r5
    dp3     r5.w,           r5,             r5
    rsq     r5.w,           r5.w
    mul     r5,             r5,             r5.w

    add     r6.w,           TEMP_NORM.z,    r5.y                    // m11+m22 in r6.w

    // TEMP_TANG = r5 x TEMP_NORM: the tangent is rebuilt from b and n.
    mul     TEMP_TANG,      r5.yzx,         TEMP_NORM.zxy
    mad     TEMP_TANG,      -TEMP_NORM.yzx, r5.zxy,         TEMP_TANG

    add     r6.w,           TEMP_TANG.x,    r6                      // m00+m11+m22 in r6.w
    mov     TEMP_TANG.w,    r5.z                                    // m21 in tang.w
    mov     r5.z,           TEMP_TANG.x                             // m00 in r5.z
    add     r6.w,           CONST_1,        r6                      // 1+m00+m11+m22 in r6.w
    mov     TEMP_NORM.w,    r5.x                                    // m01 in norm.w
    mov     r5.x,           TEMP_NORM.z                             // m22 in r5.x -> m22,m11,m00 in r5

    // Choose the path: the compare result is consumed by the jpc below;
    // the two movs in between only prepare the (1,-1) sign constants in r6.xy.
    cmp     CMP_MODE_GT, CMP_MODE_GT, r6.w, CONST_1__256
    mov     r6.x,           CONST_1
    mov     r6.y,           -CONST_1                                // 1,-1,?,1+m00+m11+m22 in r6
    jpc     0, 0, COND_MODE_STA0, l_full_quaternion_calc_fallback   // compare failed -> fallback

    // Main path: quaternion = normalize(m21-m12, m02-m20, m10-m01, 1+m00+m11+m22).
    add     r7.xz,          TEMP_TANG.wwyy, -TEMP_NORM.yyww         // m21-m12,?,m10-m01,? in r7
    add     r7.y,           TEMP_NORM.x,    -TEMP_TANG.z            // m21-m12,m02-m20,m10-m01,? in r7
    mov     r7.w,           r6                                      // m21-m12,m02-m20,m10-m01,1+m00+m11+m22 in r7
    dp4     r6,             r7,             r7
    rsq     r6,             r6.x
    mul     dmp_lrQuat,     r7,             r6
    jpb     b0, true, l_full_quaternion_calc_end                    // b0 is always true: unconditional jump

l_full_quaternion_calc_fallback:
    cmp     CMP_MODE_GT, CMP_MODE_GT, r5.zy, r5.yx                  // comparing m00,m11 with m11,m22
    ifc     1, 1, COND_MODE_STA0
        ifc     1, 1, COND_MODE_STA1
            // if m00>m11 && m11>m22 - m00 is greatest
            mul     r8,     TEMP_TANG.yyzw, r6.xxxy                 // m10,m10,m20,-m21 in r8
            add     r8.x,   CONST_1,        -r5.y                   // 1-m11,m10,m20,-m21 in r8
            add     r9,     r5.z,           -r5.x                   // m00-m22 in r9
            add     r8.yzw, r8,             TEMP_NORM.wwxy          // 1-m11,m10+m01,m20+m02,m12-m21 in r8
                                                                    // (w is negated to m21-m12 after the inner endif)
            add     r8.x,   r9,             r8                      // 1+m00-m22-m11,m10+m01,m20+m02,m12-m21 in r8
        else
            // m00>m11 && m11<=m22: m00 still has to be compared with m22.
            // The mul/add below speculatively start the "m00 is greatest"
            // case; the "m22 is greatest" branch overwrites r8 entirely.
            cmp     CMP_MODE_GT, CMP_MODE_GT, r5.z, r5.x
            mul     r8,     TEMP_TANG.yyzw, r6.xxxy                 // m10,m10,m20,-m21 in r8
            add     r8.x,   CONST_1,        -r5.y                   // 1-m11,m10,m20,-m21 in r8
            ifc     1, 1, COND_MODE_STA0
                // if m00>m11 && m11<=m22 && m00 > m22 - m00 is greatest
                add     r9,     r5.z,       -r5.x                   // m00-m22 in r9
                add     r8.yzw, r8,         TEMP_NORM.wwxy          // 1-m11,m10+m01,m20+m02,m12-m21 in r8
                                                                    // (w is negated to m21-m12 after the endif)
                add     r8.x,   r9,         r8                      // 1+m00-m22-m11,m10+m01,m20+m02,m12-m21 in r8
            else
                // if m00>m11 && m11<=m22 && m00 <= m22 - m22 is greatest
                mul     r8,     TEMP_TANG.zwwy, r6.xxxy             // m20,m21,m21,-m10 in r8
                add     r8.z,   CONST_1,    -r5.z                   // m20,m21,1-m00,-m10 in r8
                add     r9,     r5.x,       -r5.y                   // m22-m11 in r9
                add     r8.xyw, r8,         TEMP_NORM.xyyw          // m20+m02,m21+m12,1-m00,m01-m10
                add     r8.z,   r9,         r8                      // m20+m02,m21+m12,1+m22-m11-m00,m01-m10
                                                                    // (w is negated to m10-m01 after the endif)
            endif
            nop
        endif
        // Shared by all three cases above: flip the sign of w.
        mov     r8.w,   -r8
    else
        ifc     1, 1, COND_MODE_STA1
            // if m00<=m11 && m11>m22 - m11 is greatest
            // (w = m02-m20 already has the required sign; no negation here)
            mul     r8,     TEMP_TANG.yywz, r6.xxxy                 // m10,m10,m21,-m20 in r8
            add     r8.y,   CONST_1,        -r5.z                   // m10,1-m00,m21,-m20 in r8
            add     r9,     r5.y,           -r5.x                   // m11-m22 in r9
            add     r8.xzw, r8,             TEMP_NORM.wwyx          // m01+m10,1-m00,m12+m21,m02-m20 in r8
            add     r8.y,   r9,             r8                      // m01+m10,1+m11-m22-m00,m12+m21,m02-m20 in r8
        else
            // if m00<=m11 && m11<=m22 - m22 is greatest
            mul     r8,     TEMP_TANG.zwwy, r6.xxxy                 // m20,m21,m21,-m10 in r8
            add     r8.z,   CONST_1,        -r5.z                   // m20,m21,1-m00,-m10 in r8
            add     r9,     r5.x,           -r5.y                   // m22-m11 in r9
            add     r8.xyw, r8,             TEMP_NORM.xyyw          // m20+m02,m21+m12,1-m00,m01-m10
            add     r8.z,   r9,             r8                      // m20+m02,m21+m12,1+m22-m11-m00,m01-m10
            mov     r8.w,   -r8                                     // w becomes m10-m01
        endif
        nop
    endif

    // Normalize the fallback result.
    dp4     r6,             r8,             r8
    rsq     r6,             r6.x
    mul     dmp_lrQuat,     r8,             r6

l_full_quaternion_calc_end:
    nop
    ret

//------------------------------------------------------------
// l_position_view_quaternion
//
// Writes gl_Position, dmp_lrView (= -view position) and dmp_lrQuat
// using only the transformed normal n (no tangent):
//   quat.z  = sqrt(0.5*(n.z + 1.0))
//   quat.xy = 0.5*n.xy / quat.z
//   quat.w  = 0
// When 0.5*(n.z + 1.0) <= 0 the xyz components are loaded from
// CONST_1_0 instead.
// Matrix products are written as separate dp3/dp4 instructions and
// interleaved with the scalar math on purpose (see the stall notes below).
// Clobbers: TEMP_VIEW, TEMP_NORM, r4, r5.
//------------------------------------------------------------
l_position_view_quaternion:
    m4x4    TEMP_VIEW,      dmp_position,   dmp_local_to_view_matrix
    // m3x3 is not done on purpose to prevent stalls in the following add instructions.
    dp3     TEMP_NORM.z,    dmp_normal,     dmp_local_to_view_matrix[2]
    dp3     TEMP_NORM.x,    dmp_normal,     dmp_local_to_view_matrix[0]
    dp3     TEMP_NORM.y,    dmp_normal,     dmp_local_to_view_matrix[1]
    mov     dmp_lrView,     -TEMP_VIEW
    dp4     gl_Position.x,  TEMP_VIEW,      dmp_projection_matrix[0]
    add     r4,             TEMP_NORM.z,    CONST_1                 // n.z + 1.0 is in r4
    // m4x4 is not done on purpose to prevent stalls in the following mul instructions.
    dp4     gl_Position.y,  TEMP_VIEW,      dmp_projection_matrix[1]
    mul     r4,             r4,             CONST_HALF              // 0.5*(n.z + 1.0) is in r4
    dp4     gl_Position.z,  TEMP_VIEW,      dmp_projection_matrix[2]
    dp4     gl_Position.w,  TEMP_VIEW,      dmp_projection_matrix[3]
    cmp     LEQ, LEQ, r4.x, CONST_0                                 // compare 0.5*(n.z + 1.0) with 0.0
    mov     dmp_lrQuat.w,   CONST_0                                 // zero out quat.w component
    rsq     r4,             r4.x                                    // 1/sqrt(0.5*(n.z + 1.0)) is in r4
    mul     r5,             TEMP_NORM,      CONST_HALF              // 0.5*n is in r5
    ifc     0, 1, COND_MODE_STA0                                    // taken when the LEQ compare failed, i.e. 0.5*(n.z + 1.0) > 0
        rcp     dmp_lrQuat.z,   r4.x                                // sqrt(0.5*(n.z + 1.0)) is in quat.z component
        mul     dmp_lrQuat.xy,  r5,         r4                      // 0.5*n/quat.z is in quat.xy (r4 holds 1/quat.z)
    else
        // 0.5*(n.z + 1.0) <= 0: the division above is not usable, so load a
        // fixed quat.xyz instead.
        // NOTE(review): CONST_1_0 is defined outside this file; presumably it
        // expands to (1,0,0) -- confirm in commonasm.h.
        mov     dmp_lrQuat.xyz, CONST_1_0
    endif
    nop
    ret
    nop

//------------------------------------------------------------
// main
//
// b1 (bound to uTangentEnabled) selects the quaternion routine:
//   b1 set   -> tangent-based quaternion, texture coordinates passed
//               through as aTexCoord.xyxy (texture0 = o3.xy, texture1 = o3.zw)
//   b1 clear -> normal-only quaternion, texture coordinates set to CONST_0
//------------------------------------------------------------
main:
    ifb     b1
        call    l_position_view_full_quaternion
        mov     vTexCoord,  aTexCoord.xyxy
    else
        call    l_position_view_quaternion
        mov     vTexCoord,  CONST_0
    endif
    end
endmain: