ACE3/extensions/common/glm/gtx/simd_quat.inl

///////////////////////////////////////////////////////////////////////////////////
/// OpenGL Mathematics (glm.g-truc.net)
///
/// Copyright (c) 2005 - 2015 G-Truc Creation (www.g-truc.net)
/// Permission is hereby granted, free of charge, to any person obtaining a copy
/// of this software and associated documentation files (the "Software"), to deal
/// in the Software without restriction, including without limitation the rights
/// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
/// copies of the Software, and to permit persons to whom the Software is
/// furnished to do so, subject to the following conditions:
/// 
/// The above copyright notice and this permission notice shall be included in
/// all copies or substantial portions of the Software.
/// 
/// Restrictions:
///		By making use of the Software for military purposes, you choose to make
///		a Bunny unhappy.
/// 
/// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
/// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
/// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
/// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
/// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
/// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
/// THE SOFTWARE.
///
/// @ref gtx_simd_quat
/// @file glm/gtx/simd_quat.inl
/// @date 2013-04-22 / 2014-11-25
/// @author Christophe Riccio
///////////////////////////////////////////////////////////////////////////////////

namespace glm{
namespace detail{


//////////////////////////////////////
// Debugging
#if 0
void print(__m128 v)
{
    GLM_ALIGN(16) float result[4];
    _mm_store_ps(result, v);

    printf("__m128:    %f %f %f %f\n", result[0], result[1], result[2], result[3]);
}

void print(const fvec4SIMD &v)
{
    printf("fvec4SIMD: %f %f %f %f\n", v.x, v.y, v.z, v.w);
}
#endif


//////////////////////////////////////
// Implicit basic constructors

GLM_FUNC_QUALIFIER fquatSIMD::fquatSIMD()
#	ifdef GLM_FORCE_NO_CTOR_INIT
		: Data(_mm_set_ps(1.0f, 0.0f, 0.0f, 0.0f))
#	endif
{}

GLM_FUNC_QUALIFIER fquatSIMD::fquatSIMD(__m128 const & Data) :
	Data(Data)
{}

GLM_FUNC_QUALIFIER fquatSIMD::fquatSIMD(fquatSIMD const & q) :
	Data(q.Data)
{}


//////////////////////////////////////
// Explicit basic constructors

GLM_FUNC_QUALIFIER fquatSIMD::fquatSIMD(float const & w, float const & x, float const & y, float const & z) :
	Data(_mm_set_ps(w, z, y, x))
{}

GLM_FUNC_QUALIFIER fquatSIMD::fquatSIMD(quat const & q) :
	Data(_mm_set_ps(q.w, q.z, q.y, q.x))
{}

GLM_FUNC_QUALIFIER fquatSIMD::fquatSIMD(vec3 const & eulerAngles)
{
    vec3 c = glm::cos(eulerAngles * 0.5f);
	vec3 s = glm::sin(eulerAngles * 0.5f);

    Data = _mm_set_ps(
        (c.x * c.y * c.z) + (s.x * s.y * s.z),
        (c.x * c.y * s.z) - (s.x * s.y * c.z),
        (c.x * s.y * c.z) + (s.x * c.y * s.z),
        (s.x * c.y * c.z) - (c.x * s.y * s.z));
}


//////////////////////////////////////
// Unary arithmetic operators

GLM_FUNC_QUALIFIER fquatSIMD& fquatSIMD::operator=(fquatSIMD const & q)
{
    this->Data = q.Data;
    return *this;
}

GLM_FUNC_QUALIFIER fquatSIMD& fquatSIMD::operator*=(float const & s)
{
	this->Data = _mm_mul_ps(this->Data, _mm_set_ps1(s));
	return *this;
}

GLM_FUNC_QUALIFIER fquatSIMD& fquatSIMD::operator/=(float const & s)
{
	this->Data = _mm_div_ps(Data, _mm_set1_ps(s));
	return *this;
}


// negate operator
GLM_FUNC_QUALIFIER fquatSIMD operator- (fquatSIMD const & q)
{
    return fquatSIMD(_mm_mul_ps(q.Data, _mm_set_ps(-1.0f, -1.0f, -1.0f, -1.0f)));
}

// operator+
GLM_FUNC_QUALIFIER fquatSIMD operator+ (fquatSIMD const & q1, fquatSIMD const & q2)
{
	return fquatSIMD(_mm_add_ps(q1.Data, q2.Data));
}

//operator*
GLM_FUNC_QUALIFIER fquatSIMD operator* (fquatSIMD const & q1, fquatSIMD const & q2)
{
    // SSE2 STATS:
    //    11 shuffle
    //    8  mul
    //    8  add
    
    // SSE4 STATS:
    //    3 shuffle
    //    4 mul
    //    4 dpps

    __m128 mul0 = _mm_mul_ps(q1.Data, _mm_shuffle_ps(q2.Data, q2.Data, _MM_SHUFFLE(0, 1, 2, 3)));
    __m128 mul1 = _mm_mul_ps(q1.Data, _mm_shuffle_ps(q2.Data, q2.Data, _MM_SHUFFLE(1, 0, 3, 2)));
    __m128 mul2 = _mm_mul_ps(q1.Data, _mm_shuffle_ps(q2.Data, q2.Data, _MM_SHUFFLE(2, 3, 0, 1)));
    __m128 mul3 = _mm_mul_ps(q1.Data, q2.Data);

#   if((GLM_ARCH & GLM_ARCH_SSE4))
    __m128 add0 = _mm_dp_ps(mul0, _mm_set_ps(1.0f, -1.0f,  1.0f,  1.0f), 0xff);
    __m128 add1 = _mm_dp_ps(mul1, _mm_set_ps(1.0f,  1.0f,  1.0f, -1.0f), 0xff);
    __m128 add2 = _mm_dp_ps(mul2, _mm_set_ps(1.0f,  1.0f, -1.0f,  1.0f), 0xff);
    __m128 add3 = _mm_dp_ps(mul3, _mm_set_ps(1.0f, -1.0f, -1.0f, -1.0f), 0xff);
#   else
           mul0 = _mm_mul_ps(mul0, _mm_set_ps(1.0f, -1.0f,  1.0f,  1.0f));
    __m128 add0 = _mm_add_ps(mul0, _mm_movehl_ps(mul0, mul0));
           add0 = _mm_add_ss(add0, _mm_shuffle_ps(add0, add0, 1));

           mul1 = _mm_mul_ps(mul1, _mm_set_ps(1.0f,  1.0f,  1.0f, -1.0f));
    __m128 add1 = _mm_add_ps(mul1, _mm_movehl_ps(mul1, mul1));
           add1 = _mm_add_ss(add1, _mm_shuffle_ps(add1, add1, 1));

           mul2 = _mm_mul_ps(mul2, _mm_set_ps(1.0f,  1.0f, -1.0f,  1.0f));
    __m128 add2 = _mm_add_ps(mul2, _mm_movehl_ps(mul2, mul2));
           add2 = _mm_add_ss(add2, _mm_shuffle_ps(add2, add2, 1));

           mul3 = _mm_mul_ps(mul3, _mm_set_ps(1.0f, -1.0f, -1.0f, -1.0f));
    __m128 add3 = _mm_add_ps(mul3, _mm_movehl_ps(mul3, mul3));
           add3 = _mm_add_ss(add3, _mm_shuffle_ps(add3, add3, 1));
#endif


    // This SIMD code is a politically correct way of doing this, but in every test I've tried it has been slower than
    // the final code below. I'll keep this here for reference - maybe somebody else can do something better...
    //
    //__m128 xxyy = _mm_shuffle_ps(add0, add1, _MM_SHUFFLE(0, 0, 0, 0));
    //__m128 zzww = _mm_shuffle_ps(add2, add3, _MM_SHUFFLE(0, 0, 0, 0));
    //
    //return _mm_shuffle_ps(xxyy, zzww, _MM_SHUFFLE(2, 0, 2, 0));
    
    float x;
    float y;
    float z;
    float w;

    _mm_store_ss(&x, add0);
    _mm_store_ss(&y, add1);
    _mm_store_ss(&z, add2);
    _mm_store_ss(&w, add3);

    return detail::fquatSIMD(w, x, y, z);
}

GLM_FUNC_QUALIFIER fvec4SIMD operator* (fquatSIMD const & q, fvec4SIMD const & v)
{
    static const __m128 two = _mm_set1_ps(2.0f);

    __m128 q_wwww  = _mm_shuffle_ps(q.Data, q.Data, _MM_SHUFFLE(3, 3, 3, 3));
    __m128 q_swp0  = _mm_shuffle_ps(q.Data, q.Data, _MM_SHUFFLE(3, 0, 2, 1));
	__m128 q_swp1  = _mm_shuffle_ps(q.Data, q.Data, _MM_SHUFFLE(3, 1, 0, 2));
	__m128 v_swp0  = _mm_shuffle_ps(v.Data, v.Data, _MM_SHUFFLE(3, 0, 2, 1));
	__m128 v_swp1  = _mm_shuffle_ps(v.Data, v.Data, _MM_SHUFFLE(3, 1, 0, 2));
	
	__m128 uv      = _mm_sub_ps(_mm_mul_ps(q_swp0, v_swp1), _mm_mul_ps(q_swp1, v_swp0));
    __m128 uv_swp0 = _mm_shuffle_ps(uv, uv, _MM_SHUFFLE(3, 0, 2, 1));
    __m128 uv_swp1 = _mm_shuffle_ps(uv, uv, _MM_SHUFFLE(3, 1, 0, 2));
    __m128 uuv     = _mm_sub_ps(_mm_mul_ps(q_swp0, uv_swp1), _mm_mul_ps(q_swp1, uv_swp0));

    
    uv  = _mm_mul_ps(uv,  _mm_mul_ps(q_wwww, two));
    uuv = _mm_mul_ps(uuv, two);

    return _mm_add_ps(v.Data, _mm_add_ps(uv, uuv));
}

GLM_FUNC_QUALIFIER fvec4SIMD operator* (fvec4SIMD const & v, fquatSIMD const & q)
{
	return glm::inverse(q) * v;
}

GLM_FUNC_QUALIFIER fquatSIMD operator* (fquatSIMD const & q, float s)
{
	return fquatSIMD(_mm_mul_ps(q.Data, _mm_set1_ps(s)));
}

GLM_FUNC_QUALIFIER fquatSIMD operator* (float s, fquatSIMD const & q)
{
	return fquatSIMD(_mm_mul_ps(_mm_set1_ps(s), q.Data));
}


//operator/
GLM_FUNC_QUALIFIER fquatSIMD operator/ (fquatSIMD const & q, float s)
{
	return fquatSIMD(_mm_div_ps(q.Data, _mm_set1_ps(s)));
}


}//namespace detail


GLM_FUNC_QUALIFIER quat quat_cast
(
	detail::fquatSIMD const & x
)
{
	GLM_ALIGN(16) quat Result;
	_mm_store_ps(&Result[0], x.Data);

	return Result;
}

template <typename T>
GLM_FUNC_QUALIFIER detail::fquatSIMD quatSIMD_cast_impl(const T m0[], const T m1[], const T m2[])
{
    T trace = m0[0] + m1[1] + m2[2] + T(1.0);
    if (trace > T(0))
    {
        T s = static_cast<T>(0.5) / sqrt(trace);

        return _mm_set_ps(
            static_cast<float>(T(0.25) / s),
            static_cast<float>((m0[1] - m1[0]) * s),
            static_cast<float>((m2[0] - m0[2]) * s),
            static_cast<float>((m1[2] - m2[1]) * s));
    }
    else
    {
        if (m0[0] > m1[1])
        {
            if (m0[0] > m2[2])
            {
                // X is biggest.
                T s = sqrt(m0[0] - m1[1] - m2[2] + T(1.0)) * T(0.5);

                return _mm_set_ps(
                    static_cast<float>((m1[2] - m2[1]) * s),
                    static_cast<float>((m2[0] + m0[2]) * s),
                    static_cast<float>((m0[1] + m1[0]) * s),
                    static_cast<float>(T(0.5)          * s));
            }
        }
        else
        {
            if (m1[1] > m2[2])
            {
                // Y is biggest.
                T s = sqrt(m1[1] - m0[0] - m2[2] + T(1.0)) * T(0.5);

                return _mm_set_ps(
                    static_cast<float>((m2[0] - m0[2]) * s),
                    static_cast<float>((m1[2] + m2[1]) * s),
                    static_cast<float>(T(0.5)          * s),
                    static_cast<float>((m0[1] + m1[0]) * s));
            }
        }

        // Z is biggest.
        T s = sqrt(m2[2] - m0[0] - m1[1] + T(1.0)) * T(0.5);

        return _mm_set_ps(
            static_cast<float>((m0[1] - m1[0]) * s),
            static_cast<float>(T(0.5)          * s),
            static_cast<float>((m1[2] + m2[1]) * s),
            static_cast<float>((m2[0] + m0[2]) * s));
    }
}

GLM_FUNC_QUALIFIER detail::fquatSIMD quatSIMD_cast
(
	detail::fmat4x4SIMD const & m
)
{
    // Scalar implementation for now.
    GLM_ALIGN(16) float m0[4];
    GLM_ALIGN(16) float m1[4];
    GLM_ALIGN(16) float m2[4];

    _mm_store_ps(m0, m[0].Data);
    _mm_store_ps(m1, m[1].Data);
    _mm_store_ps(m2, m[2].Data);

    return quatSIMD_cast_impl(m0, m1, m2);
}

template <typename T, precision P>
GLM_FUNC_QUALIFIER detail::fquatSIMD quatSIMD_cast
(
    tmat4x4<T, P> const & m
)
{
    return quatSIMD_cast_impl(&m[0][0], &m[1][0], &m[2][0]);
}

template <typename T, precision P>
GLM_FUNC_QUALIFIER detail::fquatSIMD quatSIMD_cast
(
    tmat3x3<T, P> const & m
)
{
    return quatSIMD_cast_impl(&m[0][0], &m[1][0], &m[2][0]);
}


GLM_FUNC_QUALIFIER detail::fmat4x4SIMD mat4SIMD_cast
(
	detail::fquatSIMD const & q
)
{
    detail::fmat4x4SIMD result;

    __m128 _wwww  = _mm_shuffle_ps(q.Data, q.Data, _MM_SHUFFLE(3, 3, 3, 3));
    __m128 _xyzw  = q.Data;
    __m128 _zxyw  = _mm_shuffle_ps(q.Data, q.Data, _MM_SHUFFLE(3, 1, 0, 2));
    __m128 _yzxw  = _mm_shuffle_ps(q.Data, q.Data, _MM_SHUFFLE(3, 0, 2, 1));

    __m128 _xyzw2 = _mm_add_ps(_xyzw, _xyzw);
    __m128 _zxyw2 = _mm_shuffle_ps(_xyzw2, _xyzw2, _MM_SHUFFLE(3, 1, 0, 2));
    __m128 _yzxw2 = _mm_shuffle_ps(_xyzw2, _xyzw2, _MM_SHUFFLE(3, 0, 2, 1));
    
    __m128 _tmp0  = _mm_sub_ps(_mm_set1_ps(1.0f), _mm_mul_ps(_yzxw2, _yzxw));
           _tmp0  = _mm_sub_ps(_tmp0, _mm_mul_ps(_zxyw2, _zxyw));

    __m128 _tmp1  = _mm_mul_ps(_yzxw2, _xyzw);
           _tmp1  = _mm_add_ps(_tmp1, _mm_mul_ps(_zxyw2, _wwww));

    __m128 _tmp2  = _mm_mul_ps(_zxyw2, _xyzw);
           _tmp2  = _mm_sub_ps(_tmp2, _mm_mul_ps(_yzxw2, _wwww));


    // There's probably a better, more politically correct way of doing this...
    result[0].Data = _mm_set_ps(
        0.0f,
        reinterpret_cast<float*>(&_tmp2)[0],
        reinterpret_cast<float*>(&_tmp1)[0],
        reinterpret_cast<float*>(&_tmp0)[0]);

    result[1].Data = _mm_set_ps(
        0.0f,
        reinterpret_cast<float*>(&_tmp1)[1],
        reinterpret_cast<float*>(&_tmp0)[1],
        reinterpret_cast<float*>(&_tmp2)[1]);

    result[2].Data = _mm_set_ps(
        0.0f,
        reinterpret_cast<float*>(&_tmp0)[2],
        reinterpret_cast<float*>(&_tmp2)[2],
        reinterpret_cast<float*>(&_tmp1)[2]);

   result[3].Data = _mm_set_ps(
        1.0f,
        0.0f,
        0.0f,
        0.0f);


    return result;
}

GLM_FUNC_QUALIFIER mat4 mat4_cast
(
	detail::fquatSIMD const & q
)
{
    return mat4_cast(mat4SIMD_cast(q));
}


GLM_FUNC_QUALIFIER float length
(
	detail::fquatSIMD const & q
)
{
    return glm::sqrt(dot(q, q));
}

GLM_FUNC_QUALIFIER detail::fquatSIMD normalize
(
	detail::fquatSIMD const & q
)
{
    return _mm_mul_ps(q.Data, _mm_set1_ps(1.0f / length(q)));
}

GLM_FUNC_QUALIFIER float dot
(
	detail::fquatSIMD const & q1,
	detail::fquatSIMD const & q2
)
{
    float result;
    _mm_store_ss(&result, detail::sse_dot_ps(q1.Data, q2.Data));

    return result;
}

GLM_FUNC_QUALIFIER detail::fquatSIMD mix
(
	detail::fquatSIMD const & x, 
	detail::fquatSIMD const & y, 
	float const & a
)
{
	float cosTheta = dot(x, y);

    if (cosTheta > 1.0f - glm::epsilon<float>())
    {
	    return _mm_add_ps(x.Data, _mm_mul_ps(_mm_set1_ps(a), _mm_sub_ps(y.Data, x.Data)));
    }
    else
    {
        float angle = glm::acos(cosTheta);
        
        
        float s0 = glm::sin((1.0f - a) * angle);
        float s1 = glm::sin(a * angle);
        float d  = 1.0f / glm::sin(angle);

        return (s0 * x + s1 * y) * d;
    }
}

GLM_FUNC_QUALIFIER detail::fquatSIMD lerp
(
	detail::fquatSIMD const & x, 
	detail::fquatSIMD const & y, 
	float const & a
)
{
	// Lerp is only defined in [0, 1]
	assert(a >= 0.0f);
	assert(a <= 1.0f);

    return _mm_add_ps(x.Data, _mm_mul_ps(_mm_set1_ps(a), _mm_sub_ps(y.Data, x.Data)));
}

GLM_FUNC_QUALIFIER detail::fquatSIMD slerp
(
	detail::fquatSIMD const & x, 
	detail::fquatSIMD const & y, 
	float const & a
)
{
	detail::fquatSIMD z = y;

	float cosTheta = dot(x, y);

	// If cosTheta < 0, the interpolation will take the long way around the sphere. 
	// To fix this, one quat must be negated.
	if (cosTheta < 0.0f)
	{
		z        = -y;
		cosTheta = -cosTheta;
	}

	// Perform a linear interpolation when cosTheta is close to 1 to avoid side effect of sin(angle) becoming a zero denominator
	if(cosTheta > 1.0f - epsilon<float>())
	{
		return _mm_add_ps(x.Data, _mm_mul_ps(_mm_set1_ps(a), _mm_sub_ps(y.Data, x.Data)));
	}
	else
	{
        float angle = glm::acos(cosTheta);


		float s0 = glm::sin((1.0f - a) * angle);
        float s1 = glm::sin(a * angle);
        float d  = 1.0f / glm::sin(angle);

        return (s0 * x + s1 * y) * d;
	}
}


GLM_FUNC_QUALIFIER detail::fquatSIMD fastMix
(
	detail::fquatSIMD const & x, 
	detail::fquatSIMD const & y, 
	float const & a
)
{
	float cosTheta = dot(x, y);

    if (cosTheta > 1.0f - glm::epsilon<float>())
    {
	    return _mm_add_ps(x.Data, _mm_mul_ps(_mm_set1_ps(a), _mm_sub_ps(y.Data, x.Data)));
    }
    else
    {
        float angle = glm::fastAcos(cosTheta);


        __m128 s  = glm::fastSin(_mm_set_ps((1.0f - a) * angle, a * angle, angle, 0.0f));

        __m128 s0 =                               _mm_shuffle_ps(s, s, _MM_SHUFFLE(3, 3, 3, 3));
        __m128 s1 =                               _mm_shuffle_ps(s, s, _MM_SHUFFLE(2, 2, 2, 2));
        __m128 d  = _mm_div_ps(_mm_set1_ps(1.0f), _mm_shuffle_ps(s, s, _MM_SHUFFLE(1, 1, 1, 1)));
        
        return _mm_mul_ps(_mm_add_ps(_mm_mul_ps(s0, x.Data), _mm_mul_ps(s1, y.Data)), d);
    }
}

GLM_FUNC_QUALIFIER detail::fquatSIMD fastSlerp
(
	detail::fquatSIMD const & x, 
	detail::fquatSIMD const & y, 
	float const & a
)
{
	detail::fquatSIMD z = y;

	float cosTheta = dot(x, y);
	if (cosTheta < 0.0f)
	{
		z        = -y;
		cosTheta = -cosTheta;
	}


	if(cosTheta > 1.0f - epsilon<float>())
	{
		return _mm_add_ps(x.Data, _mm_mul_ps(_mm_set1_ps(a), _mm_sub_ps(y.Data, x.Data)));
	}
	else
	{
        float angle = glm::fastAcos(cosTheta);


        __m128 s  = glm::fastSin(_mm_set_ps((1.0f - a) * angle, a * angle, angle, 0.0f));

        __m128 s0 =                               _mm_shuffle_ps(s, s, _MM_SHUFFLE(3, 3, 3, 3));
        __m128 s1 =                               _mm_shuffle_ps(s, s, _MM_SHUFFLE(2, 2, 2, 2));
        __m128 d  = _mm_div_ps(_mm_set1_ps(1.0f), _mm_shuffle_ps(s, s, _MM_SHUFFLE(1, 1, 1, 1)));
        
        return _mm_mul_ps(_mm_add_ps(_mm_mul_ps(s0, x.Data), _mm_mul_ps(s1, y.Data)), d);
	}
}


GLM_FUNC_QUALIFIER detail::fquatSIMD conjugate
(
	detail::fquatSIMD const & q
)
{
	return detail::fquatSIMD(_mm_mul_ps(q.Data, _mm_set_ps(1.0f, -1.0f, -1.0f, -1.0f)));
}

GLM_FUNC_QUALIFIER detail::fquatSIMD inverse
(
	detail::fquatSIMD const & q
)
{
	return conjugate(q) / dot(q, q);
}


GLM_FUNC_QUALIFIER detail::fquatSIMD angleAxisSIMD
(
	float const & angle,
	vec3 const & v
)
{
	float s = glm::sin(angle * 0.5f);

	return _mm_set_ps(
		glm::cos(angle * 0.5f),
		v.z * s,
		v.y * s,
		v.x * s);
}

GLM_FUNC_QUALIFIER detail::fquatSIMD angleAxisSIMD
(
	float const & angle, 
	float const & x, 
	float const & y, 
	float const & z
)
{
	return angleAxisSIMD(angle, vec3(x, y, z));
}


GLM_FUNC_QUALIFIER __m128 fastSin(__m128 x)
{
	static const __m128 c0 = _mm_set1_ps(0.16666666666666666666666666666667f);
	static const __m128 c1 = _mm_set1_ps(0.00833333333333333333333333333333f);
	static const __m128 c2 = _mm_set1_ps(0.00019841269841269841269841269841f);

	__m128 x3 = _mm_mul_ps(x,  _mm_mul_ps(x, x));
	__m128 x5 = _mm_mul_ps(x3, _mm_mul_ps(x, x));
	__m128 x7 = _mm_mul_ps(x5, _mm_mul_ps(x, x));

	__m128 y0 = _mm_mul_ps(x3, c0);
	__m128 y1 = _mm_mul_ps(x5, c1);
	__m128 y2 = _mm_mul_ps(x7, c2);

	return _mm_sub_ps(_mm_add_ps(_mm_sub_ps(x, y0), y1), y2);
}


}//namespace glm
Normalize to LF line endings 2016-05-30 16:37:03 +00:00			`///////////////////////////////////////////////////////////////////////////////////`
			`/// OpenGL Mathematics (glm.g-truc.net)`
			`///`
			`/// Copyright (c) 2005 - 2015 G-Truc Creation (www.g-truc.net)`
			`/// Permission is hereby granted, free of charge, to any person obtaining a copy`
			`/// of this software and associated documentation files (the "Software"), to deal`
			`/// in the Software without restriction, including without limitation the rights`
			`/// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell`
			`/// copies of the Software, and to permit persons to whom the Software is`
			`/// furnished to do so, subject to the following conditions:`
			`///`
			`/// The above copyright notice and this permission notice shall be included in`
			`/// all copies or substantial portions of the Software.`
			`///`
			`/// Restrictions:`
			`/// By making use of the Software for military purposes, you choose to make`
			`/// a Bunny unhappy.`
			`///`
			`/// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR`
			`/// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,`
			`/// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE`
			`/// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER`
			`/// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,`
			`/// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN`
			`/// THE SOFTWARE.`
			`///`
			`/// @ref gtx_simd_quat`
			`/// @file glm/gtx/simd_quat.inl`
			`/// @date 2013-04-22 / 2014-11-25`
			`/// @author Christophe Riccio`
			`///////////////////////////////////////////////////////////////////////////////////`

			`namespace glm{`
			`namespace detail{`


			`//////////////////////////////////////`
			`// Debugging`
			`#if 0`
			`void print(__m128 v)`
			`{`
			`GLM_ALIGN(16) float result[4];`
			`_mm_store_ps(result, v);`

			`printf("__m128: %f %f %f %f\n", result[0], result[1], result[2], result[3]);`
			`}`

			`void print(const fvec4SIMD &v)`
			`{`
			`printf("fvec4SIMD: %f %f %f %f\n", v.x, v.y, v.z, v.w);`
			`}`
			`#endif`


			`//////////////////////////////////////`
			`// Implicit basic constructors`

			`GLM_FUNC_QUALIFIER fquatSIMD::fquatSIMD()`
			`# ifdef GLM_FORCE_NO_CTOR_INIT`
			`: Data(_mm_set_ps(1.0f, 0.0f, 0.0f, 0.0f))`
			`# endif`
			`{}`

			`GLM_FUNC_QUALIFIER fquatSIMD::fquatSIMD(__m128 const & Data) :`
			`Data(Data)`
			`{}`

			`GLM_FUNC_QUALIFIER fquatSIMD::fquatSIMD(fquatSIMD const & q) :`
			`Data(q.Data)`
			`{}`


			`//////////////////////////////////////`
			`// Explicit basic constructors`

			`GLM_FUNC_QUALIFIER fquatSIMD::fquatSIMD(float const & w, float const & x, float const & y, float const & z) :`
			`Data(_mm_set_ps(w, z, y, x))`
			`{}`

			`GLM_FUNC_QUALIFIER fquatSIMD::fquatSIMD(quat const & q) :`
			`Data(_mm_set_ps(q.w, q.z, q.y, q.x))`
			`{}`

			`GLM_FUNC_QUALIFIER fquatSIMD::fquatSIMD(vec3 const & eulerAngles)`
			`{`
			`vec3 c = glm::cos(eulerAngles * 0.5f);`
			`vec3 s = glm::sin(eulerAngles * 0.5f);`

			`Data = _mm_set_ps(`
			`(c.x * c.y * c.z) + (s.x * s.y * s.z),`
			`(c.x * c.y * s.z) - (s.x * s.y * c.z),`
			`(c.x * s.y * c.z) + (s.x * c.y * s.z),`
			`(s.x * c.y * c.z) - (c.x * s.y * s.z));`
			`}`


			`//////////////////////////////////////`
			`// Unary arithmetic operators`

			`GLM_FUNC_QUALIFIER fquatSIMD& fquatSIMD::operator=(fquatSIMD const & q)`
			`{`
			`this->Data = q.Data;`
			`return *this;`
			`}`

			`GLM_FUNC_QUALIFIER fquatSIMD& fquatSIMD::operator*=(float const & s)`
			`{`
			`this->Data = _mm_mul_ps(this->Data, _mm_set_ps1(s));`
			`return *this;`
			`}`

			`GLM_FUNC_QUALIFIER fquatSIMD& fquatSIMD::operator/=(float const & s)`
			`{`
			`this->Data = _mm_div_ps(Data, _mm_set1_ps(s));`
			`return *this;`
			`}`



			`// negate operator`
			`GLM_FUNC_QUALIFIER fquatSIMD operator- (fquatSIMD const & q)`
			`{`
			`return fquatSIMD(_mm_mul_ps(q.Data, _mm_set_ps(-1.0f, -1.0f, -1.0f, -1.0f)));`
			`}`

			`// operator+`
			`GLM_FUNC_QUALIFIER fquatSIMD operator+ (fquatSIMD const & q1, fquatSIMD const & q2)`
			`{`
			`return fquatSIMD(_mm_add_ps(q1.Data, q2.Data));`
			`}`

			`//operator*`
			`GLM_FUNC_QUALIFIER fquatSIMD operator* (fquatSIMD const & q1, fquatSIMD const & q2)`
			`{`
			`// SSE2 STATS:`
			`// 11 shuffle`
			`// 8 mul`
			`// 8 add`

			`// SSE4 STATS:`
			`// 3 shuffle`
			`// 4 mul`
			`// 4 dpps`

			`__m128 mul0 = _mm_mul_ps(q1.Data, _mm_shuffle_ps(q2.Data, q2.Data, _MM_SHUFFLE(0, 1, 2, 3)));`
			`__m128 mul1 = _mm_mul_ps(q1.Data, _mm_shuffle_ps(q2.Data, q2.Data, _MM_SHUFFLE(1, 0, 3, 2)));`
			`__m128 mul2 = _mm_mul_ps(q1.Data, _mm_shuffle_ps(q2.Data, q2.Data, _MM_SHUFFLE(2, 3, 0, 1)));`
			`__m128 mul3 = _mm_mul_ps(q1.Data, q2.Data);`

			`# if((GLM_ARCH & GLM_ARCH_SSE4))`
			`__m128 add0 = _mm_dp_ps(mul0, _mm_set_ps(1.0f, -1.0f, 1.0f, 1.0f), 0xff);`
			`__m128 add1 = _mm_dp_ps(mul1, _mm_set_ps(1.0f, 1.0f, 1.0f, -1.0f), 0xff);`
			`__m128 add2 = _mm_dp_ps(mul2, _mm_set_ps(1.0f, 1.0f, -1.0f, 1.0f), 0xff);`
			`__m128 add3 = _mm_dp_ps(mul3, _mm_set_ps(1.0f, -1.0f, -1.0f, -1.0f), 0xff);`
			`# else`
			`mul0 = _mm_mul_ps(mul0, _mm_set_ps(1.0f, -1.0f, 1.0f, 1.0f));`
			`__m128 add0 = _mm_add_ps(mul0, _mm_movehl_ps(mul0, mul0));`
			`add0 = _mm_add_ss(add0, _mm_shuffle_ps(add0, add0, 1));`

			`mul1 = _mm_mul_ps(mul1, _mm_set_ps(1.0f, 1.0f, 1.0f, -1.0f));`
			`__m128 add1 = _mm_add_ps(mul1, _mm_movehl_ps(mul1, mul1));`
			`add1 = _mm_add_ss(add1, _mm_shuffle_ps(add1, add1, 1));`

			`mul2 = _mm_mul_ps(mul2, _mm_set_ps(1.0f, 1.0f, -1.0f, 1.0f));`
			`__m128 add2 = _mm_add_ps(mul2, _mm_movehl_ps(mul2, mul2));`
			`add2 = _mm_add_ss(add2, _mm_shuffle_ps(add2, add2, 1));`

			`mul3 = _mm_mul_ps(mul3, _mm_set_ps(1.0f, -1.0f, -1.0f, -1.0f));`
			`__m128 add3 = _mm_add_ps(mul3, _mm_movehl_ps(mul3, mul3));`
			`add3 = _mm_add_ss(add3, _mm_shuffle_ps(add3, add3, 1));`
			`#endif`


			`// This SIMD code is a politically correct way of doing this, but in every test I've tried it has been slower than`
			`// the final code below. I'll keep this here for reference - maybe somebody else can do something better...`
			`//`
			`//__m128 xxyy = _mm_shuffle_ps(add0, add1, _MM_SHUFFLE(0, 0, 0, 0));`
			`//__m128 zzww = _mm_shuffle_ps(add2, add3, _MM_SHUFFLE(0, 0, 0, 0));`
			`//`
			`//return _mm_shuffle_ps(xxyy, zzww, _MM_SHUFFLE(2, 0, 2, 0));`

			`float x;`
			`float y;`
			`float z;`
			`float w;`

			`_mm_store_ss(&x, add0);`
			`_mm_store_ss(&y, add1);`
			`_mm_store_ss(&z, add2);`
			`_mm_store_ss(&w, add3);`

			`return detail::fquatSIMD(w, x, y, z);`
			`}`

			`GLM_FUNC_QUALIFIER fvec4SIMD operator* (fquatSIMD const & q, fvec4SIMD const & v)`
			`{`
			`static const __m128 two = _mm_set1_ps(2.0f);`

			`__m128 q_wwww = _mm_shuffle_ps(q.Data, q.Data, _MM_SHUFFLE(3, 3, 3, 3));`
			`__m128 q_swp0 = _mm_shuffle_ps(q.Data, q.Data, _MM_SHUFFLE(3, 0, 2, 1));`
			`__m128 q_swp1 = _mm_shuffle_ps(q.Data, q.Data, _MM_SHUFFLE(3, 1, 0, 2));`
			`__m128 v_swp0 = _mm_shuffle_ps(v.Data, v.Data, _MM_SHUFFLE(3, 0, 2, 1));`
			`__m128 v_swp1 = _mm_shuffle_ps(v.Data, v.Data, _MM_SHUFFLE(3, 1, 0, 2));`

			`__m128 uv = _mm_sub_ps(_mm_mul_ps(q_swp0, v_swp1), _mm_mul_ps(q_swp1, v_swp0));`
			`__m128 uv_swp0 = _mm_shuffle_ps(uv, uv, _MM_SHUFFLE(3, 0, 2, 1));`
			`__m128 uv_swp1 = _mm_shuffle_ps(uv, uv, _MM_SHUFFLE(3, 1, 0, 2));`
			`__m128 uuv = _mm_sub_ps(_mm_mul_ps(q_swp0, uv_swp1), _mm_mul_ps(q_swp1, uv_swp0));`


			`uv = _mm_mul_ps(uv, _mm_mul_ps(q_wwww, two));`
			`uuv = _mm_mul_ps(uuv, two);`

			`return _mm_add_ps(v.Data, _mm_add_ps(uv, uuv));`
			`}`

			`GLM_FUNC_QUALIFIER fvec4SIMD operator* (fvec4SIMD const & v, fquatSIMD const & q)`
			`{`
			`return glm::inverse(q) * v;`
			`}`

			`GLM_FUNC_QUALIFIER fquatSIMD operator* (fquatSIMD const & q, float s)`
			`{`
			`return fquatSIMD(_mm_mul_ps(q.Data, _mm_set1_ps(s)));`
			`}`

			`GLM_FUNC_QUALIFIER fquatSIMD operator* (float s, fquatSIMD const & q)`
			`{`
			`return fquatSIMD(_mm_mul_ps(_mm_set1_ps(s), q.Data));`
			`}`


			`//operator/`
			`GLM_FUNC_QUALIFIER fquatSIMD operator/ (fquatSIMD const & q, float s)`
			`{`
			`return fquatSIMD(_mm_div_ps(q.Data, _mm_set1_ps(s)));`
			`}`


			`}//namespace detail`


			`GLM_FUNC_QUALIFIER quat quat_cast`
			`(`
			`detail::fquatSIMD const & x`
			`)`
			`{`
			`GLM_ALIGN(16) quat Result;`
			`_mm_store_ps(&Result[0], x.Data);`

			`return Result;`
			`}`

			`template <typename T>`
			`GLM_FUNC_QUALIFIER detail::fquatSIMD quatSIMD_cast_impl(const T m0[], const T m1[], const T m2[])`
			`{`
			`T trace = m0[0] + m1[1] + m2[2] + T(1.0);`
			`if (trace > T(0))`
			`{`
			`T s = static_cast<T>(0.5) / sqrt(trace);`

			`return _mm_set_ps(`
			`static_cast<float>(T(0.25) / s),`
			`static_cast<float>((m0[1] - m1[0]) * s),`
			`static_cast<float>((m2[0] - m0[2]) * s),`
			`static_cast<float>((m1[2] - m2[1]) * s));`
			`}`
			`else`
			`{`
			`if (m0[0] > m1[1])`
			`{`
			`if (m0[0] > m2[2])`
			`{`
			`// X is biggest.`
			`T s = sqrt(m0[0] - m1[1] - m2[2] + T(1.0)) * T(0.5);`

			`return _mm_set_ps(`
			`static_cast<float>((m1[2] - m2[1]) * s),`
			`static_cast<float>((m2[0] + m0[2]) * s),`
			`static_cast<float>((m0[1] + m1[0]) * s),`
			`static_cast<float>(T(0.5) * s));`
			`}`
			`}`
			`else`
			`{`
			`if (m1[1] > m2[2])`
			`{`
			`// Y is biggest.`
			`T s = sqrt(m1[1] - m0[0] - m2[2] + T(1.0)) * T(0.5);`

			`return _mm_set_ps(`
			`static_cast<float>((m2[0] - m0[2]) * s),`
			`static_cast<float>((m1[2] + m2[1]) * s),`
			`static_cast<float>(T(0.5) * s),`
			`static_cast<float>((m0[1] + m1[0]) * s));`
			`}`
			`}`

			`// Z is biggest.`
			`T s = sqrt(m2[2] - m0[0] - m1[1] + T(1.0)) * T(0.5);`

			`return _mm_set_ps(`
			`static_cast<float>((m0[1] - m1[0]) * s),`
			`static_cast<float>(T(0.5) * s),`
			`static_cast<float>((m1[2] + m2[1]) * s),`
			`static_cast<float>((m2[0] + m0[2]) * s));`
			`}`
			`}`

			`GLM_FUNC_QUALIFIER detail::fquatSIMD quatSIMD_cast`
			`(`
			`detail::fmat4x4SIMD const & m`
			`)`
			`{`
			`// Scalar implementation for now.`
			`GLM_ALIGN(16) float m0[4];`
			`GLM_ALIGN(16) float m1[4];`
			`GLM_ALIGN(16) float m2[4];`

			`_mm_store_ps(m0, m[0].Data);`
			`_mm_store_ps(m1, m[1].Data);`
			`_mm_store_ps(m2, m[2].Data);`

			`return quatSIMD_cast_impl(m0, m1, m2);`
			`}`

			`template <typename T, precision P>`
			`GLM_FUNC_QUALIFIER detail::fquatSIMD quatSIMD_cast`
			`(`
			`tmat4x4<T, P> const & m`
			`)`
			`{`
			`return quatSIMD_cast_impl(&m[0][0], &m[1][0], &m[2][0]);`
			`}`

			`template <typename T, precision P>`
			`GLM_FUNC_QUALIFIER detail::fquatSIMD quatSIMD_cast`
			`(`
			`tmat3x3<T, P> const & m`
			`)`
			`{`
			`return quatSIMD_cast_impl(&m[0][0], &m[1][0], &m[2][0]);`
			`}`


			`GLM_FUNC_QUALIFIER detail::fmat4x4SIMD mat4SIMD_cast`
			`(`
			`detail::fquatSIMD const & q`
			`)`
			`{`
			`detail::fmat4x4SIMD result;`

			`__m128 _wwww = _mm_shuffle_ps(q.Data, q.Data, _MM_SHUFFLE(3, 3, 3, 3));`
			`__m128 _xyzw = q.Data;`
			`__m128 _zxyw = _mm_shuffle_ps(q.Data, q.Data, _MM_SHUFFLE(3, 1, 0, 2));`
			`__m128 _yzxw = _mm_shuffle_ps(q.Data, q.Data, _MM_SHUFFLE(3, 0, 2, 1));`

			`__m128 _xyzw2 = _mm_add_ps(_xyzw, _xyzw);`
			`__m128 _zxyw2 = _mm_shuffle_ps(_xyzw2, _xyzw2, _MM_SHUFFLE(3, 1, 0, 2));`
			`__m128 _yzxw2 = _mm_shuffle_ps(_xyzw2, _xyzw2, _MM_SHUFFLE(3, 0, 2, 1));`

			`__m128 _tmp0 = _mm_sub_ps(_mm_set1_ps(1.0f), _mm_mul_ps(_yzxw2, _yzxw));`
			`_tmp0 = _mm_sub_ps(_tmp0, _mm_mul_ps(_zxyw2, _zxyw));`

			`__m128 _tmp1 = _mm_mul_ps(_yzxw2, _xyzw);`
			`_tmp1 = _mm_add_ps(_tmp1, _mm_mul_ps(_zxyw2, _wwww));`

			`__m128 _tmp2 = _mm_mul_ps(_zxyw2, _xyzw);`
			`_tmp2 = _mm_sub_ps(_tmp2, _mm_mul_ps(_yzxw2, _wwww));`


			`// There's probably a better, more politically correct way of doing this...`
			`result[0].Data = _mm_set_ps(`
			`0.0f,`
			`reinterpret_cast<float*>(&_tmp2)[0],`
			`reinterpret_cast<float*>(&_tmp1)[0],`
			`reinterpret_cast<float*>(&_tmp0)[0]);`

			`result[1].Data = _mm_set_ps(`
			`0.0f,`
			`reinterpret_cast<float*>(&_tmp1)[1],`
			`reinterpret_cast<float*>(&_tmp0)[1],`
			`reinterpret_cast<float*>(&_tmp2)[1]);`

			`result[2].Data = _mm_set_ps(`
			`0.0f,`
			`reinterpret_cast<float*>(&_tmp0)[2],`
			`reinterpret_cast<float*>(&_tmp2)[2],`
			`reinterpret_cast<float*>(&_tmp1)[2]);`

			`result[3].Data = _mm_set_ps(`
			`1.0f,`
			`0.0f,`
			`0.0f,`
			`0.0f);`


			`return result;`
			`}`

			`GLM_FUNC_QUALIFIER mat4 mat4_cast`
			`(`
			`detail::fquatSIMD const & q`
			`)`
			`{`
			`return mat4_cast(mat4SIMD_cast(q));`
			`}`



			`GLM_FUNC_QUALIFIER float length`
			`(`
			`detail::fquatSIMD const & q`
			`)`
			`{`
			`return glm::sqrt(dot(q, q));`
			`}`

			`GLM_FUNC_QUALIFIER detail::fquatSIMD normalize`
			`(`
			`detail::fquatSIMD const & q`
			`)`
			`{`
			`return _mm_mul_ps(q.Data, _mm_set1_ps(1.0f / length(q)));`
			`}`

			`GLM_FUNC_QUALIFIER float dot`
			`(`
			`detail::fquatSIMD const & q1,`
			`detail::fquatSIMD const & q2`
			`)`
			`{`
			`float result;`
			`_mm_store_ss(&result, detail::sse_dot_ps(q1.Data, q2.Data));`

			`return result;`
			`}`

			`GLM_FUNC_QUALIFIER detail::fquatSIMD mix`
			`(`
			`detail::fquatSIMD const & x,`
			`detail::fquatSIMD const & y,`
			`float const & a`
			`)`
			`{`
			`float cosTheta = dot(x, y);`

			`if (cosTheta > 1.0f - glm::epsilon<float>())`
			`{`
			`return _mm_add_ps(x.Data, _mm_mul_ps(_mm_set1_ps(a), _mm_sub_ps(y.Data, x.Data)));`
			`}`
			`else`
			`{`
			`float angle = glm::acos(cosTheta);`


			`float s0 = glm::sin((1.0f - a) * angle);`
			`float s1 = glm::sin(a * angle);`
			`float d = 1.0f / glm::sin(angle);`

			`return (s0 * x + s1 * y) * d;`
			`}`
			`}`

			`GLM_FUNC_QUALIFIER detail::fquatSIMD lerp`
			`(`
			`detail::fquatSIMD const & x,`
			`detail::fquatSIMD const & y,`
			`float const & a`
			`)`
			`{`
			`// Lerp is only defined in [0, 1]`
			`assert(a >= 0.0f);`
			`assert(a <= 1.0f);`

			`return _mm_add_ps(x.Data, _mm_mul_ps(_mm_set1_ps(a), _mm_sub_ps(y.Data, x.Data)));`
			`}`

			`GLM_FUNC_QUALIFIER detail::fquatSIMD slerp`
			`(`
			`detail::fquatSIMD const & x,`
			`detail::fquatSIMD const & y,`
			`float const & a`
			`)`
			`{`
			`detail::fquatSIMD z = y;`

			`float cosTheta = dot(x, y);`

			`// If cosTheta < 0, the interpolation will take the long way around the sphere.`
			`// To fix this, one quat must be negated.`
			`if (cosTheta < 0.0f)`
			`{`
			`z = -y;`
			`cosTheta = -cosTheta;`
			`}`

			`// Perform a linear interpolation when cosTheta is close to 1 to avoid side effect of sin(angle) becoming a zero denominator`
			`if(cosTheta > 1.0f - epsilon<float>())`
			`{`
			`return _mm_add_ps(x.Data, _mm_mul_ps(_mm_set1_ps(a), _mm_sub_ps(y.Data, x.Data)));`
			`}`
			`else`
			`{`
			`float angle = glm::acos(cosTheta);`


			`float s0 = glm::sin((1.0f - a) * angle);`
			`float s1 = glm::sin(a * angle);`
			`float d = 1.0f / glm::sin(angle);`

			`return (s0 * x + s1 * y) * d;`
			`}`
			`}`


			`GLM_FUNC_QUALIFIER detail::fquatSIMD fastMix`
			`(`
			`detail::fquatSIMD const & x,`
			`detail::fquatSIMD const & y,`
			`float const & a`
			`)`
			`{`
			`float cosTheta = dot(x, y);`

			`if (cosTheta > 1.0f - glm::epsilon<float>())`
			`{`
			`return _mm_add_ps(x.Data, _mm_mul_ps(_mm_set1_ps(a), _mm_sub_ps(y.Data, x.Data)));`
			`}`
			`else`
			`{`
			`float angle = glm::fastAcos(cosTheta);`


			`__m128 s = glm::fastSin(_mm_set_ps((1.0f - a) * angle, a * angle, angle, 0.0f));`

			`__m128 s0 = _mm_shuffle_ps(s, s, _MM_SHUFFLE(3, 3, 3, 3));`
			`__m128 s1 = _mm_shuffle_ps(s, s, _MM_SHUFFLE(2, 2, 2, 2));`
			`__m128 d = _mm_div_ps(_mm_set1_ps(1.0f), _mm_shuffle_ps(s, s, _MM_SHUFFLE(1, 1, 1, 1)));`

			`return _mm_mul_ps(_mm_add_ps(_mm_mul_ps(s0, x.Data), _mm_mul_ps(s1, y.Data)), d);`
			`}`
			`}`

			`GLM_FUNC_QUALIFIER detail::fquatSIMD fastSlerp`
			`(`
			`detail::fquatSIMD const & x,`
			`detail::fquatSIMD const & y,`
			`float const & a`
			`)`
			`{`
			`detail::fquatSIMD z = y;`

			`float cosTheta = dot(x, y);`
			`if (cosTheta < 0.0f)`
			`{`
			`z = -y;`
			`cosTheta = -cosTheta;`
			`}`


			`if(cosTheta > 1.0f - epsilon<float>())`
			`{`
			`return _mm_add_ps(x.Data, _mm_mul_ps(_mm_set1_ps(a), _mm_sub_ps(y.Data, x.Data)));`
			`}`
			`else`
			`{`
			`float angle = glm::fastAcos(cosTheta);`


			`__m128 s = glm::fastSin(_mm_set_ps((1.0f - a) * angle, a * angle, angle, 0.0f));`

			`__m128 s0 = _mm_shuffle_ps(s, s, _MM_SHUFFLE(3, 3, 3, 3));`
			`__m128 s1 = _mm_shuffle_ps(s, s, _MM_SHUFFLE(2, 2, 2, 2));`
			`__m128 d = _mm_div_ps(_mm_set1_ps(1.0f), _mm_shuffle_ps(s, s, _MM_SHUFFLE(1, 1, 1, 1)));`

			`return _mm_mul_ps(_mm_add_ps(_mm_mul_ps(s0, x.Data), _mm_mul_ps(s1, y.Data)), d);`
			`}`
			`}`



			`GLM_FUNC_QUALIFIER detail::fquatSIMD conjugate`
			`(`
			`detail::fquatSIMD const & q`
			`)`
			`{`
			`return detail::fquatSIMD(_mm_mul_ps(q.Data, _mm_set_ps(1.0f, -1.0f, -1.0f, -1.0f)));`
			`}`

			`GLM_FUNC_QUALIFIER detail::fquatSIMD inverse`
			`(`
			`detail::fquatSIMD const & q`
			`)`
			`{`
			`return conjugate(q) / dot(q, q);`
			`}`


			`GLM_FUNC_QUALIFIER detail::fquatSIMD angleAxisSIMD`
			`(`
			`float const & angle,`
			`vec3 const & v`
			`)`
			`{`
			`float s = glm::sin(angle * 0.5f);`

			`return _mm_set_ps(`
			`glm::cos(angle * 0.5f),`
			`v.z * s,`
			`v.y * s,`
			`v.x * s);`
			`}`

			`GLM_FUNC_QUALIFIER detail::fquatSIMD angleAxisSIMD`
			`(`
			`float const & angle,`
			`float const & x,`
			`float const & y,`
			`float const & z`
			`)`
			`{`
			`return angleAxisSIMD(angle, vec3(x, y, z));`
			`}`


			`GLM_FUNC_QUALIFIER __m128 fastSin(__m128 x)`
			`{`
			`static const __m128 c0 = _mm_set1_ps(0.16666666666666666666666666666667f);`
			`static const __m128 c1 = _mm_set1_ps(0.00833333333333333333333333333333f);`
			`static const __m128 c2 = _mm_set1_ps(0.00019841269841269841269841269841f);`

			`__m128 x3 = _mm_mul_ps(x, _mm_mul_ps(x, x));`
			`__m128 x5 = _mm_mul_ps(x3, _mm_mul_ps(x, x));`
			`__m128 x7 = _mm_mul_ps(x5, _mm_mul_ps(x, x));`

			`__m128 y0 = _mm_mul_ps(x3, c0);`
			`__m128 y1 = _mm_mul_ps(x5, c1);`
			`__m128 y2 = _mm_mul_ps(x7, c2);`

			`return _mm_sub_ps(_mm_add_ps(_mm_sub_ps(x, y0), y1), y2);`
			`}`


			`}//namespace glm`