sse_optimized.cpp | sse_optimized.cpp | |||
---|---|---|---|---|
skipping to change at line 26 | skipping to change at line 26 | |||
/// | /// | |||
/// If the above URL is expired or removed, go to "http://msdn.microsoft.com" and | /// If the above URL is expired or removed, go to "http://msdn.microsoft.com" and | |||
/// perform a search with keywords "processor pack". | /// perform a search with keywords "processor pack". | |||
/// | /// | |||
/// Author : Copyright (c) Olli Parviainen | /// Author : Copyright (c) Olli Parviainen | |||
/// Author e-mail : oparviai 'at' iki.fi | /// Author e-mail : oparviai 'at' iki.fi | |||
/// SoundTouch WWW: http://www.surina.net/soundtouch | /// SoundTouch WWW: http://www.surina.net/soundtouch | |||
/// | /// | |||
//////////////////////////////////////////////////////////////////////////////// | //////////////////////////////////////////////////////////////////////////////// | |||
// | // | |||
// Last changed : $Date: 2015-02-21 21:24:29 +0000 (Sat, 21 Feb 2015) $ | // Last changed : $Date: 2015-08-09 00:00:15 +0300 (Sun, 09 Aug 2015) $ | |||
// File revision : $Revision: 4 $ | // File revision : $Revision: 4 $ | |||
// | // | |||
// $Id: sse_optimized.cpp 202 2015-02-21 21:24:29Z oparviai $ | // $Id: sse_optimized.cpp 226 2015-08-08 21:00:15Z oparviai $ | |||
// | // | |||
//////////////////////////////////////////////////////////////////////////////// | //////////////////////////////////////////////////////////////////////////////// | |||
// | // | |||
// License : | // License : | |||
// | // | |||
// SoundTouch audio processing library | // SoundTouch audio processing library | |||
// Copyright (c) Olli Parviainen | // Copyright (c) Olli Parviainen | |||
// | // | |||
// This library is free software; you can redistribute it and/or | // This library is free software; you can redistribute it and/or | |||
// modify it under the terms of the GNU Lesser General Public | // modify it under the terms of the GNU Lesser General Public | |||
skipping to change at line 74 | skipping to change at line 74 | |||
// | // | |||
// implementation of SSE optimized functions of class 'TDStretchSSE' | // implementation of SSE optimized functions of class 'TDStretchSSE' | |||
// | // | |||
////////////////////////////////////////////////////////////////////////////// | ////////////////////////////////////////////////////////////////////////////// | |||
#include "TDStretch.h" | #include "TDStretch.h" | |||
#include <xmmintrin.h> | #include <xmmintrin.h> | |||
#include <math.h> | #include <math.h> | |||
// Calculates cross correlation of two buffers | // Calculates cross correlation of two buffers | |||
double TDStretchSSE::calcCrossCorr(const float *pV1, const float *pV2, double &anorm) const | double TDStretchSSE::calcCrossCorr(const float *pV1, const float *pV2, double &anorm) | |||
{ | { | |||
int i; | int i; | |||
const float *pVec1; | const float *pVec1; | |||
const __m128 *pVec2; | const __m128 *pVec2; | |||
__m128 vSum, vNorm; | __m128 vSum, vNorm; | |||
// Note. It means a major slow-down if the routine needs to tolerate | // Note. It means a major slow-down if the routine needs to tolerate | |||
// unaligned __m128 memory accesses. It's way faster if we can skip | // unaligned __m128 memory accesses. It's way faster if we can skip | |||
// unaligned slots and use _mm_load_ps instruction instead of _mm_loadu_ps. | // unaligned slots and use _mm_load_ps instruction instead of _mm_loadu_ps. | |||
// This can mean up to ~ 10-fold difference (incl. part of which is | // This can mean up to ~ 10-fold difference (incl. part of which is | |||
skipping to change at line 184 | skipping to change at line 184 | |||
for (j = 0; j < 15; j ++) norm += pV1[j] * pV1[j]; | for (j = 0; j < 15; j ++) norm += pV1[j] * pV1[j]; | |||
pV1 += 16; | pV1 += 16; | |||
pV2 += 16; | pV2 += 16; | |||
} | } | |||
return corr / sqrt(norm); | return corr / sqrt(norm); | |||
*/ | */ | |||
} | } | |||
double TDStretchSSE::calcCrossCorrAccumulate(const float *pV1, const float *pV2, double &norm) const | double TDStretchSSE::calcCrossCorrAccumulate(const float *pV1, const float *pV2, double &norm) | |||
{ | { | |||
// call usual calcCrossCorr function because SSE does not show big benefit of | // call usual calcCrossCorr function because SSE does not show big benefit of | |||
// accumulating "norm" value, and also the "norm" rolling algorithm would get | // accumulating "norm" value, and also the "norm" rolling algorithm would get | |||
// complicated due to SSE-specific alignment-vs-nonexact correlation rules. | // complicated due to SSE-specific alignment-vs-nonexact correlation rules. | |||
return calcCrossCorr(pV1, pV2, norm); | return calcCrossCorr(pV1, pV2, norm); | |||
} | } | |||
////////////////////////////////////////////////////////////////////////////// | ////////////////////////////////////////////////////////////////////////////// | |||
// | // | |||
// implementation of SSE optimized functions of class 'FIRFilter' | // implementation of SSE optimized functions of class 'FIRFilter' | |||
End of changes. 4 change blocks. | ||||
4 lines changed or deleted | 4 lines changed or added | |||
This html diff was produced by rfcdiff 1.41. The latest version is available from http://tools.ietf.org/tools/rfcdiff/ |