114 lines
3.6 KiB
C++
114 lines
3.6 KiB
C++
// ==================================================================================
|
|
// Copyright (c) 2012 HiFi-LoFi
|
|
//
|
|
// This is free software: you can redistribute it and/or modify
|
|
// it under the terms of the GNU General Public License as published by
|
|
// the Free Software Foundation, either version 3 of the License, or
|
|
// (at your option) any later version.
|
|
//
|
|
// This program is distributed in the hope that it will be useful,
|
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
// GNU General Public License for more details.
|
|
//
|
|
// You should have received a copy of the GNU General Public License
|
|
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
// ==================================================================================
|
|
|
|
#include "Utilities.h"
|
|
|
|
|
|
namespace fftconvolver
|
|
{
|
|
|
|
bool SSEEnabled()
|
|
{
|
|
#if defined(FFTCONVOLVER_USE_SSE)
|
|
return true;
|
|
#else
|
|
return false;
|
|
#endif
|
|
}
|
|
|
|
|
|
void Sum(Sample* FFTCONVOLVER_RESTRICT result,
|
|
const Sample* FFTCONVOLVER_RESTRICT a,
|
|
const Sample* FFTCONVOLVER_RESTRICT b,
|
|
size_t len)
|
|
{
|
|
const size_t end4 = 4 * (len / 4);
|
|
for (size_t i=0; i<end4; i+=4)
|
|
{
|
|
result[i+0] = a[i+0] + b[i+0];
|
|
result[i+1] = a[i+1] + b[i+1];
|
|
result[i+2] = a[i+2] + b[i+2];
|
|
result[i+3] = a[i+3] + b[i+3];
|
|
}
|
|
for (size_t i=end4; i<len; ++i)
|
|
{
|
|
result[i] = a[i] + b[i];
|
|
}
|
|
}
|
|
|
|
|
|
void ComplexMultiplyAccumulate(SplitComplex& result, const SplitComplex& a, const SplitComplex& b)
|
|
{
|
|
assert(result.size() == a.size());
|
|
assert(result.size() == b.size());
|
|
ComplexMultiplyAccumulate(result.re(), result.im(), a.re(), a.im(), b.re(), b.im(), result.size());
|
|
}
|
|
|
|
|
|
void ComplexMultiplyAccumulate(Sample* FFTCONVOLVER_RESTRICT re,
|
|
Sample* FFTCONVOLVER_RESTRICT im,
|
|
const Sample* FFTCONVOLVER_RESTRICT reA,
|
|
const Sample* FFTCONVOLVER_RESTRICT imA,
|
|
const Sample* FFTCONVOLVER_RESTRICT reB,
|
|
const Sample* FFTCONVOLVER_RESTRICT imB,
|
|
const size_t len)
|
|
{
|
|
#if defined(FFTCONVOLVER_USE_SSE)
|
|
const size_t end4 = 4 * (len / 4);
|
|
for (size_t i=0; i<end4; i+=4)
|
|
{
|
|
const __m128 ra = _mm_load_ps(&reA[i]);
|
|
const __m128 rb = _mm_load_ps(&reB[i]);
|
|
const __m128 ia = _mm_load_ps(&imA[i]);
|
|
const __m128 ib = _mm_load_ps(&imB[i]);
|
|
__m128 real = _mm_load_ps(&re[i]);
|
|
__m128 imag = _mm_load_ps(&im[i]);
|
|
real = _mm_add_ps(real, _mm_mul_ps(ra, rb));
|
|
real = _mm_sub_ps(real, _mm_mul_ps(ia, ib));
|
|
_mm_store_ps(&re[i], real);
|
|
imag = _mm_add_ps(imag, _mm_mul_ps(ra, ib));
|
|
imag = _mm_add_ps(imag, _mm_mul_ps(ia, rb));
|
|
_mm_store_ps(&im[i], imag);
|
|
}
|
|
for (size_t i=end4; i<len; ++i)
|
|
{
|
|
re[i] += reA[i] * reB[i] - imA[i] * imB[i];
|
|
im[i] += reA[i] * imB[i] + imA[i] * reB[i];
|
|
}
|
|
#else
|
|
const size_t end4 = 4 * (len / 4);
|
|
for (size_t i=0; i<end4; i+=4)
|
|
{
|
|
re[i+0] += reA[i+0] * reB[i+0] - imA[i+0] * imB[i+0];
|
|
re[i+1] += reA[i+1] * reB[i+1] - imA[i+1] * imB[i+1];
|
|
re[i+2] += reA[i+2] * reB[i+2] - imA[i+2] * imB[i+2];
|
|
re[i+3] += reA[i+3] * reB[i+3] - imA[i+3] * imB[i+3];
|
|
im[i+0] += reA[i+0] * imB[i+0] + imA[i+0] * reB[i+0];
|
|
im[i+1] += reA[i+1] * imB[i+1] + imA[i+1] * reB[i+1];
|
|
im[i+2] += reA[i+2] * imB[i+2] + imA[i+2] * reB[i+2];
|
|
im[i+3] += reA[i+3] * imB[i+3] + imA[i+3] * reB[i+3];
|
|
}
|
|
for (size_t i=end4; i<len; ++i)
|
|
{
|
|
re[i] += reA[i] * reB[i] - imA[i] * imB[i];
|
|
im[i] += reA[i] * imB[i] + imA[i] * reB[i];
|
|
}
|
|
#endif
|
|
}
|
|
|
|
} // End of namespace fftconvolver
|