shairport-sync/FFTConvolver/Utilities.h

353 lines
7.0 KiB
C++

// ==================================================================================
// Copyright (c) 2012 HiFi-LoFi
//
// This is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
// ==================================================================================
#ifndef _FFTCONVOLVER_UTILITIES_H
#define _FFTCONVOLVER_UTILITIES_H
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstring>
#include <new>
namespace fftconvolver
{
#if defined(__SSE__) || (defined(_M_IX86_FP) && _M_IX86_FP >= 2)
#if !defined(FFTCONVOLVER_USE_SSE) && !defined(FFTCONVOLVER_DONT_USE_SSE)
#define FFTCONVOLVER_USE_SSE
#endif
#endif
#if defined (FFTCONVOLVER_USE_SSE)
#include <xmmintrin.h>
#endif
#if defined(__GNUC__)
#define FFTCONVOLVER_RESTRICT __restrict__
#else
#define FFTCONVOLVER_RESTRICT
#endif
/**
* @brief Returns whether SSE optimization for the convolver is enabled
* @return true: Enabled - false: Disabled
*/
bool SSEEnabled();
/**
* @class Buffer
* @brief Simple buffer implementation (uses 16-byte alignment if SSE optimization is enabled)
*/
template<typename T>
class Buffer
{
public:
explicit Buffer(size_t initialSize = 0) :
_data(0),
_size(0)
{
resize(initialSize);
}
virtual ~Buffer()
{
clear();
}
void clear()
{
deallocate(_data);
_data = 0;
_size = 0;
}
void resize(size_t size)
{
if (_size != size)
{
clear();
if (size > 0)
{
assert(!_data && _size == 0);
_data = allocate(size);
_size = size;
}
}
if (_data)
setZero();
}
size_t size() const
{
return _size;
}
void setZero()
{
::memset(_data, 0, _size * sizeof(T));
}
void copyFrom(const Buffer<T>& other)
{
assert(_size == other._size);
if (this != &other)
{
::memcpy(_data, other._data, _size * sizeof(T));
}
}
T& operator[](size_t index)
{
assert(_data && index < _size);
return _data[index];
}
const T& operator[](size_t index) const
{
assert(_data && index < _size);
return _data[index];
}
operator bool() const
{
return (_data != 0 && _size > 0);
}
T* data()
{
return _data;
}
const T* data() const
{
return _data;
}
static void Swap(Buffer<T>& a, Buffer<T>& b)
{
std::swap(a._data, b._data);
std::swap(a._size, b._size);
}
private:
T* allocate(size_t size)
{
#if defined(FFTCONVOLVER_USE_SSE)
return static_cast<T*>(_mm_malloc(size * sizeof(T), 16));
#else
return new T[size];
#endif
}
void deallocate(T* ptr)
{
#if defined(FFTCONVOLVER_USE_SSE)
_mm_free(ptr);
#else
delete [] ptr;
#endif
}
T* _data;
size_t _size;
// Prevent uncontrolled usage
Buffer(const Buffer&);
Buffer& operator=(const Buffer&);
};
/**
* @brief Type of one sample
*/
typedef float Sample;
/**
* @brief Buffer for samples
*/
typedef Buffer<Sample> SampleBuffer;
/**
* @class SplitComplex
* @brief Buffer for split-complex representation of FFT results
*
* The split-complex representation stores the real and imaginary parts
* of FFT results in two different memory buffers which is useful e.g. for
* SIMD optimizations.
*/
class SplitComplex
{
public:
explicit SplitComplex(size_t initialSize = 0) :
_size(0),
_re(),
_im()
{
resize(initialSize);
}
~SplitComplex()
{
clear();
}
void clear()
{
_re.clear();
_im.clear();
_size = 0;
}
void resize(size_t newSize)
{
_re.resize(newSize);
_im.resize(newSize);
_size = newSize;
}
void setZero()
{
_re.setZero();
_im.setZero();
}
void copyFrom(const SplitComplex& other)
{
_re.copyFrom(other._re);
_im.copyFrom(other._im);
}
Sample* re()
{
return _re.data();
}
const Sample* re() const
{
return _re.data();
}
Sample* im()
{
return _im.data();
}
const Sample* im() const
{
return _im.data();
}
size_t size() const
{
return _size;
}
private:
size_t _size;
SampleBuffer _re;
SampleBuffer _im;
// Prevent uncontrolled usage
SplitComplex(const SplitComplex&);
SplitComplex& operator=(const SplitComplex&);
};
/**
* @brief Returns the next power of 2 of a given number
* @param val The number
* @return The next power of 2
*/
template<typename T>
T NextPowerOf2(const T& val)
{
T nextPowerOf2 = 1;
while (nextPowerOf2 < val)
{
nextPowerOf2 *= 2;
}
return nextPowerOf2;
}
/**
* @brief Sums two given sample arrays
* @param result The result array
* @param a The 1st array
* @param b The 2nd array
* @param len The length of the arrays
*/
void Sum(Sample* FFTCONVOLVER_RESTRICT result,
const Sample* FFTCONVOLVER_RESTRICT a,
const Sample* FFTCONVOLVER_RESTRICT b,
size_t len);
/**
* @brief Copies a source array into a destination buffer and pads the destination buffer with zeros
* @param dest The destination buffer
* @param src The source array
* @param srcSize The size of the source array
*/
template<typename T>
void CopyAndPad(Buffer<T>& dest, const T* src, size_t srcSize)
{
assert(dest.size() >= srcSize);
::memcpy(dest.data(), src, srcSize * sizeof(T));
::memset(dest.data() + srcSize, 0, (dest.size()-srcSize) * sizeof(T));
}
/**
* @brief Adds the complex product of two split-complex buffers to a result buffer
* @param result The result buffer
* @param a The 1st factor of the complex product
* @param b The 2nd factor of the complex product
*/
void ComplexMultiplyAccumulate(SplitComplex& result, const SplitComplex& a, const SplitComplex& b);
/**
* @brief Adds the complex product of two split-complex arrays to a result array
* @param re The real part of the result buffer
* @param im The imaginary part of the result buffer
* @param reA The real part of the 1st factor of the complex product
* @param imA The imaginary part of the 1st factor of the complex product
* @param reB The real part of the 2nd factor of the complex product
* @param imB The imaginary part of the 2nd factor of the complex product
*/
void ComplexMultiplyAccumulate(Sample* FFTCONVOLVER_RESTRICT re,
Sample* FFTCONVOLVER_RESTRICT im,
const Sample* FFTCONVOLVER_RESTRICT reA,
const Sample* FFTCONVOLVER_RESTRICT imA,
const Sample* FFTCONVOLVER_RESTRICT reB,
const Sample* FFTCONVOLVER_RESTRICT imB,
const size_t len);
} // End of namespace fftconvolver
#endif // Header guard