Libsimdpp
Header-only zero-overhead C++ wrapper for SIMD intrinsics of multiple instruction sets.
Example
/**
* @file AccelRaw.simd
* @brief AccelRaw class implementation.
* @author user
* @date 2018-01-08
*/
#include <libtbag/gpu/accel/AccelRaw.h>
#include <libtbag/algorithm/Pack.hpp>
#include <simdpp/simd.h>
#include <simdpp/dispatch/get_arch_gcc_builtin_cpu_supports.h>
#include <simdpp/dispatch/get_arch_linux_cpuinfo.h>
#include <simdpp/dispatch/get_arch_raw_cpuid.h>
#include <type_traits>
// Pick the function that SIMDPP_USER_ARCH_INFO expands to. The first probe whose
// SIMDPP_HAS_* flag equals 1 wins, in this order: GCC builtin cpu-supports,
// Linux cpuinfo, raw CPUID. If none is available the macro stays undefined and the
// SIMDPP_MAKE_DISPATCHER invocations at the end of this file are not emitted.
#if SIMDPP_HAS_GET_ARCH_GCC_BUILTIN_CPU_SUPPORTS == 1
# define SIMDPP_USER_ARCH_INFO ::simdpp::get_arch_gcc_builtin_cpu_supports()
#elif SIMDPP_HAS_GET_ARCH_LINUX_CPUINFO == 1
# define SIMDPP_USER_ARCH_INFO ::simdpp::get_arch_linux_cpuinfo()
#elif SIMDPP_HAS_GET_ARCH_RAW_CPUID == 1
# define SIMDPP_USER_ARCH_INFO ::simdpp::get_arch_raw_cpuid()
#endif
// ------------------------------
namespace SIMDPP_ARCH_NAMESPACE {
// ------------------------------
// ---------------
namespace __impl {
// ---------------
/**
 * Pairs a raw element pointer with one SIMD vector of the matching type.
 *
 * The pack does not own the memory behind @c data; it only walks over it one
 * vector at a time.
 *
 * @tparam BaseType  Scalar element type addressed through @c data.
 * @tparam SimdType  libsimdpp vector type. Its @c element_type must be the same
 *                   type as @c BaseType (ignoring const); this is enforced by a
 *                   static_assert.
 */
template <typename BaseType, typename SimdType>
struct BaseDataPack
{
    using Base    = BaseType;
    using Simd    = SimdType;
    using Element = typename Simd::element_type;

    static_assert(std::is_same<typename std::remove_const<Base>::type , Element>::value,
                  "Incorrect element type");

    /** @return Number of elements held by one @c Simd vector. */
    TBAG_CONSTEXPR static unsigned getElementCount() TBAG_NOEXCEPT { return Simd::length; }

    /** @return Size in bytes of a single element. */
    TBAG_CONSTEXPR static unsigned getBaseSize() TBAG_NOEXCEPT { return sizeof(Base); }

    /**
     * @param count  Number of scalar elements.
     * @return Number of vectors for @p count elements, as computed by
     *         libtbag::algorithm::getPackedSize (the rounding rule is defined there).
     */
    inline static int calcVectorSize(int count) TBAG_NOEXCEPT
    { return libtbag::algorithm::getPackedSize<int>(count, getElementCount()); }

    Base * data; ///< Current position inside the caller's buffer (not owned).
    Simd   simd; ///< Vector value associated with the current position.

    /** Starts at @p d with @c simd initialised through simdpp::make_zero. */
    explicit BaseDataPack(Base * d) : data(d), simd(simdpp::make_zero<Simd>())
    { /* EMPTY. */ }

    /**
     * Read-only buffers are accepted by casting the constness away so that inputs
     * and outputs can share one pack type. Callers must not call store() on a pack
     * that was created from a pointer to const data.
     */
    explicit BaseDataPack(Base const * d) : BaseDataPack(const_cast<Base*>(d))
    { /* EMPTY. */ }

    // No user-declared destructor: the pack owns nothing, so the implicitly
    // generated special members are sufficient.

    /**
     * Advances @c data by one full vector (getElementCount() elements).
     *
     * @return Reference to this pack, following the usual prefix-increment contract.
     */
    inline BaseDataPack & operator ++()
    {
        data += getElementCount();
        return *this;
    }

    /**
     * Writes @c simd to the memory at @c data.
     *
     * NOTE(review): simdpp::store is libsimdpp's aligned store, so @c data presumably
     * has to be aligned to the vector size. Confirm that every caller guarantees this;
     * otherwise simdpp::store_u would be required.
     */
    inline void store()
    { simdpp::store(data, simd); }
};
/** Declared only; each supported element type supplies an explicit specialization. */
template <typename BaseType>
struct DataPack;

/**
 * Pack for @c int buffers, backed by simdpp::int32x8.
 */
template <>
struct DataPack<int> : public BaseDataPack<int, simdpp::int32x8>
{
    DataPack(Base * ptr) : BaseDataPack(ptr)
    {
        // EMPTY.
    }

    DataPack(Base const * ptr) : BaseDataPack(ptr)
    {
        // EMPTY.
    }

    ~DataPack()
    {
        // EMPTY.
    }

    /** Rebuilds @c simd from the eight elements that start at @c data. */
    inline void load()
    {
        Base const * const src = data;
        simd = simdpp::make_int<Simd>(src[0], src[1], src[2], src[3],
                                      src[4], src[5], src[6], src[7]);
    }
};
/**
 * Pack for @c unsigned buffers, backed by simdpp::uint32x8.
 */
template <>
struct DataPack<unsigned> : public BaseDataPack<unsigned, simdpp::uint32x8>
{
    DataPack(Base * ptr) : BaseDataPack(ptr)
    {
        // EMPTY.
    }

    DataPack(Base const * ptr) : BaseDataPack(ptr)
    {
        // EMPTY.
    }

    ~DataPack()
    {
        // EMPTY.
    }

    /** Rebuilds @c simd from the eight elements that start at @c data. */
    inline void load()
    {
        Base const * const src = data;
        simd = simdpp::make_uint<Simd>(src[0], src[1], src[2], src[3],
                                       src[4], src[5], src[6], src[7]);
    }
};
/**
 * Pack for @c float buffers, backed by simdpp::float32x8.
 */
template <>
struct DataPack<float> : public BaseDataPack<float, simdpp::float32x8>
{
    DataPack(Base * ptr) : BaseDataPack(ptr)
    {
        // EMPTY.
    }

    DataPack(Base const * ptr) : BaseDataPack(ptr)
    {
        // EMPTY.
    }

    ~DataPack()
    {
        // EMPTY.
    }

    /** Rebuilds @c simd from the eight elements that start at @c data. */
    inline void load()
    {
        Base const * const src = data;
        simd = simdpp::make_float<Simd>(src[0], src[1], src[2], src[3],
                                        src[4], src[5], src[6], src[7]);
    }
};
/**
 * Pack for @c double buffers, backed by simdpp::float64x4.
 */
template <>
struct DataPack<double> : public BaseDataPack<double, simdpp::float64x4>
{
    DataPack(Base * ptr) : BaseDataPack(ptr)
    {
        // EMPTY.
    }

    DataPack(Base const * ptr) : BaseDataPack(ptr)
    {
        // EMPTY.
    }

    ~DataPack()
    {
        // EMPTY.
    }

    /** Rebuilds @c simd from the four elements that start at @c data. */
    inline void load()
    {
        Base const * const src = data;
        simd = simdpp::make_float<Simd>(src[0], src[1], src[2], src[3]);
    }
};
template <typename T>
static bool addByAccel(T const * in1, T const * in2, T * out, int count)
{
DataPack<T> v1_pack(in1);
DataPack<T> v2_pack(in2);
DataPack<T> r_pack(out);
auto loop_count = DataPack<T>::calcVectorSize(count);
for (; loop_count; --loop_count) {
v1_pack.load();
v2_pack.load();
r_pack.simd = simdpp::add(v1_pack.simd, v2_pack.simd);
r_pack.store();
++v1_pack;
++v2_pack;
++r_pack;
}
return true;
}
// ------------------
} // namespace __impl
// ------------------
/** Adds two @c int buffers through __impl::addByAccel and reports the result as tbBOOL. */
tbBOOL tbAccelAdd_i(int const * in1, int const * in2, int * out, int count)
{
    if (__impl::addByAccel<int>(in1, in2, out, count)) {
        return TB_TRUE;
    }
    return TB_FALSE;
}
/** Adds two @c unsigned buffers through __impl::addByAccel and reports the result as tbBOOL. */
tbBOOL tbAccelAdd_u(unsigned const * in1, unsigned const * in2, unsigned * out, int count)
{
    if (__impl::addByAccel<unsigned>(in1, in2, out, count)) {
        return TB_TRUE;
    }
    return TB_FALSE;
}
/** Adds two @c float buffers through __impl::addByAccel and reports the result as tbBOOL. */
tbBOOL tbAccelAdd_f(float const * in1, float const * in2, float * out, int count)
{
    if (__impl::addByAccel<float>(in1, in2, out, count)) {
        return TB_TRUE;
    }
    return TB_FALSE;
}
/** Adds two @c double buffers through __impl::addByAccel and reports the result as tbBOOL. */
tbBOOL tbAccelAdd_d(double const * in1, double const * in2, double * out, int count)
{
    if (__impl::addByAccel<double>(in1, in2, out, count)) {
        return TB_TRUE;
    }
    return TB_FALSE;
}
// ---------------------------------
} // namespace SIMDPP_ARCH_NAMESPACE
// ---------------------------------
// Emit one libsimdpp dispatcher per tbAccelAdd_* function. Each invocation spells out
// the return type, the function name and the (type)name parameter list of the matching
// function defined inside SIMDPP_ARCH_NAMESPACE. The dispatchers are only generated when
// an architecture-detection function was selected through SIMDPP_USER_ARCH_INFO.
#if defined(SIMDPP_USER_ARCH_INFO)
SIMDPP_MAKE_DISPATCHER((tbBOOL)(tbAccelAdd_i)((int const *)in1, (int const *)in2, (int *)out, (int)count));
SIMDPP_MAKE_DISPATCHER((tbBOOL)(tbAccelAdd_u)((unsigned const *)in1, (unsigned const *)in2, (unsigned *)out, (int)count));
SIMDPP_MAKE_DISPATCHER((tbBOOL)(tbAccelAdd_f)((float const *)in1, (float const *)in2, (float *)out, (int)count));
SIMDPP_MAKE_DISPATCHER((tbBOOL)(tbAccelAdd_d)((double const *)in1, (double const *)in2, (double *)out, (int)count));
#endif
Local Download
- libsimdpp 2.0 release candidate 2 (C++11 version)
  - Libsimdpp-2.0-rc2.tar.gz