Skip to content

Libsimdpp

Header-only zero-overhead C++ wrapper for SIMD intrinsics of multiple instruction sets.

Example

/**
 * @file   AccelRaw.simd
 * @brief  AccelRaw class implementation.
 * @author user
 * @date   2018-01-08
 */

#include <libtbag/gpu/accel/AccelRaw.h>
#include <libtbag/algorithm/Pack.hpp>

#include <simdpp/simd.h>
#include <simdpp/dispatch/get_arch_gcc_builtin_cpu_supports.h>
#include <simdpp/dispatch/get_arch_linux_cpuinfo.h>
#include <simdpp/dispatch/get_arch_raw_cpuid.h>

#include <type_traits>

// Select the CPU-capability query that is exposed as SIMDPP_USER_ARCH_INFO.
// The providers are tried in a fixed order and the first one whose
// SIMDPP_HAS_GET_ARCH_* flag equals 1 wins:
//   1. get_arch_gcc_builtin_cpu_supports()
//   2. get_arch_linux_cpuinfo()
//   3. get_arch_raw_cpuid()
// When no provider is available the macro stays undefined, which compiles out
// the SIMDPP_MAKE_DISPATCHER section at the end of this file.
#if SIMDPP_HAS_GET_ARCH_GCC_BUILTIN_CPU_SUPPORTS == 1
# define SIMDPP_USER_ARCH_INFO  ::simdpp::get_arch_gcc_builtin_cpu_supports()
#elif SIMDPP_HAS_GET_ARCH_LINUX_CPUINFO == 1
# define SIMDPP_USER_ARCH_INFO  ::simdpp::get_arch_linux_cpuinfo()
#elif SIMDPP_HAS_GET_ARCH_RAW_CPUID == 1
# define SIMDPP_USER_ARCH_INFO  ::simdpp::get_arch_raw_cpuid()
#endif

// ------------------------------
namespace SIMDPP_ARCH_NAMESPACE {
// ------------------------------

// ---------------
namespace __impl {
// ---------------

/**
 * Couples a raw element pointer with one SIMD register of the matching type.
 *
 * @tparam BaseType  Scalar element type addressed through @c data.
 * @tparam SimdType  libsimdpp vector type; its @c element_type must equal
 *                   @c BaseType once a top-level @c const is removed.
 *
 * @remarks
 *  - The pack does not own the memory behind @c data.
 *  - The <code>Base const *</code> constructor strips constness so that input
 *    and output packs can share one type. Calling store() on a pack that was
 *    built from read-only memory is therefore an error on the caller's side.
 */
template <typename BaseType, typename SimdType>
struct BaseDataPack
{
    using Base = BaseType;
    using Simd = SimdType;
    using Element = typename Simd::element_type;

    static_assert(std::is_same<typename std::remove_const<Base>::type , Element>::value,
                  "Incorrect element type");

    /** Number of lanes held by one register. */
    TBAG_CONSTEXPR static unsigned getElementCount() TBAG_NOEXCEPT { return Simd::length; }
    /** Size of one scalar element in bytes. */
    TBAG_CONSTEXPR static unsigned     getBaseSize() TBAG_NOEXCEPT { return sizeof(Base); }

    /**
     * Number of vectors for @p count elements, as computed by
     * libtbag::algorithm::getPackedSize() (the rounding rule is defined there).
     */
    inline static int calcVectorSize(int count) TBAG_NOEXCEPT
    { return libtbag::algorithm::getPackedSize<int>(count, getElementCount()); }

    Base * data; ///< Current position inside the caller's buffer (not owned).
    Simd   simd; ///< Register content; zero-initialized by the constructors.

    explicit BaseDataPack(Base * d) : data(d), simd(simdpp::make_zero<Simd>())
    { /* EMPTY. */ }
    // Delegates to the mutable overload; see the class remarks about constness.
    explicit BaseDataPack(Base const * d) : BaseDataPack(const_cast<Base*>(d))
    { /* EMPTY. */ }
    ~BaseDataPack()
    { /* EMPTY. */ }

    /** Moves @c data forward by one vector; the register is left untouched. */
    inline void operator ++()
    { data += getElementCount(); }

    /**
     * Writes the register to the getElementCount() elements starting at @c data.
     *
     * The unaligned store is used on purpose: @c data is an arbitrary
     * caller-supplied pointer and nothing in this type guarantees the
     * vector-size alignment that simdpp::store() requires.
     */
    inline void store()
    { simdpp::store_u(data, simd); }
};

/**
 * Primary template, declared only: each supported element type provides an
 * explicit specialization, so using any other type fails at compile time.
 */
template <typename BaseType> struct DataPack;

/**
 * Pack of eight @c int lanes backed by simdpp::int32x8.
 */
template <> struct DataPack<int> : public BaseDataPack<int, simdpp::int32x8>
{
    DataPack(Base * d) : BaseDataPack(d) { /* EMPTY. */ }
    DataPack(Base const * d) : BaseDataPack(d) { /* EMPTY. */ }
    ~DataPack() { /* EMPTY. */ }

    /**
     * Fills the register from the eight elements starting at @c data.
     *
     * One unaligned vector load is used rather than assembling the register
     * lane by lane with simdpp::make_int(): that builder is meant for constants,
     * and @c data carries no alignment guarantee.
     */
    inline void load() { simd = simdpp::load_u(data); }
};

/**
 * Pack of eight @c unsigned lanes backed by simdpp::uint32x8.
 */
template <> struct DataPack<unsigned> : public BaseDataPack<unsigned, simdpp::uint32x8>
{
    DataPack(Base * d) : BaseDataPack(d) { /* EMPTY. */ }
    DataPack(Base const * d) : BaseDataPack(d) { /* EMPTY. */ }
    ~DataPack() { /* EMPTY. */ }

    /**
     * Fills the register from the eight elements starting at @c data.
     *
     * One unaligned vector load is used rather than assembling the register
     * lane by lane with simdpp::make_uint(): that builder is meant for
     * constants, and @c data carries no alignment guarantee.
     */
    inline void load() { simd = simdpp::load_u(data); }
};

/**
 * Pack of eight @c float lanes backed by simdpp::float32x8.
 */
template <> struct DataPack<float> : public BaseDataPack<float, simdpp::float32x8>
{
    DataPack(Base * d) : BaseDataPack(d) { /* EMPTY. */ }
    DataPack(Base const * d) : BaseDataPack(d) { /* EMPTY. */ }
    ~DataPack() { /* EMPTY. */ }

    /**
     * Fills the register from the eight elements starting at @c data.
     *
     * One unaligned vector load is used rather than assembling the register
     * lane by lane with simdpp::make_float(): that builder is meant for
     * constants, and @c data carries no alignment guarantee.
     */
    inline void load() { simd = simdpp::load_u(data); }
};

/**
 * Pack of four @c double lanes backed by simdpp::float64x4.
 */
template <> struct DataPack<double> : public BaseDataPack<double, simdpp::float64x4>
{
    DataPack(Base * d) : BaseDataPack(d) { /* EMPTY. */ }
    DataPack(Base const * d) : BaseDataPack(d) { /* EMPTY. */ }
    ~DataPack() { /* EMPTY. */ }

    /**
     * Fills the register from the four elements starting at @c data.
     *
     * One unaligned vector load is used rather than assembling the register
     * lane by lane with simdpp::make_float(): that builder is meant for
     * constants, and @c data carries no alignment guarantee.
     */
    inline void load() { simd = simdpp::load_u(data); }
};

template <typename T>
static bool addByAccel(T const * in1, T const * in2, T * out, int count)
{
    DataPack<T> v1_pack(in1);
    DataPack<T> v2_pack(in2);
    DataPack<T>  r_pack(out);

    auto loop_count = DataPack<T>::calcVectorSize(count);
    for (; loop_count; --loop_count) {
        v1_pack.load();
        v2_pack.load();
        r_pack.simd = simdpp::add(v1_pack.simd, v2_pack.simd);
        r_pack.store();
        ++v1_pack;
        ++v2_pack;
        ++r_pack;
    }

    return true;
}

// ------------------
} // namespace __impl
// ------------------

/**
 * Adds two @c int arrays element by element into @p out.
 *
 * @return TB_TRUE when __impl::addByAccel() reports success, TB_FALSE otherwise.
 */
tbBOOL tbAccelAdd_i(int const * in1, int const * in2, int * out, int count)
{
    bool const succeeded = __impl::addByAccel<int>(in1, in2, out, count);
    if (succeeded) {
        return TB_TRUE;
    }
    return TB_FALSE;
}
/**
 * Adds two @c unsigned arrays element by element into @p out.
 *
 * @return TB_TRUE when __impl::addByAccel() reports success, TB_FALSE otherwise.
 */
tbBOOL tbAccelAdd_u(unsigned const * in1, unsigned const * in2, unsigned * out, int count)
{
    bool const succeeded = __impl::addByAccel<unsigned>(in1, in2, out, count);
    if (succeeded) {
        return TB_TRUE;
    }
    return TB_FALSE;
}
/**
 * Adds two @c float arrays element by element into @p out.
 *
 * @return TB_TRUE when __impl::addByAccel() reports success, TB_FALSE otherwise.
 */
tbBOOL tbAccelAdd_f(float const * in1, float const * in2, float * out, int count)
{
    bool const succeeded = __impl::addByAccel<float>(in1, in2, out, count);
    if (succeeded) {
        return TB_TRUE;
    }
    return TB_FALSE;
}
/**
 * Adds two @c double arrays element by element into @p out.
 *
 * @return TB_TRUE when __impl::addByAccel() reports success, TB_FALSE otherwise.
 */
tbBOOL tbAccelAdd_d(double const * in1, double const * in2, double * out, int count)
{
    bool const succeeded = __impl::addByAccel<double>(in1, in2, out, count);
    if (succeeded) {
        return TB_TRUE;
    }
    return TB_FALSE;
}

// ---------------------------------
} // namespace SIMDPP_ARCH_NAMESPACE
// ---------------------------------

// Emit one dispatcher per tbAccelAdd_* entry point. This section is compiled
// only when a CPU-capability provider was selected near the top of the file
// (SIMDPP_USER_ARCH_INFO defined).
// Each invocation lists, in parenthesized form, the return type, the function
// name and every (type)name parameter pair of the function defined inside
// SIMDPP_ARCH_NAMESPACE above.
// NOTE(review): presumably the macro generates the non-namespaced function that
// forwards to the best per-architecture build at run time -- confirm against
// the libsimdpp dispatcher documentation.
#if defined(SIMDPP_USER_ARCH_INFO)
SIMDPP_MAKE_DISPATCHER((tbBOOL)(tbAccelAdd_i)((int      const *)in1, (int      const *)in2, (int      *)out, (int)count));
SIMDPP_MAKE_DISPATCHER((tbBOOL)(tbAccelAdd_u)((unsigned const *)in1, (unsigned const *)in2, (unsigned *)out, (int)count));
SIMDPP_MAKE_DISPATCHER((tbBOOL)(tbAccelAdd_f)((float    const *)in1, (float    const *)in2, (float    *)out, (int)count));
SIMDPP_MAKE_DISPATCHER((tbBOOL)(tbAccelAdd_d)((double   const *)in1, (double   const *)in2, (double   *)out, (int)count));
#endif

Local Download

libsimdpp 2.0 release candidate 2 (C++11 version)
Libsimdpp-2.0-rc2.tar.gz

See also

Favorite site