32 #ifndef VSMC_OPENCL_BACKEND_CL_HPP
33 #define VSMC_OPENCL_BACKEND_CL_HPP
// Compile-time check that `Dim` equals `Dynamic`. The diagnostic identifier
// indicates it guards `resize_state` against use on a fixed-size StateCL.
46 #define VSMC_STATIC_ASSERT_OPENCL_BACKEND_CL_DYNAMIC_STATE_SIZE_RESIZE(Dim) \
47 VSMC_STATIC_ASSERT((Dim == Dynamic), \
48 USE_METHOD_resize_state_WITH_A_FIXED_SIZE_StateCL_OBJECT)
// Compile-time check that `derived` satisfies
// `internal::IsDerivedFromStateCL<derived>::value`. `user` is token-pasted
// into the diagnostic identifier to name the offending class.
50 #define VSMC_STATIC_ASSERT_OPENCL_BACKEND_CL_STATE_CL_TYPE(derived, user) \
51 VSMC_STATIC_ASSERT((internal::IsDerivedFromStateCL<derived>::value), \
52 USE_##user##_WITH_A_STATE_TYPE_NOT_DERIVED_FROM_StateCL)
// Compile-time check that `type` is exactly `::cl_float` or `::cl_double`.
54 #define VSMC_STATIC_ASSERT_OPENCL_BACKEND_CL_STATE_CL_FP_TYPE(type) \
55 VSMC_STATIC_ASSERT((cxx11::is_same<type, ::cl_float>::value \
56 || cxx11::is_same<type, ::cl_double>::value), \
57 USE_StateCL_WITH_A_FP_TYPE_OTHER_THAN_cl_float_AND_cl_double)
// Runtime check that `build()` evaluates to true at the expansion site.
// `func` is stringified into the message to name the calling method.
// NOTE(review): unlike the sibling assertion macros, this definition ends
// with its own `;`, so `MACRO(f);` expands to an extra empty statement.
// Confirm all call sites before dropping it.
59 #define VSMC_RUNTIME_ASSERT_OPENCL_BACKEND_CL_BUILD(func) \
60 VSMC_RUNTIME_ASSERT((build()), \
61 ("**StateCL::"#func"** CAN ONLY BE CALLED AFTER true " \
62 "**StateCL::build**"));
// Runtime check that `state_size` is at least 1.
64 #define VSMC_RUNTIME_ASSERT_OPENCL_BACKEND_CL_STATE_SIZE(state_size) \
65 VSMC_RUNTIME_ASSERT((state_size >= 1), ("STATE SIZE IS LESS THAN 1"))
// Runtime check that `N == copy_.size()`. Takes no arguments: both `N` and
// `copy_` must already be in scope where the macro is expanded.
67 #define VSMC_RUNTIME_ASSERT_OPENCL_BACKEND_CL_COPY_SIZE_MISMATCH \
68 VSMC_RUNTIME_ASSERT((N == copy_.size()), \
69 ("**StateCL::copy** SIZE MISMATCH"))
// Runtime check that the pack size `psize` is no smaller than `dim`.
71 #define VSMC_RUNTIME_ASSERT_OPENCL_BACKEND_CL_UNPACK_SIZE(psize, dim) \
72 VSMC_RUNTIME_ASSERT((psize >= dim), \
73 ("**StateCL::state_unpack** INPUT PACK SIZE TOO SMALL"))
75 #if VSMC_HAS_CXX11_DEFAULTED_FUNCTIONS
77 #define VSMC_DEFINE_OPENCL_COPY(Name) \
78 Name () : build_id_(-1) {} \
79 Name (const Name<T, PlaceHolder> &) = default; \
80 Name<T, PlaceHolder> &operator= (const Name<T, PlaceHolder> &) = default; \
83 #if VSMC_HAS_CXX11_RVALUE_REFERENCES
84 #define VSMC_DEFINE_OPENCL_MOVE(Name) \
85 Name (Name<T, PlaceHolder> &&) = default; \
86 Name<T, PlaceHolder> &operator= (Name<T, PlaceHolder> &&) = default;
88 #define VSMC_DEFINE_OPENCL_MOVE(Name)
91 #else // VSMC_HAS_CXX11_DEFAULTED_FUNCTIONS
93 #define VSMC_DEFINE_OPENCL_COPY(Name) \
94 Name () : build_id_(-1) {} \
95 Name (const Name<T, PlaceHolder> &other) : \
96 configure_(other.configure_), \
97 build_id_(other.build_id_), \
98 kernel_(other.kernel_), \
99 kernel_name_(other.kernel_name_) {} \
100 Name<T, PlaceHolder> &operator= (const Name<T, PlaceHolder> &other) \
102 if (this != &other) { \
103 configure_ = other.configure_; \
104 build_id_ = other.build_id_; \
105 kernel_ = other.kernel_; \
106 kernel_name_ = other.kernel_name_; \
111 #if VSMC_HAS_CXX11_RVALUE_REFERENCES
112 #define VSMC_DEFINE_OPENCL_MOVE(Name) \
113 Name (Name<T, PlaceHolder> &&other) : \
114 configure_(cxx11::move(other.configure_)), \
115 build_id_(other.build_id_), \
116 kernel_(cxx11::move(other.kernel_)), \
117 kernel_name_(cxx11::move(other.kernel_name_)) {} \
119 Name<T, PlaceHolder> &operator= (Name<T, PlaceHolder> &&other) \
121 if (this != &other) { \
122 configure_ = cxx11::move(other.configure_); \
123 build_id_ = other.build_id_; \
124 kernel_ = cxx11::move(other.kernel_); \
125 kernel_name_ = cxx11::move(other.kernel_name_); \
131 #define VSMC_DEFINE_OPENCL_MOVE(Name)
134 #endif // VSMC_HAS_CXX11_DEFAULTED_FUNCTIONS
// Expands to the accessor set for a kernel-owning class: mutable and const
// `configure()`, mutable and const `kernel()`, and a const `kernel_name()`.
// The expansion refers to members `configure_`, `kernel_` and `kernel_name_`,
// which the enclosing class must declare.
136 #define VSMC_DEFINE_OPENCL_CONFIGURE_KERNEL \
137 CLConfigure &configure () {return configure_;} \
138 const CLConfigure &configure () const {return configure_;} \
139 ::cl::Kernel &kernel () {return kernel_;} \
140 const ::cl::Kernel &kernel () const {return kernel_;} \
141 const std::string &kernel_name () const {return kernel_name_;}
143 #define VSMC_DEFINE_OPENCL_SET_KERNEL \
144 if (kname.empty()) { \
145 kernel_name_.clear(); \
148 if (build_id_ != particle.value().build_id() || kernel_name_ != kname) { \
149 build_id_ = particle.value().build_id(); \
150 kernel_name_ = kname; \
151 kernel_ = particle.value().create_kernel(kernel_name_); \
152 configure_.local_size(particle.size(), \
153 kernel_, particle.value().manager().device()); \
156 #define VSMC_DEFINE_OPENCL_MEMBER_DATA \
157 CLConfigure configure_; \
159 ::cl::Kernel kernel_; \
160 std::string kernel_name_
171 ss <<
"#ifndef FP_TYPE\n";
172 ss <<
"#define FP_TYPE float\n";
173 ss <<
"typedef float fp_type;\n";
176 ss <<
"#ifndef VSMC_HAS_OPENCL_DOUBLE\n";
177 ss <<
"#define VSMC_HAS_OPENCL_DOUBLE 0\n";
184 ss <<
"#if defined(cl_khr_fp64)\n";
185 ss <<
"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n";
186 ss <<
"#elif defined(cl_amd_fp64)\n";
187 ss <<
"#pragma OPENCL EXTENSION cl_amd_fp64 : enable\n";
190 ss <<
"#define FP_TYPE double\n";
191 ss <<
"typedef double fp_type;\n";
194 ss <<
"#ifndef VSMC_HAS_OPENCL_DOUBLE\n";
195 ss <<
"#define VSMC_HAS_OPENCL_DOUBLE 1\n";
199 template <
typename FPType>
201 std::size_t size, std::size_t state_size, std::size_t seed)
203 std::stringstream ss;
204 set_cl_fp_type<FPType>(ss);
206 ss <<
"#ifndef SIZE\n";
207 ss <<
"#define SIZE " << size <<
"UL\n";
210 ss <<
"#ifndef STATE_SIZE\n";
211 ss <<
"#define STATE_SIZE " << state_size <<
"UL\n";
214 ss <<
"#ifndef SEED\n";
215 ss <<
"#define SEED " << seed <<
"UL\n";
221 template <
typename D>
// Two-char tag type returned by the catch-all `test(...)` overload. The
// trait's `value` compares `sizeof(test(...))` against `sizeof(char)`, so a
// result of this type makes that comparison false.
226 struct char2 {
char c1;
char c2;};
228 template <std::
size_t Dim,
typename T,
typename ID>
230 static char2 test (...);
234 enum {
value =
sizeof(test(static_cast<const D *>(0))) ==
sizeof(
char)};
237 template <
typename D>
245 template <std::
size_t StateSize,
typename FPType,
typename ID>
258 state_size_(StateSize ==
Dynamic ? 1 : StateSize),
259 size_(N), build_(false), build_id_(0),
260 state_buffer_(state_size_ * size_)
262 #if VSMC_OPENCL_VERSION >= 120
263 if (
manager().opencl_version() >= 120) {
264 copy_from_buffer_.resize(size_,
265 CL_MEM_READ_ONLY|CL_MEM_HOST_WRITE_ONLY);
267 copy_from_buffer_.resize(size_, CL_MEM_READ_ONLY);
270 copy_from_buffer_.resize(size_, CL_MEM_READ_ONLY);
/// \brief Number of particles (returns the stored `size_`)
274 size_type
size ()
const {
return size_;}
286 state_buffer_.
resize(state_size_ * size_);
291 {state_buffer_.
resize(state_size_ * size_, flag);}
295 {state_buffer_.
resize(state_size_ * size_, flag, host_ptr);}
/// \brief Read only access to the OpenCL program associated with this value
/// collection
305 const ::cl::Program &
program ()
const {
return program_;}
344 template <
typename CharT,
typename Traits>
345 void build (
const std::string &source,
346 const std::string &flags, std::basic_ostream<CharT, Traits> &os)
350 std::string src(internal::cl_source_macros<fp_type>(
354 build_program(flags, os);
/// \brief Build the OpenCL program from source, forwarding to the
/// three-argument overload with `std::cout` as the output stream
///
/// \param source The program source text
/// \param flags Build flags; defaults to an empty string
357 void build (
const std::string &source,
358 const std::string &flags = std::string())
359 {
build(source, flags, std::cout);}
362 template <
typename CharT,
typename Traits>
364 const std::string &flags, std::basic_ostream<CharT, Traits> &os)
367 build_program(flags, os);
371 const std::string &flags = std::string())
372 {
build(program, flags, std::cout);}
/// \brief Whether the last attempted build succeeded (returns the `build_`
/// flag, which the constructor initializes to `false`)
375 bool build ()
const {
return build_;}
391 return ::cl::Kernel(program_, name.c_str());
394 template <
typename IntType>
395 void copy (std::size_t N,
const IntType *copy_from)
399 manager().template write_buffer<size_type>(
400 copy_from_buffer_.data(), N, copy_from);
401 copy_(copy_from_buffer_.data(), state_buffer_.
data());
406 state_idx_host_.resize(size_);
407 #if VSMC_OPENCL_VERSION >= 120
408 if (
manager().opencl_version() >= 120) {
409 state_idx_buffer_.
resize(size_,
410 CL_MEM_READ_ONLY|CL_MEM_HOST_WRITE_ONLY|
411 CL_MEM_USE_HOST_PTR, &state_idx_host_[0]);
413 state_idx_buffer_.
resize(size_,
414 CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR, &state_idx_host_[0]);
417 state_idx_buffer_.
resize(size_,
418 CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR, &state_idx_host_[0]);
421 state_tmp_host_.resize(size_ * state_size_);
422 state_tmp_buffer_.
resize(size_ * state_size_,
423 CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR, &state_tmp_host_[0]);
427 &state_tmp_host_[0]);
433 &state_idx_host_[0]);
435 &state_tmp_host_[0]);
436 copy_(state_idx_buffer_.
data(), state_tmp_buffer_.
data(),
437 state_buffer_.
data());
442 state_pack_type pack(create_pack());
443 std::memcpy(&pack[0], &state_tmp_host_[
id * state_size_], state_size_);
451 pack.
size(), state_size_);
453 state_idx_host_[id] = 1;
454 std::memcpy(&state_tmp_host_[
id * state_size_], &pack[0], state_size_);
467 std::size_t state_size_;
470 ::cl::Program program_;
481 std::vector<char, AlignedAllocator<char> > state_idx_host_;
482 std::vector<char, AlignedAllocator<char> > state_tmp_host_;
484 template <
typename CharT,
typename Traits>
485 void build_program (
const std::string flags,
486 std::basic_ostream<CharT, Traits> &os)
492 program_.build(
manager().device_vec(), flags.c_str());
493 copy_.
build(size_, state_size_);
495 }
catch (const ::cl::Error &err) {
502 state_pack_type create_pack ()
const
504 return create_pack_dispatch(
505 cxx11::integral_constant<bool, StateSize == Dynamic>());
509 {
return std::vector<char>(this->
state_size());}
512 {
return Array<char, StateSize>();}
542 template <
typename T,
typename PlaceHolder = NullType>
559 if (kernel_name_.empty())
565 particle.
value().manager().run_kernel(
578 const ::cl::Buffer &accept_buffer)
580 particle.
value().manager().read_buffer(
581 accept_buffer, particle.
size(), &accept_host_[0]);
583 return static_cast<std::size_t
>(std::accumulate(
584 accept_host_.begin(), accept_host_.end(),
597 accept_host_.resize(particle.
size());
598 #if VSMC_OPENCL_VERSION >= 120
599 if (particle.
value().manager().opencl_version() >= 120) {
601 CL_MEM_READ_WRITE|CL_MEM_HOST_READ_ONLY|
602 CL_MEM_USE_HOST_PTR, &accept_host_[0]);
605 CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, &accept_host_[0]);
609 CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, &accept_host_[0]);
612 particle.
value().state_buffer().data(), accept_buffer_.
data());
626 std::vector< ::cl_ulong> accept_host_;
638 template <typename T, typename PlaceHolder =
NullType>
655 if (kernel_name_.empty())
660 particle.
value().manager().run_kernel(
672 const ::cl::Buffer &accept_buffer)
674 particle.
value().manager().read_buffer(
675 accept_buffer, particle.
size(), &accept_host_[0]);
677 return static_cast<std::size_t
>(std::accumulate(
678 accept_host_.begin(), accept_host_.end(),
679 static_cast< ::cl_ulong
>(0)));
685 move_state(iter, kname);
692 accept_host_.resize(particle.
size());
693 #if VSMC_OPENCL_VERSION >= 120
694 if (particle.
value().manager().opencl_version() >= 120) {
695 accept_buffer_.resize(particle.
size(),
696 CL_MEM_READ_WRITE|CL_MEM_HOST_READ_ONLY|
697 CL_MEM_USE_HOST_PTR, &accept_host_[0]);
699 accept_buffer_.resize(particle.
size(),
700 CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, &accept_host_[0]);
703 accept_buffer_.resize(particle.
size(),
704 CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, &accept_host_[0]);
707 particle.
value().state_buffer().data(), accept_buffer_.data());
719 VSMC_DEFINE_OPENCL_MEMBER_DATA;
720 CLBuffer< ::cl_ulong, typename T::cl_id> accept_buffer_;
721 std::vector< ::cl_ulong> accept_host_;
734 template <typename T, typename PlaceHolder =
NullType>
752 if (kernel_name_.empty())
757 particle.
value().manager().run_kernel(
759 particle.
value().manager().template
760 read_buffer<typename T::fp_type>(
761 buffer_.data(), particle.
value().size() * dim, res);
773 monitor_state(iter, kname);
780 #if VSMC_OPENCL_VERSION >= 120
781 if (particle.
value().manager().opencl_version() >= 120) {
782 buffer_.resize(particle.
size() * dim,
783 CL_MEM_READ_WRITE|CL_MEM_HOST_READ_ONLY);
785 buffer_.resize(particle.
size() * dim);
788 buffer_.resize(particle.
size() * dim);
791 static_cast< ::cl_ulong>(dim),
792 particle.
value().state_buffer().data(), buffer_.data());
804 VSMC_DEFINE_OPENCL_MEMBER_DATA;
805 CLBuffer<typename T::fp_type, typename T::cl_id> buffer_;
818 template <typename T, typename PlaceHolder =
NullType>
836 if (kernel_name_.empty())
841 particle.
value().manager().run_kernel(
843 particle.
value().manager().template
844 read_buffer<typename T::fp_type>(
845 buffer_.data(), particle.
value().size(), res);
848 return this->path_grid(iter, particle);
859 path_state(iter, kname);
866 #if VSMC_OPENCL_VERSION >= 120
867 if (particle.
value().manager().opencl_version() >= 120) {
868 buffer_.resize(particle.
size(),
869 CL_MEM_READ_WRITE|CL_MEM_HOST_READ_ONLY);
871 buffer_.resize(particle.
size());
874 buffer_.resize(particle.
size());
877 particle.
value().state_buffer().data(), buffer_.data());
889 VSMC_DEFINE_OPENCL_MEMBER_DATA;
890 CLBuffer<typename T::fp_type, typename T::cl_id> buffer_;
895 #endif // VSMC_OPENCL_BACKEND_CL_HPP
#define VSMC_STATIC_ASSERT_OPENCL_BACKEND_CL_STATE_CL_TYPE(derived, user)
void copy_post_processor()
static SeedGenerator< ID, ResultType > & instance()
Particle class representing the whole particle set.
#define VSMC_DEFINE_OPENCL_CONFIGURE_KERNEL
void copy_pre_processor()
#define VSMC_RUNTIME_ASSERT_OPENCL_BACKEND_CL_BUILD(func)
const CLBuffer< char, ID > & state_buffer() const
The OpenCL buffer that stores the state values.
void build(const ::cl::Program &program, const std::string &flags=std::string())
#define VSMC_DEFINE_OPENCL_COPY(Name)
virtual void pre_processor(std::size_t, const Particle< T > &)
void build(std::size_t size, std::size_t state_size)
void read_buffer(const ::cl::Buffer &buf, std::size_t num, OutputIter first, std::size_t offset=0, const std::vector< ::cl::Event > *events=nullptr,::cl::Event *event=nullptr, bool block=true) const
Read an OpenCL buffer of a given type and number of elements into an iterator.
bool build() const
Whether the last attempted build succeeded.
::cl::Kernel & copy_kernel()
void state_unpack(size_type id, const state_pack_type &pack)
#define VSMC_STATIC_ASSERT_OPENCL_BACKEND_CL_DYNAMIC_STATE_SIZE_RESIZE(Dim)
void set_cl_fp_type< cl_double >(std::stringstream &ss)
virtual void set_kernel_args(std::size_t iter, std::size_t dim, const Particle< T > &particle)
virtual void set_kernel(std::size_t iter, const Particle< T > &particle)
value_type & value()
Read and write access to the value collection object.
#define VSMC_RUNTIME_ASSERT_OPENCL_BACKEND_CL_COPY_SIZE_MISMATCH
::cl::Kernel create_kernel(const std::string &name) const
Create kernel with the current program.
static constexpr::cl_uint kernel_args_offset()
The index offset of additional kernel arguments set by the user.
void build(const ::cl::Program &program, const std::string &flags, std::basic_ostream< CharT, Traits > &os)
Build from an existing program.
static std::vector< std::pair< ::cl_build_status, std::string > > program_build_log(const ::cl::Program &program)
Program build log and status.
void update_state(::cl_mem_flags flag, void *host_ptr)
Change state buffer flag and host pointer (causes reallocation)
void build(const std::string &source, const std::string &flags, std::basic_ostream< CharT, Traits > &os)
Build the OpenCL program from source.
state_pack_type state_pack(size_type id) const
void build(const std::string &source, const std::string &flags=std::string())
virtual void initialize_state(std::string &)
virtual void set_kernel(std::size_t iter, const Particle< T > &particle)
virtual std::size_t accept_count(Particle< T > &particle, const ::cl::Buffer &accept_buffer)
virtual void pre_processor(std::size_t, Particle< T > &)
void update_state(::cl_mem_flags flag)
Change state buffer flag (causes reallocation)
Monitor::eval_type subtype using OpenCL.
CLManager< ID > manager_type
::cl::Program & program()
void write_buffer(const ::cl::Buffer &buf, std::size_t num, InputIter first, std::size_t offset=0, const std::vector< ::cl::Event > *events=nullptr,::cl::Event *event=nullptr, bool block=true) const
Write an OpenCL buffer of a given type and number of elements from an iterator.
integral_constant< bool, false > false_type
void * memset(void *dst, int ch, std::size_t n)
SIMD optimized memset with non-temporal store for large buffers.
const ::cl::Program & program() const
The OpenCL program associated with this value collection.
virtual void pre_processor(std::size_t, const Particle< T > &)
Sampler::move_type subtype using OpenCL.
#define VSMC_RUNTIME_ASSERT_OPENCL_BACKEND_CL_UNPACK_SIZE(psize, dim)
virtual std::size_t accept_count(Particle< T > &particle, const ::cl::Buffer &accept_buffer)
const ::cl::Buffer & data() const
Read only access to the raw cl::Buffer object.
void set_cl_fp_type< cl_float >(std::stringstream &ss)
virtual void path_state(std::size_t, std::string &)
static constexpr::cl_uint kernel_args_offset()
The index offset of additional kernel arguments set by the user.
Used to specify a dimension template parameter is dynamic.
static constexpr::cl_uint kernel_args_offset()
The index offset of additional kernel arguments set by the user.
#define VSMC_RUNTIME_ASSERT_OPENCL_BACKEND_CL_STATE_SIZE(state_size)
std::size_t operator()(Particle< T > &particle, void *param)
void copy(std::size_t N, const IntType *copy_from)
virtual void set_kernel(std::size_t iter, std::size_t, const Particle< T > &particle)
Path::eval_type subtype using OpenCL.
virtual void monitor_state(std::size_t, std::string &)
void cl_set_kernel_args(::cl::Kernel &,::cl_uint)
void resize_state(std::size_t state_size)
Change state size.
virtual double path_grid(std::size_t, const Particle< T > &)
virtual void post_processor(std::size_t, Particle< T > &)
#define VSMC_DEFINE_OPENCL_MEMBER_DATA
cxx11::conditional< StateSize==Dynamic, std::vector< char >, Array< char, StateSize > >::type state_pack_type
#define VSMC_DEFINE_OPENCL_SET_KERNEL
virtual void pre_processor(Particle< T > &)
#define VSMC_DEFINE_OPENCL_MOVE(Name)
integral_constant< bool, true > true_type
const ::cl::Kernel & copy_kernel() const
Sampler::init_type subtype using OpenCL.
void * memcpy(void *dst, const void *src, std::size_t n)
SIMD optimized memcpy with non-temporal store for large buffers.
int build_id() const
The build id of the last attempted build.
#define VSMC_STATIC_ASSERT_OPENCL_BACKEND_CL_STATE_CL_FP_TYPE(type)
const CLConfigure & copy_configure() const
virtual void post_processor(std::size_t, const Particle< T > &)
virtual void initialize_param(Particle< T > &, void *)
CLConfigure & copy_configure()
virtual void set_kernel(const Particle< T > &particle)
std::string cl_source_macros(std::size_t size, std::size_t state_size, std::size_t seed)
virtual void post_processor(std::size_t, const Particle< T > &)
virtual void set_kernel_args(std::size_t iter, const Particle< T > &particle)
size_type size() const
Number of particles.
Particle::value_type subtype using OpenCL.
static constexpr::cl_uint kernel_args_offset()
The index offset of additional kernel arguments set by the user.
::cl::Program create_program(const std::string &source) const
Create a program given the source within the current context.
virtual void set_kernel_args(const Particle< T > &particle)
static CLManager< ID > & instance()
Get an instance of the manager singleton.
virtual void set_kernel_args(std::size_t iter, const Particle< T > &particle)
virtual void move_state(std::size_t, std::string &)
static constexpr size_type size()
static manager_type & manager()
The instance of the CLManager singleton associated with this value collection.
std::size_t state_size() const
void set_cl_fp_type(std::stringstream &)
virtual void post_processor(Particle< T > &)